| ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| ; REQUIRES: amdgpu-registered-target |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -amdgpu-spill-sgpr-to-vgpr=1 \ |
| ; RUN: -verify-machineinstrs -stop-after=si-lower-sgpr-spills -o - %s | FileCheck %s |
| |
| ;; Ensure that si-lower-sgpr-spills prevents IMPLICIT_DEF assignments from clobbering |
| ;; backedge writes by placing the assignment in the cycle preheader and not the header. |
| |
| ; Kernel with one loop that carries an induction variable (%iv) and eight i32 |
| ; values (%v0-%v7) through phis. The RUN line compiles it at -O0 with |
| ; -amdgpu-spill-sgpr-to-vgpr=1 and stops after si-lower-sgpr-spills, so the |
| ; assertions below show SGPR values moving through lanes of a single vgpr_32 |
| ; ([[DEF]]) via SI_SPILL_S32_TO_VGPR / SI_RESTORE_S32_FROM_VGPR. |
| ; The IMPLICIT_DEF of that VGPR is expected in bb.0.entry (the block that |
| ; branches into the loop), not in bb.1.loop.header. |
| define amdgpu_kernel void @loop_sgpr_spill_implicit_def_in_preheader( |
| ; CHECK-LABEL: name: loop_sgpr_spill_implicit_def_in_preheader |
| ; CHECK: bb.0.entry: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr8_sgpr9 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from constant-pool, align 16, addrspace 4) |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 16, addrspace 4) |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 1, [[DEF]], implicit killed $sgpr4_sgpr5 |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s32) from `ptr addrspace(4) poison`, addrspace 4) |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]] |
| ; CHECK-NEXT: renamable $sgpr11 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr10 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr7 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr6 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr5 = COPY renamable $sgpr4 |
| ; CHECK-NEXT: renamable $sgpr12 = S_MOV_B32 0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 3, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 4, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 5, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 6, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 7, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 8, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 9, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 10, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 11, [[DEF]] |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1.loop.header: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3 |
| ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4 |
| ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5 |
| ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6 |
| ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7 |
| ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8 |
| ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9 |
| ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 10 |
| ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 11 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 12, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 13, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 14, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 15, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 16, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 17, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 18, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 19, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 20, [[DEF]] |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1 |
| ; CHECK-NEXT: renamable $sgpr7 = S_MOV_B32 0 |
| ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr6, killed renamable $sgpr7, implicit-def $scc |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_CSELECT_B64 -1, 0, implicit killed $scc |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 killed renamable $sgpr6_sgpr7, renamable $sgpr4_sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 21, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 22, [[DEF]], implicit killed $sgpr4_sgpr5 |
| ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc |
| ; CHECK-NEXT: S_BRANCH %bb.2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2.loop.latch: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 20 |
| ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 12 |
| ; CHECK-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 13 |
| ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 14 |
| ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 15 |
| ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 16 |
| ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 17 |
| ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 18 |
| ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 19 |
| ; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 1 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 23, [[DEF]] |
| ; CHECK-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr8 = S_ADD_I32 renamable $sgpr8, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr9 = S_ADD_I32 renamable $sgpr9, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr10 = S_ADD_I32 renamable $sgpr10, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr11 = S_ADD_I32 renamable $sgpr11, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr12 = S_ADD_I32 renamable $sgpr12, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr13 = S_ADD_I32 renamable $sgpr13, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr14 = S_ADD_I32 renamable $sgpr6, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr6 = nuw nsw S_ADD_I32 renamable $sgpr4, renamable $sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr14, 24, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr13, 25, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 26, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 27, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 28, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 29, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 30, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 31, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 32, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 21, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 22, [[DEF]], implicit killed $sgpr4_sgpr5 |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3.Flow: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR [[DEF]], 21, implicit-def $vcc |
| ; CHECK-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR [[DEF]], 22 |
| ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 24 |
| ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 25 |
| ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 26 |
| ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 27 |
| ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 28 |
| ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 29 |
| ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 30 |
| ; CHECK-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 31 |
| ; CHECK-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 32 |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1 |
| ; CHECK-NEXT: renamable $vcc = S_XOR_B64 killed renamable $vcc, killed renamable $sgpr4_sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 19 |
| ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 18 |
| ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $vcc, implicit-def dead $scc |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr14, 3, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr13, 4, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 5, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 6, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 7, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 8, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 9, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 10, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 11, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 33, [[DEF]] |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 34, [[DEF]] |
| ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc |
| ; CHECK-NEXT: S_BRANCH %bb.4 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4.exit: |
| ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5 |
| ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1 |
| ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 34 |
| ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 33 |
| ; CHECK-NEXT: renamable $sgpr6 = S_ADD_I32 killed renamable $sgpr6, killed renamable $sgpr7, implicit-def dead $scc |
| ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6 |
| ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[COPY]], killed renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (volatile store (s32) into %ir.out.load, addrspace 1) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ptr addrspace(1) %out) local_unnamed_addr #0 { |
| entry: |
| ; Eight loads from a poison address supply the initial values (%a0-%a7) of the |
| ; loop-carried phis %v0-%v7 below. |
| %a0 = load i32, ptr addrspace(4) poison |
| %a1 = load i32, ptr addrspace(4) poison |
| %a2 = load i32, ptr addrspace(4) poison |
| %a3 = load i32, ptr addrspace(4) poison |
| %a4 = load i32, ptr addrspace(4) poison |
| %a5 = load i32, ptr addrspace(4) poison |
| %a6 = load i32, ptr addrspace(4) poison |
| %a7 = load i32, ptr addrspace(4) poison |
| br label %loop.header |
| |
| loop.header: |
| ; Each phi takes its entry value from %entry (%iv starts at 0) and its updated |
| ; value from %loop.latch, so every carried value is rewritten on the backedge. |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %v0 = phi i32 [ %a0, %entry ], [ %t0, %loop.latch ] |
| %v1 = phi i32 [ %a1, %entry ], [ %t1, %loop.latch ] |
| %v2 = phi i32 [ %a2, %entry ], [ %t2, %loop.latch ] |
| %v3 = phi i32 [ %a3, %entry ], [ %t3, %loop.latch ] |
| %v4 = phi i32 [ %a4, %entry ], [ %t4, %loop.latch ] |
| %v5 = phi i32 [ %a5, %entry ], [ %t5, %loop.latch ] |
| %v6 = phi i32 [ %a6, %entry ], [ %t6, %loop.latch ] |
| %v7 = phi i32 [ %a7, %entry ], [ %t7, %loop.latch ] |
| ; Go to the latch only while %iv == 0; otherwise leave through %exit. |
| %cmp = icmp eq i32 %iv, 0 |
| br i1 %cmp, label %loop.latch, label %exit |
| |
| loop.latch: |
| ; Add 1 to each carried value and to the induction variable, then take the |
| ; backedge to %loop.header. |
| %t0 = add i32 %v0, 1 |
| %t1 = add i32 %v1, 1 |
| %t2 = add i32 %v2, 1 |
| %t3 = add i32 %v3, 1 |
| %t4 = add i32 %v4, 1 |
| %t5 = add i32 %v5, 1 |
| %t6 = add i32 %v6, 1 |
| %t7 = add i32 %v7, 1 |
| %iv.next = add nuw nsw i32 %iv, 1 |
| br label %loop.header |
| |
| exit: |
| ; Only %v0 and %v1 are used after the loop; their sum is written to %out with |
| ; a volatile store. |
| %sum = add i32 %v0, %v1 |
| store volatile i32 %sum, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Attribute group #0, applied to the kernel above: nounwind plus the |
| ; "amdgpu-num-sgpr"="24" and "amdgpu-num-vgpr"="64" register-count requests. |
| ; NOTE(review): the small SGPR count is presumably meant to increase SGPR |
| ; pressure so that spills to VGPR lanes appear in the checked output - confirm. |
| attributes #0 = { nounwind "amdgpu-num-sgpr"="24" "amdgpu-num-vgpr"="64" } |