| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,0 -o - %s | FileCheck %s |
| |
| # This testcase used to fail because the allocator introduced a spill of |
| # the whole 1024-bit SGPR tuple for every subregister use inside the loop. |
| # With overlapping unspillable split ranges, it wasn't able to allocate one |
| # of the tuples. We avoid this by ensuring wide tuples are always allocated |
| # first, although the allocator should probably have been smart enough to |
| # handle this without that hint. Ideally it would understand that we only |
| # need to spill/restore single subregisters at a time. |
| |
| --- |
| # Function-level properties: an entry function that contains calls, with the |
| # scratch resource descriptor and the stack pointer offset held in fixed SGPRs. |
| name: greedy_fail_alloc_sgpr1024_spill |
| tracksRegLiveness: true |
| frameInfo: |
| adjustsStack: true |
| hasCalls: true |
| machineFunctionInfo: |
| explicitKernArgSize: 16 |
| maxKernArgAlign: 8 |
| isEntryFunction: true |
| waveLimiter: true |
| # $sgpr0_sgpr1_sgpr2_sgpr3 and $sgpr32 also appear as implicit operands on the |
| # SI_CALL and ADJCALLSTACK* instructions in the body. |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| occupancy: 6 |
| body: | |
| ; CHECK-LABEL: name: greedy_fail_alloc_sgpr1024_spill |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr34_sgpr35 = COPY $sgpr8_sgpr9 |
| ; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr15 |
| ; CHECK-NEXT: renamable $sgpr50 = COPY $sgpr14 |
| ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY $sgpr10_sgpr11 |
| ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY $sgpr6_sgpr7 |
| ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY $sgpr4_sgpr5 |
| ; CHECK-NEXT: renamable $sgpr64_sgpr65 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| ; CHECK-NEXT: renamable $sgpr68 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr69 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr70 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr71 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr72 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr73 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr74 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr75 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr76 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr77 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr78 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr79 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr80 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr81 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr82 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr83 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr84 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr85 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr86 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr87 = S_MOV_B32 0 |
| ; CHECK-NEXT: renamable $sgpr88 = S_MOV_B32 0 |
| ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) |
| ; CHECK-NEXT: renamable $sgpr52_sgpr53 = IMPLICIT_DEF |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr48_sgpr49 |
| ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr38_sgpr39 |
| ; CHECK-NEXT: $sgpr8_sgpr9 = COPY killed renamable $sgpr34_sgpr35 |
| ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37 |
| ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr50 |
| ; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr33 |
| ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $exec, implicit-def $exec |
| ; CHECK-NEXT: dead renamable $sgpr6_sgpr7 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec |
| ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 |
| ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc |
| ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68 |
| ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68 |
| ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec |
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr64_sgpr65:0x0000000000000003 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc |
| ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr64, 1, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) |
| ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; Entry block: saves the incoming SGPR arguments in virtual registers, loads a |
| ; 64-bit value into %6, builds the 1024-bit SGPR tuple %7, and performs two calls. |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15 |
| |
| %0:sgpr_64 = COPY $sgpr8_sgpr9 |
| %1:sgpr_32 = COPY $sgpr15 |
| %2:sgpr_32 = COPY $sgpr14 |
| %3:sgpr_64 = COPY $sgpr10_sgpr11 |
| %4:sgpr_64 = COPY $sgpr6_sgpr7 |
| %5:sgpr_64 = COPY $sgpr4_sgpr5 |
| ; %6 stays live past both calls: %6.sub1 is read in bb.2 and %6.sub0 in bb.4. |
| %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| ; Only sub0..sub20 of the tuple %7 are defined here (all zero); the remaining |
| ; subregisters are left undefined. %7 is read later in bb.2, bb.3 and bb.4. |
| undef %7.sub0:sgpr_1024 = S_MOV_B32 0 |
| %7.sub1:sgpr_1024 = S_MOV_B32 0 |
| %7.sub2:sgpr_1024 = S_MOV_B32 0 |
| %7.sub3:sgpr_1024 = S_MOV_B32 0 |
| %7.sub4:sgpr_1024 = S_MOV_B32 0 |
| %7.sub5:sgpr_1024 = S_MOV_B32 0 |
| %7.sub6:sgpr_1024 = S_MOV_B32 0 |
| %7.sub7:sgpr_1024 = S_MOV_B32 0 |
| %7.sub8:sgpr_1024 = S_MOV_B32 0 |
| %7.sub9:sgpr_1024 = S_MOV_B32 0 |
| %7.sub10:sgpr_1024 = S_MOV_B32 0 |
| %7.sub11:sgpr_1024 = S_MOV_B32 0 |
| %7.sub12:sgpr_1024 = S_MOV_B32 0 |
| %7.sub13:sgpr_1024 = S_MOV_B32 0 |
| %7.sub14:sgpr_1024 = S_MOV_B32 0 |
| %7.sub15:sgpr_1024 = S_MOV_B32 0 |
| %7.sub16:sgpr_1024 = S_MOV_B32 0 |
| %7.sub17:sgpr_1024 = S_MOV_B32 0 |
| %7.sub18:sgpr_1024 = S_MOV_B32 0 |
| %7.sub19:sgpr_1024 = S_MOV_B32 0 |
| %7.sub20:sgpr_1024 = S_MOV_B32 0 |
| ; %8 is the (undefined) callee address used by both calls below. |
| %8:sreg_64 = IMPLICIT_DEF |
| ; First call: no argument registers are set up. |
| ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 |
| ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; Second call: the saved arguments are copied back into the physical registers |
| ; listed as implicit uses on the SI_CALL (%2 and %1, originally $sgpr14 and |
| ; $sgpr15, go to $sgpr12 and $sgpr13). |
| ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| $sgpr4_sgpr5 = COPY %5 |
| $sgpr6_sgpr7 = COPY %4 |
| $sgpr8_sgpr9 = COPY %0 |
| $sgpr10_sgpr11 = COPY %3 |
| $sgpr12 = COPY %2 |
| $sgpr13 = COPY %1 |
| dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 |
| ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 |
| ; %9 holds a copy of $exec that bb.3 consumes; %10 is never read in this function. |
| %9:sreg_64 = COPY $exec, implicit-def $exec |
| %10:sreg_64 = IMPLICIT_DEF |
| |
| ; Loop header (bb.3 branches back here): copies $exec into %11, then branches to |
| ; bb.4 when exec is zero and otherwise falls through to bb.2. |
| bb.1: |
| successors: %bb.2, %bb.4 |
| |
| %11:sreg_64 = COPY $exec, implicit-def $exec |
| S_CBRANCH_EXECZ %bb.4, implicit $exec |
| |
| ; Full-tuple use of %7 inside the loop: the whole SGPR tuple is copied into the |
| ; VGPR tuple %12, which is then updated by an indirect register write that takes |
| ; %13 (= %6.sub1 shifted left by 1) as an operand. Falls through to bb.3. |
| bb.2: |
| %12:vreg_1024 = COPY %7 |
| %13:sreg_32 = S_LSHL_B32 %6.sub1, 1, implicit-def dead $scc |
| %12:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 %12, 0, %13, 3, implicit-def $m0, implicit $m0, implicit $exec |
| |
| ; Loop latch: updates the exec mask from %9, builds a second 1024-bit SGPR tuple |
| ; %15 out of repeated single-subregister reads of %7, then either exits to bb.5 |
| ; or branches back to bb.1. |
| bb.3: |
| %14:sreg_64 = S_OR_SAVEEXEC_B64 %9, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; sub0..sub21 of %15 are each copied from %7.sub0. These appear to be the |
| ; per-subregister uses the comment at the top of the file refers to. |
| undef %15.sub0:sgpr_1024 = COPY %7.sub0 |
| %15.sub1:sgpr_1024 = COPY %7.sub0 |
| %15.sub2:sgpr_1024 = COPY %7.sub0 |
| %15.sub3:sgpr_1024 = COPY %7.sub0 |
| %15.sub4:sgpr_1024 = COPY %7.sub0 |
| %15.sub5:sgpr_1024 = COPY %7.sub0 |
| %15.sub6:sgpr_1024 = COPY %7.sub0 |
| %15.sub7:sgpr_1024 = COPY %7.sub0 |
| %15.sub8:sgpr_1024 = COPY %7.sub0 |
| %15.sub9:sgpr_1024 = COPY %7.sub0 |
| %15.sub10:sgpr_1024 = COPY %7.sub0 |
| %15.sub11:sgpr_1024 = COPY %7.sub0 |
| %15.sub12:sgpr_1024 = COPY %7.sub0 |
| %15.sub13:sgpr_1024 = COPY %7.sub0 |
| %15.sub14:sgpr_1024 = COPY %7.sub0 |
| %15.sub15:sgpr_1024 = COPY %7.sub0 |
| %15.sub16:sgpr_1024 = COPY %7.sub0 |
| %15.sub17:sgpr_1024 = COPY %7.sub0 |
| %15.sub18:sgpr_1024 = COPY %7.sub0 |
| %15.sub19:sgpr_1024 = COPY %7.sub0 |
| %15.sub20:sgpr_1024 = COPY %7.sub0 |
| %15.sub21:sgpr_1024 = COPY %7.sub0 |
| ; The whole tuple %15 is consumed at once by a copy into a VGPR tuple. |
| %16:vreg_1024 = COPY %15, implicit $exec |
| ; Remove the lanes saved in %14 from exec; leave the loop when none remain. |
| $exec = S_XOR_B64_term $exec, %14, implicit-def $scc |
| S_CBRANCH_EXECZ %bb.5, implicit $exec |
| S_BRANCH %bb.1 |
| |
| ; Target of the exec-zero branch in bb.1: ORs the exec copy %11 back into $exec |
| ; and performs another full-tuple read of %7. %17 is not read in this function. |
| bb.4: |
| $exec = S_OR_B64 $exec, %11, implicit-def $scc |
| %17:sreg_32 = S_LSHL_B32 %6.sub0, 1, implicit-def dead $scc |
| %16:vreg_1024 = COPY %7 |
| |
| ; Empty block: branch target of bb.3 and fallthrough from bb.4. |
| bb.5: |
| |
| ... |