| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -start-before=greedy,2 -stop-before=virtregrewriter,2 -o - %s | FileCheck %s |
# FIXME: Asserts if run to the end.
| |
# The V_MFMA_F32_32X32X1F32_vgprcd_e64 as written requires 66 VGPRs
# to allocate, but the register budget (with forced AGPR usage and
# occupancy 4) only permits 64 VGPRs and 64 AGPRs, so we need to force
# a copy from VGPR to AGPR. The minimal fix is to copy the %3 and %4
# V_MOV_B32 results into temporary AGPRs for use by the MFMA.
| |
# Previously we would attempt a register subclass-based split, but
# then immediately rematerialize the V_MOV_B32 into the new temporary
# register, defeating the point of introducing the split. Allocation
# would then fail, since it is 2 registers short.
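#
# As a sketch (not part of the test input), the copies the allocator is
# expected to introduce look roughly like the av_32 copies in the CHECK
# lines below; %7 and %8 are hypothetical temporaries:
#
#   %7:av_32 = COPY %3
#   %8:av_32 = COPY %4
#   ... = V_MFMA_F32_32X32X1F32_vgprcd_e64 %7, %8, %2, ...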
| |
| --- |
| name: temp_vgpr_to_agpr_should_not_undo_split_with_remat |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| occupancy: 4 |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| |
| ; CHECK-LABEL: name: temp_vgpr_to_agpr_should_not_undo_split_with_remat |
| ; CHECK: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| ; CHECK-NEXT: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 7, [[COPY]], implicit $exec |
| ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 112, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 80, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 48, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 16, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) |
| ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec |
| ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_1]] |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]] |
| ; CHECK-NEXT: early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 [[COPY2]], [[COPY1]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub2:vreg_1024_align2 = COPY %5.sub0 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub3:vreg_1024_align2 = COPY %5.sub1 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4:vreg_1024_align2 = COPY %5.sub2 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub5:vreg_1024_align2 = COPY %5.sub3 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub6:vreg_1024_align2 = COPY %5.sub4 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub7:vreg_1024_align2 = COPY %5.sub5 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8:vreg_1024_align2 = COPY %5.sub6 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub9:vreg_1024_align2 = COPY %5.sub7 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub10:vreg_1024_align2 = COPY %5.sub8 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub11:vreg_1024_align2 = COPY %5.sub9 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12:vreg_1024_align2 = COPY %5.sub10 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub13:vreg_1024_align2 = COPY %5.sub11 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub14:vreg_1024_align2 = COPY %5.sub12 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub15:vreg_1024_align2 = COPY %5.sub13 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16:vreg_1024_align2 = COPY %5.sub14 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub17:vreg_1024_align2 = COPY %5.sub15 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub18:vreg_1024_align2 = COPY %5.sub16 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub19:vreg_1024_align2 = COPY %5.sub17 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20:vreg_1024_align2 = COPY %5.sub18 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub21:vreg_1024_align2 = COPY %5.sub19 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub22:vreg_1024_align2 = COPY %5.sub20 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub23:vreg_1024_align2 = COPY %5.sub21 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24:vreg_1024_align2 = COPY %5.sub22 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub25:vreg_1024_align2 = COPY %5.sub23 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub26:vreg_1024_align2 = COPY %5.sub24 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub27:vreg_1024_align2 = COPY %5.sub25 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28:vreg_1024_align2 = COPY %5.sub26 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub29:vreg_1024_align2 = COPY %5.sub27 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub30:vreg_1024_align2 = COPY %5.sub28 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub31:vreg_1024_align2 = COPY %5.sub29 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[COPY2]], [[COPY1]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| S_NOP 0, implicit-def $agpr0 |
| %0:vgpr_32 = COPY $vgpr0 |
| renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| %1:vgpr_32 = V_LSHLREV_B32_e32 7, %0, implicit $exec |
| undef %2.sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 112, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| %2.sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 80, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1) |
| %2.sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 48, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| %2.sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 16, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) |
| %3:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec |
| %4:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec |
| %2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec |
| early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec |
| %2.sub2:vreg_1024_align2 = COPY %5.sub0 |
| %2.sub3:vreg_1024_align2 = COPY %5.sub1 |
| %2.sub4:vreg_1024_align2 = COPY %5.sub2 |
| %2.sub5:vreg_1024_align2 = COPY %5.sub3 |
| %2.sub6:vreg_1024_align2 = COPY %5.sub4 |
| %2.sub7:vreg_1024_align2 = COPY %5.sub5 |
| %2.sub8:vreg_1024_align2 = COPY %5.sub6 |
| %2.sub9:vreg_1024_align2 = COPY %5.sub7 |
| %2.sub10:vreg_1024_align2 = COPY %5.sub8 |
| %2.sub11:vreg_1024_align2 = COPY %5.sub9 |
| %2.sub12:vreg_1024_align2 = COPY %5.sub10 |
| %2.sub13:vreg_1024_align2 = COPY %5.sub11 |
| %2.sub14:vreg_1024_align2 = COPY %5.sub12 |
| %2.sub15:vreg_1024_align2 = COPY %5.sub13 |
| %2.sub16:vreg_1024_align2 = COPY %5.sub14 |
| %2.sub17:vreg_1024_align2 = COPY %5.sub15 |
| %2.sub18:vreg_1024_align2 = COPY %5.sub16 |
| %2.sub19:vreg_1024_align2 = COPY %5.sub17 |
| %2.sub20:vreg_1024_align2 = COPY %5.sub18 |
| %2.sub21:vreg_1024_align2 = COPY %5.sub19 |
| %2.sub22:vreg_1024_align2 = COPY %5.sub20 |
| %2.sub23:vreg_1024_align2 = COPY %5.sub21 |
| %2.sub24:vreg_1024_align2 = COPY %5.sub22 |
| %2.sub25:vreg_1024_align2 = COPY %5.sub23 |
| %2.sub26:vreg_1024_align2 = COPY %5.sub24 |
| %2.sub27:vreg_1024_align2 = COPY %5.sub25 |
| %2.sub28:vreg_1024_align2 = COPY %5.sub26 |
| %2.sub29:vreg_1024_align2 = COPY %5.sub27 |
| %2.sub30:vreg_1024_align2 = COPY %5.sub28 |
| %2.sub31:vreg_1024_align2 = COPY %5.sub29 |
| %2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec |
| %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| S_ENDPGM 0 |
| |
| ... |
| |
# Same as above, except this uses AV_MOV_B32_IMM_PSEUDO, which can be
# rematerialized in the inflated register class.
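#
# As a sketch (not part of the test input), instead of copying, the
# allocator can rematerialize the immediates directly into the inflated
# av_32 class, as in the CHECK lines below; %7 and %8 are hypothetical
# temporaries:
#
#   %7:av_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec
#   %8:av_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec
#   ... = V_MFMA_F32_32X32X1F32_vgprcd_e64 %7, %8, %2, ...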
| --- |
| name: temp_vgpr_to_agpr_should_not_undo_split_with_remat_av_pseudo |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| occupancy: 4 |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| |
| ; CHECK-LABEL: name: temp_vgpr_to_agpr_should_not_undo_split_with_remat_av_pseudo |
| ; CHECK: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| ; CHECK-NEXT: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 7, [[COPY]], implicit $exec |
| ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 112, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 80, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 48, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 16, 0, implicit $exec :: (load (s128), addrspace 1) |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) |
| ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec |
| ; CHECK-NEXT: [[AV_MOV_1:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[AV_MOV_]], [[AV_MOV_1]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[AV_MOV_2:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec |
| ; CHECK-NEXT: [[AV_MOV_3:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec |
| ; CHECK-NEXT: early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 [[AV_MOV_3]], [[AV_MOV_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub2:vreg_1024_align2 = COPY %5.sub0 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub3:vreg_1024_align2 = COPY %5.sub1 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4:vreg_1024_align2 = COPY %5.sub2 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub5:vreg_1024_align2 = COPY %5.sub3 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub6:vreg_1024_align2 = COPY %5.sub4 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub7:vreg_1024_align2 = COPY %5.sub5 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8:vreg_1024_align2 = COPY %5.sub6 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub9:vreg_1024_align2 = COPY %5.sub7 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub10:vreg_1024_align2 = COPY %5.sub8 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub11:vreg_1024_align2 = COPY %5.sub9 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12:vreg_1024_align2 = COPY %5.sub10 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub13:vreg_1024_align2 = COPY %5.sub11 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub14:vreg_1024_align2 = COPY %5.sub12 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub15:vreg_1024_align2 = COPY %5.sub13 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16:vreg_1024_align2 = COPY %5.sub14 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub17:vreg_1024_align2 = COPY %5.sub15 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub18:vreg_1024_align2 = COPY %5.sub16 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub19:vreg_1024_align2 = COPY %5.sub17 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20:vreg_1024_align2 = COPY %5.sub18 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub21:vreg_1024_align2 = COPY %5.sub19 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub22:vreg_1024_align2 = COPY %5.sub20 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub23:vreg_1024_align2 = COPY %5.sub21 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24:vreg_1024_align2 = COPY %5.sub22 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub25:vreg_1024_align2 = COPY %5.sub23 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub26:vreg_1024_align2 = COPY %5.sub24 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub27:vreg_1024_align2 = COPY %5.sub25 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28:vreg_1024_align2 = COPY %5.sub26 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub29:vreg_1024_align2 = COPY %5.sub27 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub30:vreg_1024_align2 = COPY %5.sub28 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub31:vreg_1024_align2 = COPY %5.sub29 |
| ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[AV_MOV_3]], [[AV_MOV_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[AV_MOV_4:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| S_NOP 0, implicit-def $agpr0 |
| %0:vgpr_32 = COPY $vgpr0 |
| renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) |
| %1:vgpr_32 = V_LSHLREV_B32_e32 7, %0, implicit $exec |
| undef %2.sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 112, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| %2.sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 80, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1) |
| %2.sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 48, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1) |
| %2.sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 16, 0, implicit $exec :: (load (s128), addrspace 1) |
| %2.sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) |
| %3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec |
| %4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec |
| %2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec |
| early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec |
| %2.sub2:vreg_1024_align2 = COPY %5.sub0 |
| %2.sub3:vreg_1024_align2 = COPY %5.sub1 |
| %2.sub4:vreg_1024_align2 = COPY %5.sub2 |
| %2.sub5:vreg_1024_align2 = COPY %5.sub3 |
| %2.sub6:vreg_1024_align2 = COPY %5.sub4 |
| %2.sub7:vreg_1024_align2 = COPY %5.sub5 |
| %2.sub8:vreg_1024_align2 = COPY %5.sub6 |
| %2.sub9:vreg_1024_align2 = COPY %5.sub7 |
| %2.sub10:vreg_1024_align2 = COPY %5.sub8 |
| %2.sub11:vreg_1024_align2 = COPY %5.sub9 |
| %2.sub12:vreg_1024_align2 = COPY %5.sub10 |
| %2.sub13:vreg_1024_align2 = COPY %5.sub11 |
| %2.sub14:vreg_1024_align2 = COPY %5.sub12 |
| %2.sub15:vreg_1024_align2 = COPY %5.sub13 |
| %2.sub16:vreg_1024_align2 = COPY %5.sub14 |
| %2.sub17:vreg_1024_align2 = COPY %5.sub15 |
| %2.sub18:vreg_1024_align2 = COPY %5.sub16 |
| %2.sub19:vreg_1024_align2 = COPY %5.sub17 |
| %2.sub20:vreg_1024_align2 = COPY %5.sub18 |
| %2.sub21:vreg_1024_align2 = COPY %5.sub19 |
| %2.sub22:vreg_1024_align2 = COPY %5.sub20 |
| %2.sub23:vreg_1024_align2 = COPY %5.sub21 |
| %2.sub24:vreg_1024_align2 = COPY %5.sub22 |
| %2.sub25:vreg_1024_align2 = COPY %5.sub23 |
| %2.sub26:vreg_1024_align2 = COPY %5.sub24 |
| %2.sub27:vreg_1024_align2 = COPY %5.sub25 |
| %2.sub28:vreg_1024_align2 = COPY %5.sub26 |
| %2.sub29:vreg_1024_align2 = COPY %5.sub27 |
| %2.sub30:vreg_1024_align2 = COPY %5.sub28 |
| %2.sub31:vreg_1024_align2 = COPY %5.sub29 |
| %2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec |
| %6:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| S_ENDPGM 0 |
| |
| ... |