blob: c7767cb8320781d3694fa8d743abed492b4fbe6b [file] [log] [blame]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s
--- |
define amdgpu_kernel void @inflate_result_to_agpr__not_mfma() {
ret void
}
; FIXME: Need IR for amdgpu-waves-per-eu
define amdgpu_kernel void @av_split_is_phi_def_crash() #0 {
ret void
}
define amdgpu_kernel void @copy_physreg_to_av_crash() #0 {
ret void
}
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 {
ret void
}
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg() #0 {
ret void
}
attributes #0 = { "amdgpu-wave-limiter"="true" "amdgpu-waves-per-eu"="8,8" }
...
# Inflate pattern, except the defining instruction isn't an MFMA.
---
name: inflate_result_to_agpr__not_mfma
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
stackPtrOffsetReg: '$sgpr32'
occupancy: 10
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: inflate_result_to_agpr__not_mfma
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK-NEXT: renamable $vgpr2_vgpr3 = COPY killed renamable $sgpr0_sgpr1
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vcc, $vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit renamable $vgpr2_vgpr3, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
S_NOP 0, implicit-def $agpr0
renamable $sgpr0 = S_MOV_B32 0
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr1 = COPY renamable $sgpr0
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
%0.sub9:vreg_512_align2 = COPY %0.sub8
bb.1:
liveins: $vcc
undef %4.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
S_NOP 0, implicit-def %0:vreg_512_align2, implicit %1, implicit %4
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.2
bb.2:
; No VGPRs available for %0
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
---
name: av_split_is_phi_def_crash
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
waveLimiter: true
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr68_sgpr69_sgpr70_sgpr71'
stackPtrOffsetReg: '$sgpr32'
occupancy: 8
wwmReservedRegs:
- '$vgpr31'
sgprForEXECCopy: '$sgpr72_sgpr73'
body: |
; CHECK-LABEL: name: av_split_is_phi_def_crash
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr2_sgpr3, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr8 = IMPLICIT_DEF
; CHECK-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $vgpr8, $vgpr0_vgpr1:0x0000000000000003
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $vgpr8, $vgpr31, $sgpr8_sgpr9, $vgpr0_vgpr1:0x0000000000000003, $vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr1 = COPY renamable $vgpr8
; CHECK-NEXT: renamable $agpr0_agpr1 = COPY killed renamable $vgpr2_vgpr3
; CHECK-NEXT: renamable $vgpr2 = COPY renamable $vgpr1
; CHECK-NEXT: renamable $vgpr3 = COPY renamable $vgpr8
; CHECK-NEXT: renamable $vgpr6 = COPY renamable $vgpr1
; CHECK-NEXT: dead renamable $vgpr7 = COPY renamable $vgpr8
; CHECK-NEXT: renamable $vgpr6 = COPY renamable $vgpr1
; CHECK-NEXT: renamable $vgpr7 = COPY renamable $vgpr8
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: renamable $vgpr10_vgpr11 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr12_vgpr13 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr14_vgpr15 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr16_vgpr17 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr18_vgpr19 = V_ADD_F64_e64 0, undef $sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr20_vgpr21 = V_ADD_F64_e64 0, undef $sgpr28_sgpr29, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr22_vgpr23 = V_ADD_F64_e64 0, undef $sgpr24_sgpr25, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr24_vgpr25 = V_ADD_F64_e64 0, undef $sgpr30_sgpr31, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr26_vgpr27 = V_ADD_F64_e64 0, undef $sgpr26_sgpr27, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr29 = COPY renamable $vgpr12
; CHECK-NEXT: renamable $vgpr28 = COPY renamable $vgpr12
; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_ADD_F64_e64 0, killed $vgpr28_vgpr29, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr28_vgpr29 = COPY killed renamable $agpr0_agpr1
; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_ADD_F64_e64 0, killed $vgpr4_vgpr5, 0, killed $vgpr28_vgpr29, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr11
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr14
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr20
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr24
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr26
; CHECK-NEXT: renamable $vgpr4 = COPY killed renamable $vgpr6
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr16
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr22
; CHECK-NEXT: dead renamable $vgpr4 = COPY killed renamable $vgpr18
; CHECK-NEXT: renamable $vgpr2 = COPY killed renamable $vgpr1
; CHECK-NEXT: dead renamable $vgpr3 = COPY killed renamable $vgpr8
; CHECK-NEXT: dead renamable $vgpr1 = COPY renamable $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr8_sgpr9, $sgpr2_sgpr3, $sgpr8_sgpr9
%0:vgpr_32 = IMPLICIT_DEF
%1:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
undef %2.sub0:vreg_64_align2 = V_MOV_B32_e32 0, implicit $exec
S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
%1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec
bb.2:
liveins: $vgpr31, $sgpr8_sgpr9
%3:vgpr_32 = COPY %0
%4:vgpr_32 = COPY %0
undef %5.sub0:vreg_64_align2 = COPY %3
%5.sub1:vreg_64_align2 = COPY %4
undef %6.sub0:vreg_64_align2 = COPY %3
dead %6.sub1:vreg_64_align2 = COPY %4
undef %7.sub0:vreg_64_align2 = COPY %3
%7.sub1:vreg_64_align2 = COPY %4
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
%8:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
%9:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
%10:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
%11:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr6_sgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
%12:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
%13:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr28_sgpr29, 0, 0, 0, 0, implicit $mode, implicit $exec
%14:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr24_sgpr25, 0, 0, 0, 0, implicit $mode, implicit $exec
%15:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr30_sgpr31, 0, 0, 0, 0, implicit $mode, implicit $exec
%16:vreg_64_align2 = V_ADD_F64_e64 0, undef $sgpr26_sgpr27, 0, 0, 0, 0, implicit $mode, implicit $exec
undef %17.sub1:vreg_64_align2 = COPY %9.sub0
%17.sub0:vreg_64_align2 = COPY %9.sub0
%18:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
%19:vreg_64_align2 = V_ADD_F64_e64 0, %17, 0, %18, 0, 0, implicit $mode, implicit $exec
%20:vreg_64_align2 = V_ADD_F64_e64 0, %19, 0, %1, 0, 0, implicit $mode, implicit $exec
dead %21:vgpr_32 = COPY %20.sub0
dead %22:vgpr_32 = COPY %8.sub1
dead %23:vgpr_32 = COPY %10.sub0
dead %24:vgpr_32 = COPY %13.sub0
dead %25:vgpr_32 = COPY %15.sub0
dead %26:vgpr_32 = COPY %16.sub0
%27:vgpr_32 = COPY %7.sub0
%28:vgpr_32 = COPY %27
S_NOP 0
dead %29:vgpr_32 = COPY %11.sub0
dead %30:vgpr_32 = COPY %14.sub0
dead %31:vgpr_32 = COPY %12.sub0
%32:vreg_64_align2 = COPY %5
undef %33.sub0:vreg_64_align2 = COPY %3
dead %33.sub1:vreg_64_align2 = COPY %4
dead undef %34.sub1:vreg_64_align2 = COPY %9.sub1
dead %2.sub1:vreg_64_align2 = COPY %2.sub0
bb.3:
S_ENDPGM 0
...
---
name: copy_physreg_to_av_crash
tracksRegLiveness: true
registers:
- { id: 0, class: av_32, preferred-register: '$agpr0' }
machineFunctionInfo:
isEntryFunction: true
stackPtrOffsetReg: '$sgpr32'
occupancy: 10
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: copy_physreg_to_av_crash
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $agpr0 = COPY $vgpr0
%0:av_32 = COPY $vgpr0
$agpr0 = COPY %0
...
# Non-mac variant, src2 is an immediate.
---
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
stackPtrOffsetReg: '$sgpr32'
occupancy: 10
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
S_NOP 0, implicit-def $agpr0
renamable $sgpr0 = S_MOV_B32 0
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr1 = COPY renamable $sgpr0
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
%0.sub9:vreg_512_align2 = COPY %0.sub8
bb.1:
liveins: $vcc
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.2
bb.2:
; No VGPRs available for %0
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
# Non-mac variant, src2 is the same VGPR, but a different subregister.
---
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
stackPtrOffsetReg: '$sgpr32'
occupancy: 10
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
S_NOP 0, implicit-def $agpr0
renamable $sgpr0 = S_MOV_B32 0
undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr1 = COPY renamable $sgpr0
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
%0.sub9:vreg_1024_align2 = COPY %0.sub8
bb.1:
liveins: $vcc
undef %0.sub0_sub1:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
%0.sub16_sub17:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
%0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31, 0, 0, 0, implicit $mode, implicit $exec
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.2
bb.2:
; No VGPRs available for %0
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...