blob: 34c82cf2c73fb4c36df08a2e0bb34ef8594b93eb [file]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -amdgpu-disable-rewrite-mfma-form-sched-stage=false -o - %s | FileCheck %s
# Stub LLVM IR module. It declares one `define void @<name>()` per test case;
# every stub body is just `entry: unreachable`, and every stub carries attribute
# group #0 ("amdgpu-waves-per-eu"="1,1", "amdgpu-flat-work-group-size"="64,64").
# The case names encode, in order: src2 {singledef|multidef}_{singleuse|multiuse},
# dst {singleuse|multiuse}_{singledef|multidef}, and a {vgpr|agpr|mixed} suffix;
# `no_copy_for_mfma` is the one name outside that scheme.
# NOTE(review): `@src2_singledef_multiuse_dst_singleuse_singleedef_agpr` spells
# "singleedef" with a doubled 'e', unlike every sibling name. Confirm it matches
# the `name:` key of the corresponding MIR function before renaming either one.
--- |
define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 {
entry:
unreachable
}
define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 {
entry:
unreachable
}
define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 {
entry:
unreachable
}
define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 {
entry:
unreachable
}
define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 {
entry:
unreachable
}
define void @no_copy_for_mfma() #0 {
entry:
unreachable
}
attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"}
...
---
# Case: %84, the third source operand (src2) of the first MFMA, has exactly one
# def (the V_ADD_U32_e32 at the end of bb.0, reading %72.sub0) and exactly one
# use (that MFMA). The four MFMAs chain through src2: %84 -> %85 -> %86 -> %87
# -> %88. %88.sub0 then feeds the V_ADD_U32_e32 in the last block, and every
# MFMA result is also listed on the final KILL.
# All registers in the MFMA chain are vreg_128_align2; the A/B inputs %60/%61
# are av_128_align2.
# The vreg_1024 / vreg_512 IMPLICIT_DEFs in bb.0 are only consumed by the KILL
# in the last block, so they stay live across the MFMA block -- presumably to
# raise VGPR pressure there; confirm against the pass under test.
# Input blocks are numbered bb.0 / bb.2 / bb.3; the CHECK lines show them printed
# as bb.0 / bb.1 / bb.2. In the CHECK lines the MFMAs keep the _vgprcd_e64 opcode
# and the first V_ADD_U32_e32 appears at the top of bb.1 rather than in bb.0.
# CHECK lines are autogenerated (see the NOTE at the top of the file); regenerate
# them instead of editing by hand.
name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Case: %84, the third source operand (src2) of the first MFMA, has exactly one
# def (the V_ADD_U32_e32 at the end of bb.0, reading %72.sub1) but two uses: the
# first MFMA in bb.1 and, via %84.sub0, the V_ADD_U32_e32 in bb.2 that produces
# %94. The four MFMAs chain through src2: %84 -> %85 -> %86 -> %87 -> %88; after
# its def, %88 only appears on the final KILL.
# All registers in the MFMA chain are vreg_128_align2; the A/B inputs %60/%61
# are av_128_align2.
# The vreg_1024 / vreg_512 IMPLICIT_DEFs in bb.0 are only consumed by the KILL
# in the last block, so they stay live across the MFMA block -- presumably to
# raise VGPR pressure there; confirm against the pass under test.
# In the CHECK lines the MFMAs keep the _vgprcd_e64 opcode and the V_ADD_U32_e32
# defining src2 remains the last instruction of bb.0.
# CHECK lines are autogenerated (see the NOTE at the top of the file); regenerate
# them instead of editing by hand.
name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Case: %84, the third source operand (src2) of the first MFMA, has two defs on
# alternative paths -- a V_ADD_U32_e32 of %72.sub0 in bb.2 and one of %72.sub1
# in bb.3 -- selected by S_CBRANCH_SCC1 on an IMPLICIT_DEF'd $scc in bb.0. Its
# only use is the first MFMA in bb.4. The four MFMAs chain through src2:
# %84 -> %85 -> %86 -> %87 -> %88; after its def, %88 only appears on the KILL.
# Unlike the sibling cases, the final block (bb.7) has no V_ADD_U32_e32, so the
# KILL list ends at %88.
# The vreg_1024 / vreg_512 IMPLICIT_DEFs in bb.0 are only consumed by the KILL
# in the last block, so they stay live across the MFMA block -- presumably to
# raise VGPR pressure there; confirm against the pass under test.
# Input blocks are numbered bb.0 / bb.2 / bb.3 / bb.4 / bb.7; the CHECK lines
# show them printed as bb.0 .. bb.4, with branch targets renumbered to match.
# In the CHECK lines the MFMAs keep the _vgprcd_e64 opcode.
# CHECK lines are autogenerated (see the NOTE at the top of the file); regenerate
# them instead of editing by hand.
name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.7:
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Case: %84, the third source operand (src2) of the first MFMA, has two defs on
# alternative paths -- a V_ADD_U32_e32 of %72.sub0 in bb.2 and one of %72.sub1
# in bb.3 -- selected by S_CBRANCH_SCC1 on an IMPLICIT_DEF'd $scc in bb.0. It
# has two uses: the first MFMA in bb.4 and, via %84.sub0, the V_ADD_U32_e32 in
# bb.7 that produces %94. The four MFMAs chain through src2:
# %84 -> %85 -> %86 -> %87 -> %88; after its def, %88 only appears on the KILL.
# The vreg_1024 / vreg_512 IMPLICIT_DEFs in bb.0 are only consumed by the KILL
# in the last block, so they stay live across the MFMA block -- presumably to
# raise VGPR pressure there; confirm against the pass under test.
# Input blocks are numbered bb.0 / bb.2 / bb.3 / bb.4 / bb.7; the CHECK lines
# show them printed as bb.0 .. bb.4, with branch targets renumbered to match.
# In the CHECK lines the MFMAs keep the _vgprcd_e64 opcode.
# CHECK lines are autogenerated (see the NOTE at the top of the file); regenerate
# them instead of editing by hand.
name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.7:
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
KILL %89, %90, %91, %92, %93, %193
bb.4:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input summary (register numbers refer to the input MIR below, not to the
# FileCheck captures in the CHECK lines):
#   * %84 is written once (sub0, by a V_ADD_U32 in bb.0) and read twice: as the
#     third input operand of the first MFMA in bb.2 and by the V_ADD_U32 that
#     defines %94 in bb.4. Presumably this is the operand the name calls "src2".
#   * %88 is written twice (sub0 by a V_ADD_U32 in bb.0, then as a whole by the
#     fourth MFMA in bb.2) and read by the V_ADD_U32 that defines %95 in bb.4.
#     Presumably this is the register the name calls "dst".
#   * The input labels its blocks bb.0, bb.2, bb.3, bb.4; the CHECK lines expect
#     them printed as bb.0 through bb.3.
# The CHECK lines expect every MFMA to keep its _vgprcd_e64 opcode with a
# vreg_128_align2 result.
# NOTE(review): the "src2_multidef_singleuse" part of the name does not match
# %84 as written here (one def, two non-KILL uses). The neighbouring test
# src2_singledef_multiuse_dst_singleuse_multidef_vgpr has the opposite shape
# (two defs, one use), so the two names look swapped -- confirm. A rename would
# also have to touch the matching `define` in the IR section at the top of the
# file and the CHECK-LABEL line.
name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
KILL %89, %90, %91, %92, %93, %193
bb.4:
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input summary (register numbers refer to the input MIR below, not to the
# FileCheck captures in the CHECK lines):
#   * %84.sub0 is written on both arms of the first branch (bb.2 and bb.3, which
#     both jump to bb.4) and read once, as the third input operand of the first
#     MFMA in bb.5. Presumably this is the operand the name calls "src2".
#   * %88 is written twice (sub0 by a V_ADD_U32 in bb.0, then as a whole by the
#     fourth MFMA in bb.5) and read by the V_ADD_U32 that defines %95 in bb.7.
#     Presumably this is the register the name calls "dst".
#   * The input labels its blocks bb.0, bb.2 ... bb.7; the CHECK lines expect
#     them printed as bb.0 through bb.6.
# The CHECK lines expect every MFMA to keep its _vgprcd_e64 opcode with a
# vreg_128_align2 result.
# NOTE(review): the "src2_singledef_multiuse" part of the name does not match
# %84 as written here (two defs, one use). The neighbouring test
# src2_multidef_singleuse_dst_singleuse_multidef_vgpr has the opposite shape
# (one def, two non-KILL uses), so the two names look swapped -- confirm. A
# rename would also have to touch the matching `define` in the IR section at
# the top of the file and the CHECK-LABEL line.
name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
KILL %89, %90, %91, %92, %93, %193
bb.7:
undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
KILL %89, %90, %91, %92, %93, %193
bb.7:
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Scenario: %84, the third operand of the first MFMA in the chain, has a single
# def (V_ADD_U32_e32 at the end of bb.0) and a single use. %88, the result of
# the last MFMA in the chain, has a single def and is read in both bb.3 and
# bb.4.
#
# What the CHECK lines pin down:
#  * all four MFMAs are still V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64
#    with vreg_128_align2 results;
#  * the V_ADD_U32_e32 defining %84 is expected at the top of bb.1, directly
#    ahead of its only user, instead of in bb.0;
#  * the IMPLICIT_DEFs of %1 and %62 are expected in the final block, directly
#    ahead of the KILL that reads them.
# NOTE(review): the cross-block placement is presumably the scheduler sinking
# single-use rematerializable defs -- confirm against the pass.
#
# %1-%8, %10-%13, %50 and %51 are only read by the final KILL / S_NOP;
# presumably they are there to keep register pressure high -- TODO confirm.
name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
S_BRANCH %bb.5
bb.4:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
bb.5:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Scenario: %84 is defined once (V_ADD_U32_e32 at the end of bb.0) and read
# twice: as the third operand of the first MFMA in bb.1 and by the
# V_ADD_U32_e32 defining %104 in bb.5. %88, the result of the last MFMA in the
# chain, has a single def and is read twice in each of bb.3 and bb.4, where the
# two arms fill %94.sub0 / %94.sub1 from swapped subregisters.
#
# NOTE(review): with one def and two uses, %84 reads as single-def/multi-use,
# while the function name says "src2_multidef_singleuse" -- confirm the naming
# convention (or whether this name and its singledef_multiuse sibling are
# swapped).
#
# What the CHECK lines pin down:
#  * all four MFMAs are still V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64
#    with vreg_128_align2 results;
#  * the V_ADD_U32_e32 defining %84 stays in bb.0;
#  * the IMPLICIT_DEFs of %1 and %62 are expected in the final block, after the
#    SCHED_BARRIER and directly ahead of the KILL that reads them.
#
# %1-%8, %10-%13, %50 and %51 are only read by the final KILL / S_NOP;
# presumably they are there to keep register pressure high -- TODO confirm.
name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.5
bb.4:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.5:
%104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Scenario: %84 is defined on both arms of the first branch (bb.2 and bb.3,
# from different subregisters of %72) and has a single reader, the first MFMA
# in bb.4, which takes it as its third operand. %88, the result of the last
# MFMA in the chain, has a single def and is read twice in each of bb.6 and
# bb.7, where the two arms fill %94.sub0 / %94.sub1 from swapped subregisters.
#
# NOTE(review): with two defs and one use, %84 reads as multi-def/single-use,
# while the function name says "src2_singledef_multiuse" -- confirm the naming
# convention (or whether this name and its multidef_singleuse sibling are
# swapped).
#
# The input numbers its blocks bb.0, bb.2 ... bb.8 (no bb.1); the CHECK lines
# use the contiguous numbering bb.0 ... bb.7.
#
# What the CHECK lines pin down:
#  * all four MFMAs are still V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64
#    with vreg_128_align2 results;
#  * both V_ADD_U32_e32 defs of %84 stay in their own branch arms;
#  * the IMPLICIT_DEFs of %1 and %62 are expected in the final block, directly
#    ahead of the KILL that reads them.
#
# %1-%8, %10-%13, %50 and %51 are only read by the final KILL / S_NOP;
# presumably they are there to keep register pressure high -- TODO confirm.
name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.5:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.6:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.8
bb.7:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.8:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.5:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.6:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.8
bb.7:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.8:
%104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
# src2_singledef_singleuse_dst_multiuse_multidef_vgpr
#
# Shape of the input MIR:
#   * %84 has a single def (V_ADD_U32 in bb.0) and a single use, as the third
#     source operand of the first V_MFMA in bb.1.
#   * %88 has two defs (V_ADD_U32 into %88.sub0 in bb.0, and the fourth V_MFMA
#     in bb.1) and several uses (V_ADD_U32 in bb.4 and bb.5, KILL in bb.6).
#   * Twelve further V_MFMAs (%89-%93, %193-%199) all read %87 and stay live
#     until the KILL in bb.2.
#
# Expected output per the CHECK lines: every V_MFMA keeps its _vgprcd_e64
# opcode with a vreg_128_align2 result, and the V_ADD_U32 that defines %84 is
# emitted in bb.1 directly ahead of its only use.
#
# NOTE(review): bb.4 ends with S_BRANCH %bb.5 and bb.5 starts with another
# `undef %94.sub0` def, so the def in bb.4 is dead (the CHECK line marks it
# `dead`). Confirm this is intended rather than a missing branch to bb.6.
---
name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%196:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199
bb.3:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.5, implicit killed $scc
bb.4:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
S_BRANCH %bb.5
bb.5:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
bb.6:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
# src2_singledef_multiuse_dst_multiuse_multidef_vgpr
#
# Shape of the input MIR:
#   * %84 has a single def (V_ADD_U32 in bb.0) and two uses: the third source
#     operand of the first V_MFMA in bb.1, and the V_ADD_U32 in bb.6.
#   * %88 has two defs (V_ADD_U32 into %88.sub0 in bb.0, and the fourth V_MFMA
#     in bb.1) and several uses (V_ADD_U32s in bb.4 and bb.5, KILL in bb.6).
#   * Six further V_MFMAs (%89-%93, %193) all read %87 and stay live until the
#     KILL in bb.2.
#   * %94 is written (sub0 and sub1) in both bb.4 and bb.5, which join at bb.6.
#
# Expected output per the CHECK lines: every V_MFMA keeps its _vgprcd_e64
# opcode with a vreg_128_align2 result, and the V_ADD_U32 that defines %84
# stays in bb.0.
---
name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
KILL %89, %90, %91, %92, %93, %193
bb.3:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.5, implicit killed $scc
bb.4:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.6
bb.5:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.6:
%104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
KILL %89, %90, %91, %92, %93, %193
bb.7:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.9, implicit killed $scc
bb.8:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.10
bb.9:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.10:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape: src2 of the first MFMA (%84) has two defs (bb.2 and bb.3) and two
# users (the first MFMA in bb.5 and the V_ADD_U32 in bb.9). The dst of the last
# MFMA (%88) also has two defs (the V_ADD_U32 in bb.0 and the MFMA in bb.5) and
# is read in both bb.7 and bb.8.
# Expected output: the checks keep all four MFMAs in their _vgprcd_e64 form with
# vreg_128_align2 results; no COPY instructions appear in the checked output.
name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
; NOTE(review): %1-%13, %50 and %51 are only consumed by the KILL / S_NOP in
; the final block; presumably they exist to raise register pressure - confirm.
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
; First def of %88; the last MFMA in bb.5 defines it again.
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
; bb.2 and bb.3 each define %84, the src2 operand of the first MFMA.
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
; MFMA chain: each result is the src2 operand of the next MFMA.
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit killed $scc
bb.7:
; bb.7 and bb.8 both read %88 (with sub0/sub1 swapped between the two blocks).
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.9
bb.8:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.9:
; Second user of %84, outside the MFMA chain.
%104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape: %84 is produced by DS_READ_B128 in bb.0, then used as both src2
# and dst by four chained MFMAs in bb.2, read as src2 by six further MFMAs
# (%89-%93, %193), and finally stored by DS_WRITE_B128 in bb.4.
# Expected output: the checks show every MFMA rewritten from the _vgprcd_e64
# opcode to V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 with areg_128_align2
# results. A COPY moves the DS_READ result into areg_128_align2, and COPYs back
# to vreg_128_align2 are inserted ahead of the KILL and the DS_WRITE.
name: src2_singledef_singleuse_dst_singleuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[COPY7]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[COPY7]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
; NOTE(review): %1-%13, %50 and %51 are only consumed by the KILL / S_NOP in
; the final block; presumably they exist to raise register pressure - confirm.
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
; Initial value of %84, the register the MFMA chain accumulates into.
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.2:
; Four MFMAs that read %84 as src2 and write their result back to %84.
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
; Six MFMAs that read the final %84 as src2 and write distinct results.
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
KILL %89, %90, %91, %92, %93, %193
bb.4:
; Non-MFMA user of %84.
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape: %84 is produced by DS_READ_B128 in bb.0, then used as both src2
# and dst by four chained MFMAs in bb.1, read as src2 by six further MFMAs
# (%89-%93, %193), and finally read through %84.sub0 by a V_ADD_U32 in bb.3.
# Expected output: the checks show every MFMA rewritten from the _vgprcd_e64
# opcode to V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 with areg_128_align2
# results. A COPY moves the DS_READ result into areg_128_align2, and COPYs back
# to vreg_128_align2 are inserted ahead of the KILL and the V_ADD_U32.
name: src2_multidef_singleuse_dst_singleuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[COPY7]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[COPY7]], [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
; NOTE(review): %1-%13, %50 and %51 are only consumed by the KILL / S_NOP in
; the final block; presumably they exist to raise register pressure - confirm.
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
; Initial value of %84, the register the MFMA chain accumulates into.
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.1:
; Four MFMAs that read %84 as src2 and write their result back to %84.
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
; Six MFMAs that read the final %84 as src2 and write distinct results.
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
KILL %89, %90, %91, %92, %93, %193
bb.3:
; Non-MFMA user of %84 (reads sub0 only).
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape for this case:
#   * bb.0 defines a set of wide IMPLICIT_DEF vregs (%1-%8, %10-%13) plus two
#     av_512 values (%50, %51) that stay live until the KILL / S_NOP in the
#     final block (presumably to create register pressure; confirm).
#   * %84 is loaded by DS_READ_B128 on both sides of the branch (offset 0 in
#     bb.2, offset 128 in bb.3), so it has two reaching definitions at bb.4.
#   * bb.4 holds a chain of four V_MFMA_SCALE ... _vgprcd_e64 instructions, each
#     of which reads %84 as src2 and writes its result back to %84.
#   * The only use of %84 outside the chain is the final KILL.
# The autogenerated assertions below still show the MFMAs using the
# _vgprcd_e64 opcode with vreg_128_align2 operands for this case.
# NOTE(review): the function name contains "singleedef" (sic). It has to match
# the IR stub in the leading `--- |` document and the label assertion, so any
# rename must update all three together.
name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
bb.4:
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.7:
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape for this case:
#   * bb.0 defines a set of wide IMPLICIT_DEF vregs (%1-%8, %10-%13) plus two
#     av_512 values (%50, %51) that stay live until the KILL / S_NOP in the
#     final block (presumably to create register pressure; confirm).
#   * %84 is loaded by DS_READ_B128 on both sides of the branch (offset 0 in
#     bb.2, offset 128 in bb.3), so it has two reaching definitions at bb.4.
#   * bb.4 holds a chain of four V_MFMA_SCALE ... _vgprcd_e64 instructions with
#     distinct destinations: %85 consumes %84 as src2, %86 consumes %85, %87
#     consumes %86 and %88 consumes %87.
#   * In the final block %88 is stored with DS_WRITE_B128, and %85-%88 are all
#     listed in the KILL.
# The autogenerated assertions below still show the MFMAs using the
# _vgprcd_e64 opcode with vreg_128_align2 operands for this case.
name: src2_multidef_multiuse_dst_singleuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.7:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape for this case:
#   * bb.0 defines a set of wide IMPLICIT_DEF vregs (%1-%8, %10-%13) plus two
#     av_512 values (%50, %51) that stay live until the KILL / S_NOP in the
#     final block (presumably to create register pressure; confirm).
#   * bb.0 also loads %84 (DS_READ_B128, offset 0), pre-defines %85-%87 with
#     IMPLICIT_DEF and loads %88 (DS_READ_B128, offset 128), then conditionally
#     branches to bb.3, skipping bb.2.
#   * bb.2 redefines %85-%88 with a chain of four V_MFMA_SCALE ... _vgprcd_e64
#     instructions: %85 consumes %84 as src2, and each later MFMA consumes the
#     previous result. Each MFMA destination therefore has a second definition
#     in bb.0.
#   * bb.3 stores %88 with DS_WRITE_B128 and lists %85-%88 in the KILL.
# The autogenerated assertions below still show the MFMAs using the
# _vgprcd_e64 opcode with vreg_128_align2 operands for this case.
name: src2_singledef_singleuse_dst_singleuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 128, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape for this case:
#   * bb.0 defines a set of wide IMPLICIT_DEF vregs (%1-%8, %10-%13) plus two
#     av_512 values (%50, %51) that stay live until the KILL / S_NOP in the
#     final block (presumably to create register pressure; confirm).
#   * bb.0 also loads %84 (DS_READ_B128, offset 0), pre-defines %85-%87 with
#     IMPLICIT_DEF and loads %88 (DS_READ_B128, offset 128), then conditionally
#     branches to bb.3, skipping bb.2.
#   * bb.2 redefines %85-%88 with a chain of four V_MFMA_SCALE ... _vgprcd_e64
#     instructions: %85 consumes %84 as src2, and each later MFMA consumes the
#     previous result.
#   * bb.3 stores both %84 (offset 0) and %88 (offset 128) with DS_WRITE_B128,
#     so %84 has a use outside the MFMA chain in addition to being src2 of the
#     first MFMA.
# The autogenerated assertions below still show the MFMAs using the
# _vgprcd_e64 opcode with vreg_128_align2 operands for this case.
name: src2_multidef_singleuse_dst_singleuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 128, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Input shape for this case:
#   * bb.0 defines a set of wide IMPLICIT_DEF vregs (%1-%8, %10-%13) plus two
#     av_512 values (%50, %51) that stay live until the KILL / S_NOP in the
#     final block (presumably to create register pressure; confirm).
#   * bb.0 also pre-defines %85-%87 with IMPLICIT_DEF and loads %88
#     (DS_READ_B128, offset 0) before the first conditional branch.
#   * %84 is loaded by DS_READ_B128 on both sides of that branch (offset 256 in
#     bb.2, offset 512 in bb.3); both blocks branch to bb.4.
#   * bb.4 re-materializes $scc and conditionally branches to bb.6, skipping
#     the MFMA block bb.5.
#   * bb.5 redefines %85-%88 with a chain of four V_MFMA_SCALE ... _vgprcd_e64
#     instructions: %85 consumes %84 as src2, and each later MFMA consumes the
#     previous result.
#   * bb.6 stores %88 with DS_WRITE_B128 (offset 128) and lists %85-%88 in the
#     KILL.
# The autogenerated assertions below still show the MFMAs using the
# _vgprcd_e64 opcode with vreg_128_align2 operands for this case.
name: src2_singledef_multiuse_dst_singleuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 128, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Case named by the function: src2 multi-def / multi-use, dst single-use /
# multi-def.
#   * src2 here is %84, the third source operand of the first MFMA in the
#     chain. It is defined by a DS_READ_B128 in both input bb.2 and bb.3, and
#     is read by that MFMA in bb.5 and by the first DS_WRITE_B128 in bb.6.
#   * dst here is %88, the result of the last MFMA in the chain. It is defined
#     by a DS_READ_B128 in bb.0 and again by that MFMA in bb.5; apart from the
#     trailing KILL, its only reader is the second DS_WRITE_B128 in bb.6.
# The input block labels skip bb.1, while the CHECK lines use consecutive
# numbers, so CHECK bb.N (N >= 1) corresponds to input bb.N+1.
# The autogenerated CHECK lines show all four MFMAs still in the _vgprcd_e64
# form with vreg_128_align2 results.
name: src2_multidef_multiuse_dst_singleuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 384, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Case named by the function: src2 single-def / single-use, dst multi-use /
# single-def.
#   * src2 here is %84, the third source operand of the first MFMA in the
#     chain. It has one definition (DS_READ_B128 in bb.0) and one reader (that
#     MFMA in bb.1).
#   * dst here is %88, the result of the last MFMA in the chain. It has one
#     definition (bb.1) and is read by a DS_WRITE_B128 in each of the two
#     branch targets bb.3 and bb.4, plus the trailing KILL in bb.5.
# Input block numbering is already consecutive, so CHECK block numbers match
# the input one-to-one.
# The autogenerated CHECK lines show all four MFMAs still in the _vgprcd_e64
# form with vreg_128_align2 results.
name: src2_singledef_singleuse_dst_multiuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
S_BRANCH %bb.5
bb.4:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
bb.5:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# What this body contains:
#   * %84, the third source operand (src2) of the first MFMA in the chain, has
#     ONE definition (DS_READ_B128 in bb.0) and TWO readers: that MFMA in bb.1
#     and the DS_WRITE_B128 in bb.5.
#   * %88, the result of the last MFMA in the chain, has one definition (bb.1)
#     and is read through .sub0/.sub1 by DS_WRITE_B32 pairs in both branch
#     targets bb.3 and bb.4, plus the trailing KILL in bb.5.
# NOTE(review): the function name says "src2_multidef_singleuse", but the body
# as written is src2 single-def / multi-use. The function named
# src2_singledef_multiuse_dst_multiuse_singledef_agpr in this file has the
# opposite mismatch, so the two names look swapped -- confirm, and rename
# together with the matching IR declarations if so.
# The autogenerated CHECK lines show all four MFMAs still in the _vgprcd_e64
# form with vreg_128_align2 results.
name: src2_multidef_singleuse_dst_multiuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec
S_BRANCH %bb.5
bb.4:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec
bb.5:
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# What this body contains:
#   * %84, the third source operand (src2) of the first MFMA in the chain, has
#     TWO definitions (a DS_READ_B128 in each of input bb.2 and bb.3) and ONE
#     reader: that MFMA in bb.4.
#   * %88, the result of the last MFMA in the chain, has one definition (bb.4)
#     and is read through .sub0/.sub1 by DS_WRITE_B32 pairs in both branch
#     targets bb.6 and bb.7, plus the trailing KILL in bb.8.
# NOTE(review): the function name says "src2_singledef_multiuse", but the body
# as written is src2 multi-def / single-use. The function named
# src2_multidef_singleuse_dst_multiuse_singledef_agpr in this file has the
# opposite mismatch, so the two names look swapped -- confirm, and rename
# together with the matching IR declarations if so.
# The input block labels skip bb.1, while the CHECK lines use consecutive
# numbers, so CHECK bb.N (N >= 1) corresponds to input bb.N+1.
# The autogenerated CHECK lines show all four MFMAs still in the _vgprcd_e64
# form with vreg_128_align2 results.
name: src2_singledef_multiuse_dst_multiuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.5:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.6:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
S_BRANCH %bb.8
bb.7:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
bb.8:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
name: src2_multidef_multiuse_dst_multiuse_singledef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.5:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.6:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
S_BRANCH %bb.8
bb.7:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
bb.8:
DS_WRITE_B32_gfx9 %64:vgpr_32, %84.sub0:vreg_128_align2, 256, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Machine-scheduler test (see the llc invocation at the top of the file) built
# around a chain of four V_MFMA_SCALE_F32_16X16X128_F8F6F4 instructions in bb.1,
# each one consuming the previous result as its third source operand.
#
# Register shapes in the input MIR below:
#   * %84 - third source operand of the first MFMA (presumably the "src2" of
#     the test name; confirm): one def (DS_READ_B128 in bb.0) and one use
#     (the first MFMA in bb.1).
#   * %88 - result of the last MFMA (presumably the "dst" of the test name):
#     two defs (DS_READ_B128 in bb.0 and the MFMA in bb.1) and several uses
#     (DS_WRITE_B128 in bb.3 and in bb.4, plus the KILL in bb.5).
#   * %85-%87 - intermediate results, each defined by an IMPLICIT_DEF in bb.0
#     and again by its MFMA in bb.1.
#
# Control flow: bb.0 may branch directly to bb.3, bypassing the MFMA chain in
# bb.1; bb.2 selects between the two DS_WRITE_B128 blocks (bb.3 / bb.4).
#
# The autogenerated assertions record the MFMAs with the same _vgprcd_e64
# opcode and vreg_128_align2 results as the input.
# NOTE(review): no areg_* register class appears in the expected output; the
# meaning of the "_agpr" suffix is not visible from this function - confirm.
name: src2_singledef_singleuse_dst_multiuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
S_BRANCH %bb.5
bb.4:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec
bb.5:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Machine-scheduler test (see the llc invocation at the top of the file) built
# around a chain of four V_MFMA_SCALE_F32_16X16X128_F8F6F4 instructions in bb.1,
# each one consuming the previous result as its third source operand.
#
# Register shapes in the input MIR below:
#   * %84 - third source operand of the first MFMA: one def (DS_READ_B128 in
#     bb.0) and two uses (the first MFMA in bb.1 and the DS_WRITE_B128 in bb.5).
#   * %88 - result of the last MFMA: two defs (DS_READ_B128 in bb.0 and the
#     MFMA in bb.1); its sub0/sub1 lanes are stored by DS_WRITE_B32 in bb.3 and
#     bb.4, and the full register appears in the KILL in bb.5.
#   * %85-%87 - intermediate results, each defined by an IMPLICIT_DEF in bb.0
#     and again by its MFMA in bb.1.
#
# Control flow: bb.0 may branch directly to bb.3, bypassing the MFMA chain in
# bb.1; bb.2 selects between the two store blocks (bb.3 / bb.4).
#
# NOTE(review): the test name says "src2_multidef_singleuse", but %84 as
# written here has a single def and multiple uses, while the function that
# follows (named "src2_singledef_multiuse_...") has the opposite shape. The two
# names look swapped - confirm. A rename would presumably also have to touch
# the matching declaration in the embedded IR module at the top of the file.
name: src2_multidef_singleuse_dst_multiuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub1, 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.1:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.2:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.3:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec
S_BRANCH %bb.5
bb.4:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec
bb.5:
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Machine-scheduler test (see the llc invocation at the top of the file) built
# around a chain of four V_MFMA_SCALE_F32_16X16X128_F8F6F4 instructions in bb.5,
# each one consuming the previous result as its third source operand.
#
# Register shapes in the input MIR below:
#   * %84 - third source operand of the first MFMA: two defs (DS_READ_B128 at
#     offset 256 in bb.2 and at offset 512 in bb.3) and one use (the first MFMA
#     in bb.5).
#   * %88 - result of the last MFMA: two defs (DS_READ_B128 in bb.0 and the
#     MFMA in bb.5); its sub0/sub1 lanes are stored by DS_WRITE_B32 in bb.7 and
#     bb.8, and the full register appears in the KILL in bb.9.
#   * %85-%87 - intermediate results, each defined by an IMPLICIT_DEF in bb.0
#     and again by its MFMA in bb.5.
#
# Control flow: bb.4 may branch directly to bb.6, bypassing the MFMA chain in
# bb.5. The input block labels skip bb.1; the expected output shows the blocks
# numbered consecutively as bb.0-bb.8.
#
# NOTE(review): the test name says "src2_singledef_multiuse", but %84 as
# written here has multiple defs and a single use, while the preceding function
# (named "src2_multidef_singleuse_...") has the opposite shape. The two names
# look swapped - confirm. A rename would presumably also have to touch the
# matching declaration in the embedded IR module at the top of the file.
name: src2_singledef_multiuse_dst_multiuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit killed $scc
bb.7:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
S_BRANCH %bb.9
bb.8:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
bb.9:
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Machine-scheduler test (see the llc invocation at the top of the file) built
# around a chain of four V_MFMA_SCALE_F32_16X16X128_F8F6F4 instructions in bb.5,
# each one consuming the previous result as its third source operand.
#
# Register shapes in the input MIR below:
#   * %84 - third source operand of the first MFMA (presumably the "src2" of
#     the test name; confirm): two defs (DS_READ_B128 at offset 256 in bb.2 and
#     at offset 512 in bb.3) and two uses (the first MFMA in bb.5 and the
#     DS_WRITE_B128 in bb.9).
#   * %88 - result of the last MFMA (presumably the "dst" of the test name):
#     two defs (DS_READ_B128 in bb.0 and the MFMA in bb.5); its sub0/sub1 lanes
#     are stored by DS_WRITE_B32 in bb.7 and bb.8, and the full register
#     appears in the KILL in bb.9.
#   * %85-%87 - intermediate results, each defined by an IMPLICIT_DEF in bb.0
#     and again by its MFMA in bb.5.
#
# Control flow: bb.4 may branch directly to bb.6, bypassing the MFMA chain in
# bb.5. The input block labels skip bb.1; the expected output shows the blocks
# numbered consecutively as bb.0-bb.8.
#
# The autogenerated assertions record the MFMAs with the same _vgprcd_e64
# opcode and vreg_128_align2 results as the input.
name: src2_multidef_multiuse_dst_multiuse_multidef_agpr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec
; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
%88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit killed $scc
bb.7:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
S_BRANCH %bb.9
bb.8:
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
bb.9:
DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Straight-line case. The src2 operand of the first MFMA (%84) is only
# partially written (sub0) by a V_ADD_U32_e32 in the entry block; every later
# MFMA takes the previous MFMA's result as src2, and the last result (%88) is
# stored by a single DS_WRITE_B128.
# The wide vreg_* IMPLICIT_DEFs and the two av_512 implicit-defs are kept live
# up to the trailing KILL / S_NOP; presumably they exist to build the register
# pressure the rewrite stage reacts to -- TODO confirm against the pass.
# The autogenerated checks expect all four MFMAs to keep the _vgprcd_e64 opcode
# with vreg_128_align2 operands.
name: src2_singledef_singleuse_dst_singleuse_singledef_mixed
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Control-flow case with several definitions on both sides of the MFMA chain.
#  * src2 of the first MFMA (%84) is defined in two different blocks: a partial
#    (sub0) V_ADD_U32_e32 def in bb.2 and a full DS_READ_B128 def in bb.3.
#  * %85-%87 are pre-defined by IMPLICIT_DEF and %88 by a partial (sub0)
#    V_ADD_U32_e32 in bb.0; all four are redefined by the MFMA chain in bb.5,
#    which is bypassed when the S_CBRANCH_SCC1 in bb.4 jumps to bb.6.
#  * The chain result %88 is read by a DS_WRITE_B128 in bb.7 and by two
#    V_ADD_U32_e32 in bb.8.
# The wide IMPLICIT_DEFs and av_512 implicit-defs stay live up to the trailing
# KILL / S_NOP; presumably they provide register pressure -- TODO confirm.
# The autogenerated checks expect the MFMAs to keep the _vgprcd_e64 opcode with
# vreg_128_align2 operands.
name: src2_multidef_singleuse_dst_multiuse_multidef_mixed
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[DEF19:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF19:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF21]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[DEF19]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
S_BRANCH %bb.4
bb.3:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.4:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.6, implicit killed $scc
bb.5:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.6:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit killed $scc
bb.7:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
%94:vreg_128_align2 = IMPLICIT_DEF
S_BRANCH %bb.9
bb.8:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
bb.9:
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Control-flow case where src2 has one definition but more than one reader.
#  * src2 of the first MFMA (%84) is defined once, by a partial (sub0)
#    V_ADD_U32_e32 in bb.0; it is read by that MFMA in bb.2 and again by a
#    V_ADD_U32_e32 in bb.4.
#  * %85-%87 are pre-defined by IMPLICIT_DEF and %88 by a partial (sub0)
#    V_ADD_U32_e32 in bb.0; all four are redefined by the MFMA chain in bb.2,
#    which is bypassed when the S_CBRANCH_SCC1 in bb.0 jumps to bb.4.
#  * Six further MFMAs (%89-%93, %193) all take %87 as src2 and are consumed
#    only by the KILL in bb.3.
#  * The chain result %88 is stored by one DS_WRITE_B128 in bb.4.
# The wide IMPLICIT_DEFs and av_512 implicit-defs stay live up to the trailing
# KILL / S_NOP; presumably they provide register pressure -- TODO confirm.
# The autogenerated checks expect every MFMA to keep the _vgprcd_e64 opcode
# with vreg_128_align2 operands.
name: src2_singledef_multiuse_dst_singleuse_multidef_mixed
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_ADD_U32_e32_1]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
%85:vreg_128_align2 = IMPLICIT_DEF
%86:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.4, implicit killed $scc
bb.2:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.3:
KILL %89, %90, %91, %92, %93, %193
bb.4:
%94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
---
# Control-flow case where src2 has several definitions and several readers.
#  * src2 of the first MFMA (%84) is defined in two different blocks: a full
#    DS_READ_B128 def in bb.2 and a partial (sub0) V_ADD_U32_e32 def in bb.3.
#    It is read by that MFMA in bb.4 and again by a V_ADD_U32_e32 in bb.8.
#  * The MFMA chain in bb.4 is executed unconditionally; its result %88 is read
#    by two V_ADD_U32_e32 in bb.6 and by a DS_WRITE_B128 in bb.7.
#  * %94 is defined on both sides of the final diamond (bb.6 and bb.7).
# NOTE(review): in this input %85-%88 are each defined exactly once (by the
# MFMAs in bb.4), although the test name says dst_..._multidef -- confirm that
# this is the intended shape.
# The wide IMPLICIT_DEFs and av_512 implicit-defs stay live up to the trailing
# KILL / S_NOP; presumably they provide register pressure -- TODO confirm.
# The autogenerated checks expect the MFMAs to keep the _vgprcd_e64 opcode with
# vreg_128_align2 operands.
name: src2_multidef_multiuse_dst_multiuse_multidef_mixed
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
bb.2:
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
S_BRANCH %bb.4
bb.3:
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
bb.4:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
bb.5:
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.7, implicit killed $scc
bb.6:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
S_BRANCH %bb.8
bb.7:
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
%94:vreg_128_align2 = IMPLICIT_DEF
bb.8:
%95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...
# Test: no_copy_for_mfma
#
# Input (the MIR that follows the autogenerated assertions in the body):
#   * bb.0 defines a set of wide VGPR tuples (%1-%8, %10-%13) and two av_512
#     values (%50, %51). Their only uses are the final KILL / S_NOP in bb.5.
#     NOTE(review): presumably these are here purely to raise register
#     pressure across the whole function - confirm against the sibling tests.
#   * %88 has several definitions: IMPLICIT_DEF in bb.0 and bb.2, the last
#     MFMA of the chain in bb.3, and four MFMAs in bb.4 that each read %88 as
#     src2 and write it back. It is read in bb.5.
#   * %85/%86/%87 are MFMA results in bb.3 and IMPLICIT_DEFs in bb.4.
#   * The input labels its blocks bb.0, bb.2, bb.3, bb.4, bb.5 (there is no
#     bb.1); the expected output uses bb.0-bb.4, so every label after bb.0 is
#     one lower in the assertions than in the input.
#
# Expected output (as recorded by the assertions):
#   * Every V_MFMA_SCALE_F32_16X16X128_F8F6F4 keeps its _vgprcd_e64 opcode and
#     vreg_128_align2 operands, and no COPY instruction appears anywhere.
#     NOTE(review): presumably this is the point of the test name - the MFMA
#     form rewrite stage should leave this function alone - confirm against
#     the scheduler stage implementation.
#   * The V_ADD_U32_e32 that defines %84.sub0 is written in bb.0 in the input
#     but is expected at the top of the first MFMA block, directly before its
#     user.
#   * One vreg_1024 and one vreg_128_align2 IMPLICIT_DEF are expected in the
#     final block, right before the KILL, instead of in bb.0.
#
# The assertions are autogenerated (see the note at the top of the file);
# regenerate them with utils/update_mir_test_checks.py rather than editing
# them by hand.
---
name: no_copy_for_mfma
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
sgprForEXECCopy: '$sgpr100_sgpr101'
body: |
; CHECK-LABEL: name: no_copy_for_mfma
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: dead [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF14]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[DEF17]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
; Input MIR starts here. Entry block: wide IMPLICIT_DEFs whose only uses are
; the KILL / S_NOP in bb.5.
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
%1:vreg_1024 = IMPLICIT_DEF
%2:vreg_1024 = IMPLICIT_DEF
%3:vreg_1024 = IMPLICIT_DEF
%4:vreg_1024 = IMPLICIT_DEF
%5:vreg_1024 = IMPLICIT_DEF
%6:vreg_1024 = IMPLICIT_DEF
%7:vreg_1024 = IMPLICIT_DEF
%8:vreg_512 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
%12:vreg_128 = IMPLICIT_DEF
%13:vreg_1024 = IMPLICIT_DEF
S_NOP 0, implicit-def %50:av_512
S_NOP 0, implicit-def %51:av_512
SCHED_BARRIER 0
%60:av_128_align2 = IMPLICIT_DEF
%61:av_128_align2 = IMPLICIT_DEF
%62:vreg_128_align2 = IMPLICIT_DEF
%63:vreg_64_align2 = IMPLICIT_DEF
%64:vgpr_32 = IMPLICIT_DEF
%72:vreg_128_align2 = IMPLICIT_DEF
; %84 feeds src2 of the first MFMA in bb.3; only its sub0 lane is written.
undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
; First of several definitions of %88 (redefined in bb.2, bb.3 and bb.4).
%88:vreg_128_align2 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; Fallthrough path: redefine %88, then go to the in-place MFMA block.
bb.2:
%88:vreg_128_align2 = IMPLICIT_DEF
S_BRANCH %bb.4
; Chain of four MFMAs: each result is the src2 operand of the next
; (%84 -> %85 -> %86 -> %87 -> %88).
bb.3:
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
$scc = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.5, implicit killed $scc
; Four MFMAs that each read %88 as src2 and write the result back to %88,
; followed by IMPLICIT_DEF redefinitions of %86, %85 and %87.
bb.4:
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
%86:vreg_128_align2 = IMPLICIT_DEF
%85:vreg_128_align2 = IMPLICIT_DEF
%87:vreg_128_align2 = IMPLICIT_DEF
; Join block: a VALU read of %88.sub0, then the KILL / S_NOP that use every
; value defined in bb.0.
bb.5:
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
SCHED_BARRIER 0
KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
S_NOP 0, implicit %50, implicit %51
S_ENDPGM 0
...