| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -amdgpu-disable-rewrite-mfma-form-sched-stage=false -o - %s | FileCheck %s |
| |
| --- | |
| ; Embedded LLVM IR module for this test. Every function defined in this section |
| ; has a body consisting only of `unreachable` and carries attribute group #0, |
| ; which is defined at the end of the section. |
| ; NOTE(review): presumably each stub pairs by name with a machine function in the |
| ; MIR documents that follow - confirm before renaming or removing any of them. |
| define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| ; NOTE(review): "singleedef" in the next name differs from the "singledef" spelling |
| ; used by every sibling stub; it looks like a typo. A machine function paired with |
| ; this stub (not visible in this part of the file) would presumably need the same |
| ; rename - confirm before changing it. |
| define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 { |
| entry: |
| unreachable |
| } |
| |
| define void @no_copy_for_mfma() #0 { |
| entry: |
| unreachable |
| } |
| |
| ; Attribute group referenced by every stub above: it sets "amdgpu-waves-per-eu" |
| ; to "1,1" and "amdgpu-flat-work-group-size" to "64,64". |
| attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} |
| ... |
| |
| |
| --- |
| # Input: %84.sub0 is written once (V_ADD_U32_e32 in bb.0, reading %72.sub0) and %84 |
| # is read only as the third source operand of the first V_MFMA_SCALE in bb.2. The |
| # four MFMAs form a chain, each taking the previous result as that operand. The |
| # last result, %88, is read by one V_ADD_U32_e32 and by the KILL in bb.3. |
| # Expected output (autogenerated assertions below): the MFMAs keep their vgprcd |
| # opcode and vreg_128_align2 results, the first V_ADD_U32_e32 appears in bb.1 ahead |
| # of the MFMAs, and two IMPLICIT_DEFs appear in the final block before the KILL. |
| # NOTE(review): the third source operand is presumably the "src2" of the test name |
| # and the MFMA result its "dst" - confirm against the pass under test. |
| name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: %84.sub0 is written once (V_ADD_U32_e32 in bb.0, reading %72.sub1) and is |
| # read twice: %84 is the third source operand of the first V_MFMA_SCALE in bb.1, |
| # and %84.sub0 is an operand of the V_ADD_U32_e32 in bb.2. The four MFMAs form a |
| # chain, each taking the previous result as its third source operand. The last |
| # result, %88, is read only by the KILL. |
| # Expected output (autogenerated assertions below): the MFMAs keep their vgprcd |
| # opcode, the first V_ADD_U32_e32 stays at the end of bb.0, and two IMPLICIT_DEFs |
| # appear in the final block before the KILL. |
| # NOTE(review): the third source operand is presumably the "src2" of the test name |
| # and the MFMA result its "dst" - confirm against the pass under test. |
| name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.1: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Input: %84.sub0 is written in two blocks, bb.2 (reading %72.sub0) and bb.3 |
| # (reading %72.sub1). bb.0 ends in S_CBRANCH_SCC1 %bb.3 and bb.2 ends in |
| # S_BRANCH %bb.4. %84 is read only as the third source operand of the first |
| # V_MFMA_SCALE in bb.4; the four MFMAs form a chain, each taking the previous |
| # result as that operand. The last result, %88, is read only by the KILL in bb.7. |
| # Expected output (autogenerated assertions below): the blocks are numbered |
| # bb.0 through bb.4, each V_ADD_U32_e32 stays in its own block, the MFMAs keep |
| # their vgprcd opcode, and two IMPLICIT_DEFs appear in the final block before the |
| # KILL. |
| # NOTE(review): the third source operand is presumably the "src2" of the test name |
| # and the MFMA result its "dst" - confirm against the pass under test. |
| name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.7: |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: %84.sub0 is written in two blocks, bb.2 (reading %72.sub0) and bb.3 |
| # (reading %72.sub1). bb.0 ends in S_CBRANCH_SCC1 %bb.3 and bb.2 ends in |
| # S_BRANCH %bb.4. The value is read twice: %84 is the third source operand of the |
| # first V_MFMA_SCALE in bb.4, and %84.sub0 is an operand of the V_ADD_U32_e32 in |
| # bb.7. The four MFMAs form a chain, each taking the previous result as its third |
| # source operand. The last result, %88, is read only by the KILL in bb.7. |
| # Expected output (autogenerated assertions below): the blocks are numbered |
| # bb.0 through bb.4, each defining V_ADD_U32_e32 stays in its own block, the MFMAs |
| # keep their vgprcd opcode, and two IMPLICIT_DEFs appear in the final block before |
| # the KILL. |
| # NOTE(review): the third source operand is presumably the "src2" of the test name |
| # and the MFMA result its "dst" - confirm against the pass under test. |
| name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.7: |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| --- |
| # Machine-scheduler test case (the llc invocation is at the top of the file). |
| # Register pattern in the input MIR of this function: |
| #   %84 - one definition (V_ADD_U32_e32 in bb.0) and one read, as the third source |
| #         operand of the first MFMA in bb.2 (presumably the "src2" of the test name). |
| #   %88 - two definitions (sub0 in bb.0, whole register by the fourth MFMA in bb.2) and |
| #         one read outside the final KILL (V_ADD_U32_e32 in bb.4). |
| # In the expected output the V_ADD_U32_e32 that defines %84 appears in bb.1, directly |
| # before the MFMA that reads it, instead of in bb.0. |
| # The expected-output assertions in the body are autogenerated (see the NOTE on the |
| # first line of the file); regenerate them with the script instead of editing by hand. |
| name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. Block numbers are bb.0, bb.2, bb.3, bb.4 (there is no bb.1); |
| ; the expected output above lists the same four blocks as bb.0 through bb.3. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| ; Only definition of %84; its single reader is the first MFMA in bb.2. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First of the two definitions of %88 (sub0 only). |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| ; Second definition of %88 (whole register). |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.4: |
| ; The one read of %88 apart from the KILL below. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Machine-scheduler test case (the llc invocation is at the top of the file). |
| # Register pattern in the input MIR of this function: |
| #   %84 - one definition (V_ADD_U32_e32 in bb.0) and two reads: the third source operand |
| #         of the first MFMA in bb.2, and the first V_ADD_U32_e32 in bb.4. |
| #   %88 - two definitions (sub0 in bb.0, whole register by the fourth MFMA in bb.2) and |
| #         one read outside the final KILL (second V_ADD_U32_e32 in bb.4). |
| # NOTE(review): the name says "src2_multidef_singleuse", but %84 here has a single |
| # definition and two reads. The function named src2_singledef_multiuse_dst_singleuse_ |
| # multidef_vgpr further down has the opposite pattern (two definitions, one read), so |
| # the two names look swapped - confirm before relying on the names. |
| # The expected-output assertions in the body are autogenerated (see the NOTE on the |
| # first line of the file); regenerate them with the script instead of editing by hand. |
| name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. Block numbers are bb.0, bb.2, bb.3, bb.4 (there is no bb.1); |
| ; the expected output above lists the same four blocks as bb.0 through bb.3. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| ; Only definition of %84; it is read by the first MFMA in bb.2 and again in bb.4. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First of the two definitions of %88 (sub0 only). |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| ; Second definition of %88 (whole register). |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.4: |
| ; Second read of %84. |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| ; The one read of %88 apart from the KILL below. |
| undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Machine-scheduler test case (the llc invocation is at the top of the file). |
| # Register pattern in the input MIR of this function: |
| #   %84 - two definitions, one on each side of the first branch (V_ADD_U32_e32 in bb.2 |
| #         and in bb.3), and one read: the third source operand of the first MFMA in bb.5. |
| #   %88 - two definitions (sub0 in bb.0, whole register by the fourth MFMA in bb.5) and |
| #         one read outside the final KILL (V_ADD_U32_e32 in bb.7). |
| # NOTE(review): the name says "src2_singledef_multiuse", but %84 here has two |
| # definitions and a single read. The function named src2_multidef_singleuse_dst_ |
| # singleuse_multidef_vgpr earlier in the file has the opposite pattern (one definition, |
| # two reads), so the two names look swapped - confirm before relying on the names. |
| # The expected-output assertions in the body are autogenerated (see the NOTE on the |
| # first line of the file); regenerate them with the script instead of editing by hand. |
| name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. Block numbers are bb.0 and bb.2 through bb.7 (there is no |
| ; bb.1); the expected output above lists the same seven blocks as bb.0 through bb.6. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First of the two definitions of %88 (sub0 only). |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; Definition of %84 on the fall-through side of the branch (reads %72.sub0). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| ; Definition of %84 on the branch-taken side (reads %72.sub1). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.5: |
| ; Only read of %84: third source operand of this MFMA. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| ; Second definition of %88 (whole register). |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.7: |
| ; The one read of %88 apart from the KILL below. |
| undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Def/use shape exercised by this function (register numbers refer to the |
| # input MIR at the bottom of the body): |
| #   src2: %84 is the src2 operand of the first MFMA. It is written in both |
| #         bb.2 and bb.3 (multi-def) and read by that MFMA in bb.5 and by the |
| #         V_ADD defining %94 in bb.7 (multi-use). |
| #   dst:  %88 is written by the V_ADD in bb.0 and by the fourth MFMA in bb.5 |
| #         (multi-def); apart from the final KILL it is read only by the V_ADD |
| #         defining %95 in bb.7 (single-use). |
| # The autogenerated assertions expect every MFMA to stay in its vgprcd form |
| # with vreg_128_align2 results. |
| # The input skips bb.1; the expected output shows the blocks numbered |
| # consecutively (bb.0 .. bb.6). |
| name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. %1-%13 are read only by the final KILL and %50/%51 |
| ; only by the final S_NOP; presumably they exist to raise register pressure |
| ; (TODO confirm). |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First definition of %88 (the dst under test); the MFMA in bb.5 is the second. |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; First of the two definitions of %84 (src2 of the first MFMA). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| ; Second definition of %84, on the other arm of the branch. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| ; Conditional skip over the MFMA block straight to the final block. |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.5: |
| ; MFMA chain through src2: %84 -> %85 -> %86 -> %87. %88, %89-%93 and %193 |
| ; all take %87 as src2. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| ; Keeps the six extra MFMA results live across the end of bb.5. |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.7: |
| ; %94 is the second reader of %84; %95 is the only non-KILL reader of %88. |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Def/use shape exercised by this function (register numbers refer to the |
| # input MIR at the bottom of the body): |
| #   src2: %84 is the src2 operand of the first MFMA. It is written once, by |
| #         the V_ADD in bb.0 (single-def), and read only by that MFMA in bb.1 |
| #         (single-use). |
| #   dst:  %88 is written once, by the fourth MFMA in bb.1 (single-def), and |
| #         read in both bb.3 (.sub1) and bb.4 (.sub0) as well as by the final |
| #         KILL (multi-use). |
| # The autogenerated assertions expect every MFMA to stay in its vgprcd form |
| # with vreg_128_align2 results. |
| name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. %1-%13 are read only by the final KILL and %50/%51 |
| ; only by the final S_NOP; presumably they exist to raise register pressure |
| ; (TODO confirm). |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| ; The only definition of %84 (src2 of the first MFMA). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| |
| bb.1: |
| ; MFMA chain through src2: %84 -> %85 -> %86 -> %87 -> %88. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| ; First reader of %88 (takes .sub1). |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| ; Second reader of %88 (takes .sub0), on the other arm of the branch. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| |
| bb.5: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Def/use shape exercised by this function (register numbers refer to the |
| # input MIR at the bottom of the body): |
| #   src2: %84 is the src2 operand of the first MFMA. In this body it is |
| #         written once, by the V_ADD in bb.0, and read twice: by that MFMA in |
| #         bb.1 and by the V_ADD defining %104 in bb.5. |
| #   dst:  %88 is written once, by the fourth MFMA in bb.1 (single-def), and |
| #         read in both bb.3 and bb.4 (each arm reads .sub0 and .sub1) as well |
| #         as by the final KILL (multi-use). |
| # NOTE(review): the src2 shape above is single-def / multi-use, which does not |
| # match the "multidef_singleuse" in the function name -- confirm whether the |
| # name or the body reflects the intended case. |
| # The autogenerated assertions expect every MFMA to stay in its vgprcd form |
| # with vreg_128_align2 results. |
| name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. %1-%13 are read only by the final KILL and %50/%51 |
| ; only by the final S_NOP; presumably they exist to raise register pressure |
| ; (TODO confirm). |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| ; The only definition of %84 (src2 of the first MFMA) in this function. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.1: |
| ; MFMA chain through src2: %84 -> %85 -> %86 -> %87 -> %88. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| ; Readers of %88 on the first arm: %94.sub0 from .sub1, %94.sub1 from .sub0. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| ; Readers of %88 on the second arm, with the subregisters the other way round. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.5: |
| ; Second reader of %84, after the MFMA chain. |
| %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| |
| ... |
| |
| --- |
| # Def/use shape exercised by this function (register numbers refer to the |
| # input MIR at the bottom of the body): |
| #   src2: %84 is the src2 operand of the first MFMA. In this body it is |
| #         written in both bb.2 and bb.3 and read only by that MFMA in bb.4 |
| #         (it is not an operand of the final KILL). |
| #   dst:  %88 is written once, by the fourth MFMA in bb.4 (single-def), and |
| #         read in both bb.6 and bb.7 (each arm reads .sub0 and .sub1) as well |
| #         as by the final KILL (multi-use). |
| # NOTE(review): the src2 shape above is multi-def / single-use, which does not |
| # match the "singledef_multiuse" in the function name -- confirm whether the |
| # name or the body reflects the intended case. |
| # The autogenerated assertions expect every MFMA to stay in its vgprcd form |
| # with vreg_128_align2 results. |
| # The input skips bb.1; the expected output shows the blocks numbered |
| # consecutively (bb.0 .. bb.7). |
| name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here. %1-%13 are read only by the final KILL and %50/%51 |
| ; only by the final S_NOP; presumably they exist to raise register pressure |
| ; (TODO confirm). |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; First of the two definitions of %84 (src2 of the first MFMA). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| ; Second definition of %84, on the other arm of the branch. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.4: |
| ; MFMA chain through src2: %84 -> %85 -> %86 -> %87 -> %88. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.5: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.6: |
| ; Readers of %88 on the first arm: %94.sub0 from .sub1, %94.sub1 from .sub0. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.8 |
| |
| bb.7: |
| ; Readers of %88 on the second arm, with the subregisters the other way round. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.8: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| # src2 multi-def / multi-use, dst multi-use / single-def. |
| # The third source operand of the first MFMA (%84) is written in both bb.2 and |
| # bb.3 and is read a second time by the V_ADD_U32 in bb.8. The result of the |
| # last MFMA (%88) has a single def and is read in both bb.6 and bb.7. |
| # The expected output keeps all four MFMAs in their vgprcd form. |
| --- |
| name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| ; %1-%13 are only read by the KILL in the final block. |
| ; NOTE(review): presumably filler to raise VGPR pressure - confirm. |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| ; %50 and %51 are defined here and read only by the S_NOP before S_ENDPGM, |
| ; so they are live through every block in between. |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| ; %84 is written on both paths into bb.4 (here and in bb.3): the multi-def src2. |
| bb.2: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| ; Chain of four MFMAs: each one takes the previous result as its third source operand. |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.5: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| ; %88 (the last MFMA result) is read in both bb.6 and bb.7: the multi-use dst. |
| bb.6: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.8 |
| |
| bb.7: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.8: |
| ; Second read of %84, in addition to the first MFMA: the multi-use src2. |
| %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| # src2 single-def / single-use, dst multi-use / multi-def. |
| # The third source operand of the first MFMA (%84) has one def in bb.0 and that |
| # MFMA is its only reader. %88 is first written in bb.0 and then overwritten by |
| # the fourth MFMA in bb.1, and is read in both bb.4 and bb.5. |
| # bb.1 also holds twelve further MFMAs (%89-%93, %193-%199) that all take %87 as |
| # their third source operand and are only consumed by the KILL in bb.2. |
| # The expected output keeps every MFMA in its vgprcd form and shows the |
| # V_ADD_U32 that defines %84 in bb.1, directly before its only user. |
| --- |
| name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| ; %1-%13 are only read by the KILL in the final block. |
| ; NOTE(review): presumably filler to raise VGPR pressure - confirm. |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| ; %50 and %51 are defined here and read only by the S_NOP before S_ENDPGM, |
| ; so they are live through every block in between. |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| ; Only def of %84; its only reader is the first MFMA in bb.1 (single-def, single-use src2). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| ; %85-%88 get a first def here and a second def from the MFMAs in bb.1, |
| ; so the MFMA results are multi-def. bb.0 can branch straight to bb.4, |
| ; where %88 is read without passing through bb.1. |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.1: |
| ; Chain of four MFMAs: each one takes the previous result as its third source operand. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| ; Twelve extra MFMAs that all read %87; their results are only used by the KILL in bb.2. |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %196:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| |
| bb.2: |
| KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199 |
| |
| |
| bb.3: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| |
| ; %88 is read in both bb.4 and bb.5: the multi-use dst. |
| ; NOTE(review): bb.4 branches to bb.5, which redefines %94 with an undef def, so |
| ; the def below is marked dead in the expected output. The neighbouring tests |
| ; branch past the second def instead - confirm this is intended. |
| bb.4: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.5: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| |
| bb.6: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.6 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.1: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.3: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| |
| bb.4: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.6 |
| |
| bb.5: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.6: |
| %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Variant: src2 multi-def / single-use, dst multi-use / multi-def, VGPR operands. |
| # - src2 of the MFMA chain is %84: it is defined twice (bb.2 and bb.3) and is |
| #   read only by the first MFMA in bb.5. |
| # - The chain result %88 is defined twice (partial def of sub0 in bb.0 and the |
| #   fourth MFMA in bb.5) and is read in bb.8, bb.9 and the final KILL in bb.10. |
| # - Input block numbers skip bb.1; the expected output below is numbered |
| #   bb.0-bb.9, so every block after bb.0 appears one lower than in the input. |
| # - The expected output keeps the _vgprcd_e64 opcode with vreg_128_align2 |
| #   results, i.e. the MFMAs appear in the same form as in the input. |
| # NOTE(review): the wide IMPLICIT_DEFs kept live until the final KILL look like |
| # register-pressure filler -- confirm against the intent of the test. |
| name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.9(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.9 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8: |
| ; CHECK-NEXT: successors: %bb.9(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.9: |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First (partial, sub0 only) definition of %88; the MFMA chain in bb.5 defines it again. |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; One of the two definitions of %84, the src2 operand of the first MFMA in bb.5. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| ; The other definition of %84 (reads %72.sub1 instead of %72.sub0). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| ; Conditionally jump over the MFMA blocks (bb.5 and bb.6). |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.5: |
| ; Dependent chain %84 -> %85 -> %86 -> %87 -> %88; the six MFMAs after it all read %87. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| ; Only reader of %89-%93 and %193. |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| |
| bb.7: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.9, implicit killed $scc |
| |
| bb.8: |
| ; Reads %88 with sub0/sub1 swapped relative to bb.9. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.10 |
| |
| bb.9: |
| ; Second block reading %88; together with bb.8 this makes the dst multi-use. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.10: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Variant: src2 multi-def / multi-use, dst multi-use / multi-def, VGPR operands. |
| # - src2 of the MFMA chain is %84: it is defined twice (bb.2 and bb.3) and is |
| #   read by the first MFMA in bb.5 and again by the V_ADD_U32_e32 in bb.9. |
| # - The chain result %88 is defined twice (partial def of sub0 in bb.0 and the |
| #   fourth MFMA in bb.5) and is read in bb.7, bb.8 and the final KILL in bb.9. |
| # - Input block numbers skip bb.1; the expected output below is numbered |
| #   bb.0-bb.8, so every block after bb.0 appears one lower than in the input. |
| # - The expected output keeps the _vgprcd_e64 opcode with vreg_128_align2 |
| #   results, i.e. the MFMAs appear in the same form as in the input. |
| # NOTE(review): the wide IMPLICIT_DEFs kept live until the final KILL look like |
| # register-pressure filler -- confirm against the intent of the test. |
| name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| ; First (partial, sub0 only) definition of %88; the MFMA chain in bb.5 defines it again. |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; One of the two definitions of %84, the src2 operand of the first MFMA in bb.5. |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| ; The other definition of %84 (reads %72.sub1 instead of %72.sub0). |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| ; Conditionally jump over the MFMA block (bb.5). |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| ; Dependent chain %84 -> %85 -> %86 -> %87 -> %88. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.8, implicit killed $scc |
| |
| bb.7: |
| ; Reads %88 with sub0/sub1 swapped relative to bb.8. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.9 |
| |
| bb.8: |
| ; Second block reading %88; together with bb.7 this makes the dst multi-use. |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.9: |
| ; Second reader of %84 (besides the first MFMA in bb.5), which makes src2 multi-use. |
| %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| |
| ... |
| |
| --- |
| name: src2_singledef_singleuse_dst_singleuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] |
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] |
| ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[COPY7]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[COPY7]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| bb.4: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| # Input: bb.0 loads %84 with DS_READ_B128 and branches conditionally to bb.3. |
| # bb.1 redefines %84 four times with MFMAs that also read it as their third |
| # source operand, then issues six more MFMAs that read %84 and write separate |
| # results (%89-%93, %193). bb.2 KILLs those six results and bb.3 reads %84.sub0 |
| # in a V_ADD_U32. |
| # Expected output: every MFMA uses the _e64 opcode (instead of _vgprcd_e64) with |
| # areg_128_align2 results; a COPY to areg_128_align2 follows the load, and COPYs |
| # back to vreg_128_align2 are placed ahead of the KILL in bb.2 and the add in bb.3. |
| # NOTE(review): the large IMPLICIT_DEF values that are only KILLed at the end |
| # presumably exist to raise register pressure - confirm. |
| --- |
| name: src2_multidef_singleuse_dst_singleuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] |
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] |
| ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[COPY7]].sub0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[COPY7]], [[V_ADD_U32_e32_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.1: |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| |
| bb.3: |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| # Input: bb.0 branches conditionally to bb.3; %84 is loaded with DS_READ_B128 at |
| # offset 0 in bb.2 or at offset 128 in bb.3. bb.4 chains four MFMAs that each |
| # read %84 as their third source operand and redefine it, and bb.7 KILLs %84. |
| # Expected output: the MFMAs keep the _vgprcd_e64 opcode and vreg_128_align2 |
| # results, i.e. no MFMA is rewritten; the blocks are renumbered bb.0-bb.4. |
| # NOTE(review): "singleedef" in the name looks like a typo for "singledef". A |
| # rename presumably has to be mirrored in the IR define at the top of the file |
| # and the assertions regenerated - confirm before changing it. |
| --- |
| name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec |
| |
| bb.4: |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.7: |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| # Input: bb.0 branches conditionally to bb.3; %84 is loaded with DS_READ_B128 at |
| # offset 0 in bb.2 or at offset 128 in bb.3. bb.4 chains four MFMAs through |
| # separate registers %85-%88, each reading the previous value as its third |
| # source operand. bb.7 stores %88 with DS_WRITE_B128 and KILLs %85-%88. |
| # Expected output: the MFMAs keep the _vgprcd_e64 opcode and vreg_128_align2 |
| # results, i.e. no MFMA is rewritten; the blocks are renumbered bb.0-bb.4. |
| --- |
| name: src2_multidef_multiuse_dst_singleuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.7: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| # Input: bb.0 loads %84 (DS_READ_B128, offset 0) and %88 (offset 128), gives |
| # %85-%87 IMPLICIT_DEF values and branches conditionally to bb.3. bb.2 redefines |
| # %85-%88 with a chain of four MFMAs that starts from %84, each reading the |
| # previous value as its third source operand. bb.3 stores %88 with |
| # DS_WRITE_B128 and KILLs %85-%88. |
| # Expected output: the MFMAs keep the _vgprcd_e64 opcode and vreg_128_align2 |
| # results, i.e. no MFMA is rewritten; the blocks are renumbered bb.0-bb.2. |
| --- |
| name: src2_singledef_singleuse_dst_singleuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 128, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| # Def/use shape of this input: |
| #   %84 - written once (DS_READ in bb.0); read twice (third source operand of |
| #         the first MFMA in bb.2, and a DS_WRITE in bb.3). |
| #   %88 - written twice (DS_READ in bb.0 and the last MFMA in bb.2); read by |
| #         one DS_WRITE in bb.3 plus the trailing KILL. |
| # The autogenerated expectations keep every MFMA in its _vgprcd_e64 form. |
| # NOTE(review): the function name says src2 is "multidef_singleuse", but %84 |
| # has a single def and two uses here, which is the shape the name of the next |
| # test (src2_singledef_multiuse_...) describes. The two names look swapped; |
| # confirm before renaming, since the IR stub at the top of the file and the |
| # label line would have to change together. |
| --- |
| name: src2_multidef_singleuse_dst_singleuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 128, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR: bb.0 either falls through to bb.2 or branches to bb.3 on $scc. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec |
| |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| ; Four chained MFMAs: each result is the third source operand of the next. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| ; %84 is stored in addition to %88, which gives %84 its second use. |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| # Def/use shape of this input: |
| #   %84 - written twice (one DS_READ in bb.2, one in bb.3); read once (third |
| #         source operand of the first MFMA in bb.5). |
| #   %88 - written twice (DS_READ in bb.0 and the last MFMA in bb.5); read by |
| #         one DS_WRITE in bb.6 plus the trailing KILL. |
| # The autogenerated expectations keep every MFMA in its _vgprcd_e64 form. |
| # NOTE(review): the function name says src2 is "singledef_multiuse", but %84 |
| # has two defs and a single use here, which is the shape the name of the |
| # previous test (src2_multidef_singleuse_...) describes. The two names look |
| # swapped; confirm before renaming, since the IR stub at the top of the file |
| # and the label line would have to change together. |
| --- |
| name: src2_singledef_multiuse_dst_singleuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 128, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR: bb.0 falls through to bb.2 or branches to bb.3 on $scc; bb.2 and |
| ; bb.3 each define %84 and then branch to bb.4. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| ; Four chained MFMAs: each result is the third source operand of the next. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| # Def/use shape of this input: |
| #   %84 - written twice (one DS_READ in bb.2, one in bb.3); read twice (third |
| #         source operand of the first MFMA in bb.5, and a DS_WRITE in bb.6). |
| #   %88 - written twice (DS_READ in bb.0 and the last MFMA in bb.5); read by |
| #         one DS_WRITE in bb.6 plus the trailing KILL. |
| # The autogenerated expectations keep every MFMA in its _vgprcd_e64 form. |
| --- |
| name: src2_multidef_multiuse_dst_singleuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 384, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR: bb.0 falls through to bb.2 or branches to bb.3 on $scc; bb.2 and |
| ; bb.3 each define %84 and then branch to bb.4. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| ; Four chained MFMAs: each result is the third source operand of the next. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| ; %84 is stored in addition to %88, which gives %84 its second use. |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| # Def/use shape of this input: |
| #   %84 - written once (DS_READ in bb.0); read once (third source operand of |
| #         the first MFMA in bb.1). |
| #   %88 - written once (last MFMA in bb.1); read by a DS_WRITE in bb.3, a |
| #         DS_WRITE in bb.4, and the trailing KILL in bb.5. |
| # The autogenerated expectations keep every MFMA in its _vgprcd_e64 form. |
| --- |
| name: src2_singledef_singleuse_dst_multiuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR: bb.0, bb.1 and bb.2 run in sequence; bb.2 then branches to bb.4 |
| ; on $scc or falls through to bb.3. Both paths reach bb.5. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| |
| bb.1: |
| ; Four chained MFMAs: each result is the third source operand of the next. |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| ; %88 is stored on both paths: offset 0 here, offset 128 in bb.4. |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec |
| |
| bb.5: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| name: src2_multidef_singleuse_dst_multiuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 256, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| |
| bb.1: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec |
| |
| bb.5: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| |
| ... |
| |
| |
| --- |
| # Def/use shape of the input body below: |
| #   * src2 of the first MFMA (%84) is written by a DS_READ_B128 in both bb.2 |
| #     and bb.3 (two defs) and is read only by that first MFMA in bb.4. |
| #   * The last MFMA result (%88) has a single def in bb.4 and is read through |
| #     .sub0/.sub1 by the DS_WRITE_B32 pairs in bb.6 and bb.7 (and by the KILL). |
| # NOTE(review): the "src2_singledef_multiuse" part of the name does not match |
| # this body (%84 has two defs and one use here) -- confirm the name/body pairing. |
| # The input skips bb.1; the CHECK lines show the blocks printed as bb.0-bb.7. |
| # The CHECK lines expect the four chained MFMAs to keep the _vgprcd_e64 opcode |
| # with vreg_128_align2 results after the machine-scheduler run. |
| name: src2_singledef_multiuse_dst_multiuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 256, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.5: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.6: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec |
| S_BRANCH %bb.8 |
| |
| bb.7: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec |
| |
| bb.8: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| --- |
| # Def/use shape of the input body below: |
| #   * src2 of the first MFMA (%84) is written by a DS_READ_B128 in both bb.2 |
| #     and bb.3 (two defs) and is read twice: by the first MFMA in bb.4 and, |
| #     as %84.sub0, by the DS_WRITE_B32 in bb.8. |
| #   * The last MFMA result (%88) has a single def in bb.4 and is read through |
| #     .sub0/.sub1 by the DS_WRITE_B32 pairs in bb.6 and bb.7 (and by the KILL). |
| # The input skips bb.1; the CHECK lines show the blocks printed as bb.0-bb.7. |
| # The CHECK lines expect the four chained MFMAs to keep the _vgprcd_e64 opcode |
| # with vreg_128_align2 results after the machine-scheduler run. |
| name: src2_multidef_multiuse_dst_multiuse_singledef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 256, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF14]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF15]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF16]], [[DEF17]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.5: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.6: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec |
| S_BRANCH %bb.8 |
| |
| bb.7: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec |
| |
| bb.8: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %84.sub0:vreg_128_align2, 256, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Def/use shape of the input body below: |
| #   * src2 of the first MFMA (%84) has a single def (DS_READ_B128 in bb.0) and |
| #     a single use (the first MFMA in bb.1). |
| #   * The MFMA results are multiply defined: %85-%87 are IMPLICIT_DEF'd in bb.0 |
| #     and rewritten by the MFMAs in bb.1; %88 is written by a DS_READ_B128 in |
| #     bb.0 and again by the last MFMA in bb.1. |
| #   * %88 is read by the DS_WRITE_B128 in both bb.3 and bb.4 (and by the KILL). |
| #   * bb.0 branches straight to bb.3, so bb.3 is reachable without executing |
| #     the MFMA chain in bb.1. |
| # The CHECK lines expect the four chained MFMAs to keep the _vgprcd_e64 opcode |
| # with vreg_128_align2 results after the machine-scheduler run. |
| name: src2_singledef_singleuse_dst_multiuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.1: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec |
| |
| bb.5: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| --- |
| name: src2_multidef_singleuse_dst_multiuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub1, 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]].sub0, 256, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.1: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.2: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| bb.3: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec |
| S_BRANCH %bb.5 |
| |
| bb.4: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec |
| |
| bb.5: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| --- |
| # Machine-scheduler test with two branch diamonds around a chain of four MFMAs. |
| # src2 of the first MFMA (%84) is defined by a DS_READ in each arm of the first |
| # diamond (input bb.2 and bb.3) and is read only by that MFMA (input bb.5). |
| # The chain result %88 is defined both by a DS_READ in bb.0 and by the last MFMA; |
| # it is read by the DS_WRITE_B32s in both arms of the second diamond and by the |
| # final KILL. |
| # NOTE(review): the name says "src2_singledef_multiuse", but %84 has two defs and |
| # a single use in this body -- confirm whether "multidef_singleuse" was intended. |
| # The expected output keeps the MFMAs in their _vgprcd_e64 form with |
| # vreg_128_align2 operands. The input block labels skip bb.1, while the CHECK |
| # lines use contiguous block numbers (input bb.2 corresponds to CHECK bb.1, etc.). |
| name: src2_singledef_multiuse_dst_multiuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8: |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.8, implicit killed $scc |
| |
| bb.7: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec |
| S_BRANCH %bb.9 |
| |
| bb.8: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec |
| |
| bb.9: |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Machine-scheduler test with two branch diamonds around a chain of four MFMAs. |
| # src2 of the first MFMA (%84) is defined by a DS_READ in each arm of the first |
| # diamond (input bb.2 and bb.3) and is read twice: by that MFMA (input bb.5) and |
| # by the DS_WRITE_B128 in the final block (input bb.9). |
| # The chain result %88 is defined both by a DS_READ in bb.0 and by the last MFMA; |
| # it is read by the DS_WRITE_B32s in both arms of the second diamond and by the |
| # final KILL. |
| # The expected output keeps the MFMAs in their _vgprcd_e64 form with |
| # vreg_128_align2 operands. The input block labels skip bb.1, while the CHECK |
| # lines use contiguous block numbers (input bb.2 corresponds to CHECK bb.1, etc.). |
| name: src2_multidef_multiuse_dst_multiuse_multidef_agpr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 256, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF11]], 512, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF15]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF16]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[DEF17]], 4, 4, [[DEF14]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DS_READ_B128_gfx9_]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.8, implicit killed $scc |
| |
| bb.7: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec |
| S_BRANCH %bb.9 |
| |
| bb.8: |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec |
| |
| bb.9: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| |
| ... |
| |
| --- |
| # Machine-scheduler test with straight-line control flow around a chain of four |
| # MFMAs. src2 of the first MFMA (%84) has a single def, a V_ADD_U32_e32 writing |
| # only sub0 (an "undef" subregister def) in bb.0, and a single use in that MFMA. |
| # The chain result %88 has a single def (the last MFMA) and is read by the |
| # DS_WRITE_B128 and the KILL in the final block. |
| # NOTE(review): "mixed" presumably refers to src2 coming from a VALU instruction |
| # rather than a DS_READ -- confirm against the other *_mixed tests. |
| # The expected output places the V_ADD_U32_e32 in the same block as the MFMAs |
| # (CHECK bb.1) instead of bb.0 and keeps the MFMAs in their _vgprcd_e64 form. |
| # The input block labels skip bb.1, while the CHECK lines use contiguous block |
| # numbers (input bb.2 corresponds to CHECK bb.1, input bb.3 to CHECK bb.2). |
| name: src2_singledef_singleuse_dst_singleuse_singledef_mixed |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| |
| --- |
| name: src2_multidef_singleuse_dst_multiuse_multidef_mixed |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_ADD_U32_e32_]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_BRANCH %bb.8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.8(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[DEF19:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8: |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF21]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_]], [[DEF19]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.3: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| bb.4: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| |
| bb.5: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.6: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.8, implicit killed $scc |
| |
| bb.7: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| %94:vreg_128_align2 = IMPLICIT_DEF |
| S_BRANCH %bb.9 |
| |
| bb.8: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| |
| bb.9: |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| |
| ... |
| |
| --- |
| # Scheduler test for a chain of MFMAs (block numbers below refer to the input MIR). |
| # src2 = %84: defined once, in bb.0 (sub0 only); read by the first MFMA in bb.2 and |
| # again by the V_ADD_U32 in bb.4. |
| # dst = %88: partially defined in bb.0 (sub0) and fully redefined by the fourth MFMA |
| # in bb.2; apart from the final KILL its only reader is the DS_WRITE_B128 in bb.4. |
| # %89-%93 and %193 are six further MFMAs that all take %87 as src2 and are consumed |
| # only by the KILL in bb.3. |
| # %1-%8, %10-%13, %50 and %51 are consumed only by the trailing KILL / S_NOP; |
| # presumably they exist to raise register pressure - TODO confirm. |
| # The FileCheck assertions are autogenerated (see the NOTE at the top of the file); |
| # they expect every MFMA to keep its _vgprcd_e64 opcode and vreg_128_align2 result. |
| # NOTE(review): what the "_mixed" suffix distinguishes from the "_vgpr" variants is |
| # not evident from this function alone - confirm against the pass under test. |
| name: src2_singledef_multiuse_dst_singleuse_multidef_mixed |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF16]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF18]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF15]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_ADD_U32_e32_1]], 0, 0, implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF20]], [[DEF15]], [[DEF16]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR. Block numbers skip bb.1; the expected output above is numbered bb.0-bb.3. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| |
| ; MFMA chain %84 -> %85 -> %86 -> %87 -> %88 through src2, then six more MFMAs reading %87. |
| bb.2: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.3: |
| KILL %89, %90, %91, %92, %93, %193 |
| |
| ; Second read of %84 (src2 of the first MFMA) and the store of %88. |
| bb.4: |
| %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Scheduler test for a chain of MFMAs (block numbers below refer to the input MIR). |
| # src2 = %84: two definitions on alternative paths (DS_READ_B128 in bb.2, sub0-only |
| # V_ADD_U32 in bb.3); read by the first MFMA in bb.4 and again by the V_ADD_U32 in bb.8. |
| # dst = %88: result of the fourth MFMA in bb.4; read by both V_ADD_U32 in bb.6, by the |
| # DS_WRITE_B128 in bb.7 and by the final KILL. |
| # %94 is defined on both paths (bb.6 and bb.7) that follow the branch in bb.5. |
| # %1-%8, %10-%13, %50 and %51 are consumed only by the trailing KILL / S_NOP; |
| # presumably they exist to raise register pressure - TODO confirm. |
| # The FileCheck assertions are autogenerated (see the NOTE at the top of the file); |
| # they expect every MFMA to keep its _vgprcd_e64 opcode and vreg_128_align2 result. |
| # NOTE(review): %88 has a single definition in this input although the name says |
| # "dst_..._multidef" - confirm whether a second definition of %88 was intended. |
| name: src2_multidef_multiuse_dst_multiuse_multidef_mixed |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF14]], 0, 0, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec |
| ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF16]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF17]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR. Block numbers skip bb.1; the expected output above is numbered bb.0-bb.7. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| ; First of the two definitions of %84 (full 128-bit load). |
| bb.2: |
| %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec |
| S_BRANCH %bb.4 |
| |
| ; Second definition of %84 (sub0 only). |
| bb.3: |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec |
| |
| bb.4: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| |
| bb.5: |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.7, implicit killed $scc |
| |
| bb.6: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec |
| %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| S_BRANCH %bb.8 |
| |
| bb.7: |
| DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec |
| %94:vreg_128_align2 = IMPLICIT_DEF |
| |
| ; %84 is read again here, after the MFMA chain. |
| bb.8: |
| %95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Scheduler test in which the MFMA result register is also an MFMA src2 (block numbers |
| # below refer to the input MIR). |
| # %88 is IMPLICIT_DEF'd in bb.0 and again in bb.2, produced by the MFMA chain in bb.3 |
| # (%84 -> %85 -> %86 -> %87 -> %88), and then updated in place in bb.4 by four MFMAs |
| # that each read %88 as src2 and write %88. bb.4 also redefines %85-%87 as IMPLICIT_DEF. |
| # %1-%8, %10-%13, %50 and %51 are consumed only by the trailing KILL / S_NOP; |
| # presumably they exist to raise register pressure - TODO confirm. |
| # The FileCheck assertions are autogenerated (see the NOTE at the top of the file); |
| # the expected output contains no COPY instruction and every MFMA keeps its |
| # _vgprcd_e64 opcode and vreg_128_align2 result. |
| name: no_copy_for_mfma |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' |
| stackPtrOffsetReg: '$sgpr32' |
| argumentInfo: |
| privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } |
| workGroupIDX: { reg: '$sgpr6' } |
| privateSegmentWaveByteOffset: { reg: '$sgpr7' } |
| workItemIDX: { reg: '$vgpr0' } |
| sgprForEXECCopy: '$sgpr100_sgpr101' |
| body: | |
| ; CHECK-LABEL: name: no_copy_for_mfma |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_512 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_128 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_NOP 0, implicit-def %12 |
| ; CHECK-NEXT: S_NOP 0, implicit-def %13 |
| ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: dead [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_ADD_U32_e32_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $scc = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF11]], [[DEF12]], [[DEF17]], 4, 4, [[DEF13]].sub0, [[DEF14]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4: |
| ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF14]], implicit $exec |
| ; CHECK-NEXT: SCHED_BARRIER 0 |
| ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF |
| ; CHECK-NEXT: KILL [[DEF18]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF19]], [[DEF15]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[DEF17]], [[V_ADD_U32_e32_1]] |
| ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR. Block numbers skip bb.1; the expected output above is numbered bb.0-bb.4. |
| bb.0: |
| liveins: $vgpr0, $sgpr4_sgpr5 |
| %1:vreg_1024 = IMPLICIT_DEF |
| %2:vreg_1024 = IMPLICIT_DEF |
| %3:vreg_1024 = IMPLICIT_DEF |
| %4:vreg_1024 = IMPLICIT_DEF |
| %5:vreg_1024 = IMPLICIT_DEF |
| %6:vreg_1024 = IMPLICIT_DEF |
| %7:vreg_1024 = IMPLICIT_DEF |
| %8:vreg_512 = IMPLICIT_DEF |
| %10:vreg_64 = IMPLICIT_DEF |
| %11:vgpr_32 = IMPLICIT_DEF |
| %12:vreg_128 = IMPLICIT_DEF |
| %13:vreg_1024 = IMPLICIT_DEF |
| S_NOP 0, implicit-def %50:av_512 |
| S_NOP 0, implicit-def %51:av_512 |
| SCHED_BARRIER 0 |
| %60:av_128_align2 = IMPLICIT_DEF |
| %61:av_128_align2 = IMPLICIT_DEF |
| %62:vreg_128_align2 = IMPLICIT_DEF |
| %63:vreg_64_align2 = IMPLICIT_DEF |
| %64:vgpr_32 = IMPLICIT_DEF |
| %72:vreg_128_align2 = IMPLICIT_DEF |
| undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec |
| %88:vreg_128_align2 = IMPLICIT_DEF |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.3, implicit killed $scc |
| |
| bb.2: |
| %88:vreg_128_align2 = IMPLICIT_DEF |
| S_BRANCH %bb.4 |
| |
| |
| bb.3: |
| %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| $scc = IMPLICIT_DEF |
| S_CBRANCH_SCC1 %bb.5, implicit killed $scc |
| |
| ; Each MFMA below reads %88 as src2 and writes %88. |
| bb.4: |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %86:vreg_128_align2 = IMPLICIT_DEF |
| %85:vreg_128_align2 = IMPLICIT_DEF |
| %87:vreg_128_align2 = IMPLICIT_DEF |
| |
| bb.5: |
| undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec |
| SCHED_BARRIER 0 |
| KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 |
| S_NOP 0, implicit %50, implicit %51 |
| S_ENDPGM 0 |
| ... |