| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass post-RA-hazard-rec -amdgpu-wmma-vnop-hoisting=false %s -o - | FileCheck -check-prefix=NOHOIST %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=HOIST %s |
| |
| # Test 1: WMMA outside loop, VALU inside loop |
| # The NOPs should be hoisted from the loop body to the preheader |
| --- |
| name: test_simple_loop_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_simple_loop_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; |
| ; HOIST-LABEL: name: test_simple_loop_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| bb.0: |
| ; Loop preheader: the WMMA writes vgpr16-23 before the loop is entered |
| successors: %bb.1 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_BRANCH %bb.1 |
| bb.1: |
| ; Single-block loop (branches back to itself): the VALU reads vgpr16, |
| ; which was written by the WMMA in bb.0 |
| successors: %bb.1 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_BRANCH %bb.1 |
| ... |
| |
| # Test 2: WMMA hazard INSIDE the loop; should NOT hoist |
| --- |
| name: test_internal_hazard_no_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_internal_hazard_no_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; |
| ; HOIST-LABEL: name: test_internal_hazard_no_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| bb.0: |
| ; Empty preheader: nothing outside the loop takes part in the hazard |
| successors: %bb.1 |
| S_BRANCH %bb.1 |
| bb.1: |
| successors: %bb.1 |
| ; WMMA inside the loop writes vgpr16-23 and the VALU right after it reads |
| ; vgpr16, so the hazard is produced and consumed within the loop body |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_BRANCH %bb.1 |
| ... |
| |
| # Test 3: WMMA inside the loop writes registers unrelated to the hazard; the NOPs for the external WMMA should still be hoisted |
| --- |
| name: test_wmma_in_loop_no_conflict_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_wmma_in_loop_no_conflict_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; |
| ; HOIST-LABEL: name: test_wmma_in_loop_no_conflict_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| bb.0: |
| successors: %bb.1 |
| ; External WMMA (in the preheader) writes vgpr16-23 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_BRANCH %bb.1 |
| bb.1: |
| successors: %bb.1 |
| ; Loop WMMA reads vgpr40-55 and writes vgpr56-63; none of these registers |
| ; are used by the V_ADD below |
| $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec |
| ; This reads vgpr16, which was written by the external WMMA in bb.0 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_BRANCH %bb.1 |
| ... |
| |
| # Test 4: WMMA outside both loops, VALU in inner loop |
| # NOPs should be hoisted to the outermost preheader (bb.0) |
| --- |
| name: test_nested_loop_hoist_to_outermost |
| body: | |
| ; NOHOIST-LABEL: name: test_nested_loop_hoist_to_outermost |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.2: |
| ; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.3: |
| ; NOHOIST-NEXT: S_ENDPGM 0 |
| ; |
| ; HOIST-LABEL: name: test_nested_loop_hoist_to_outermost |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.2: |
| ; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.3: |
| ; HOIST-NEXT: S_ENDPGM 0 |
| bb.0: |
| successors: %bb.1 |
| ; WMMA outside all loops - writes to vgpr16-23; bb.0 is the preheader of |
| ; the outer loop |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_BRANCH %bb.1 |
| bb.1: |
| ; Outer loop header - can exit to bb.3 or continue to bb.2 |
| successors: %bb.3, %bb.2 |
| S_CBRANCH_SCC1 %bb.3, implicit undef $scc |
| S_BRANCH %bb.2 |
| bb.2: |
| ; Inner loop - VALU reads vgpr16, written by the external WMMA in bb.0 |
| ; Back-edge to bb.2 (inner loop) or bb.1 (outer loop) |
| successors: %bb.2, %bb.1 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_CBRANCH_EXECZ %bb.1, implicit $exec |
| S_BRANCH %bb.2 |
| bb.3: |
| ; Exit block |
| S_ENDPGM 0 |
| ... |
| |
| # Test 5: Triple nested loop - WMMA hazard in outer loop (L1) |
| # VALU in innermost loop (L3) reads from WMMA in L1's body |
| # NOPs should be hoisted to L2's preheader (bb.1) |
| --- |
| name: test_triple_nested_hoist_to_intermediate |
| body: | |
| ; NOHOIST-LABEL: name: test_triple_nested_hoist_to_intermediate |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.2(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.2: |
| ; NOHOIST-NEXT: successors: %bb.3(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_BRANCH %bb.3 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.3: |
| ; NOHOIST-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.3 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.4: |
| ; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.5, implicit undef $scc |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.5: |
| ; NOHOIST-NEXT: successors: %bb.1(0x40000000), %bb.6(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.6: |
| ; NOHOIST-NEXT: S_ENDPGM 0 |
| ; |
| ; HOIST-LABEL: name: test_triple_nested_hoist_to_intermediate |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.2(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.2: |
| ; HOIST-NEXT: successors: %bb.3(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_BRANCH %bb.3 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.3: |
| ; HOIST-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.3 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.4: |
| ; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_CBRANCH_SCC1 %bb.5, implicit undef $scc |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.5: |
| ; HOIST-NEXT: successors: %bb.1(0x40000000), %bb.6(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.6: |
| ; HOIST-NEXT: S_ENDPGM 0 |
| bb.0: |
| ; Entry block - only branches into the outermost loop (L1) |
| successors: %bb.1 |
| S_BRANCH %bb.1 |
| bb.1: |
| ; L1 header (target of the back-edge from bb.5) and sole entry into L2 |
| ; WMMA writes vgpr16-23 here, i.e. inside L1 but outside L2 and L3 |
| successors: %bb.2 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_BRANCH %bb.2 |
| bb.2: |
| ; L2 header (target of the back-edge from bb.4) and sole entry into L3 |
| successors: %bb.3 |
| S_BRANCH %bb.3 |
| bb.3: |
| ; L3 (innermost, single-block loop) - VALU reads vgpr16 from WMMA in bb.1 |
| successors: %bb.3, %bb.4 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_CBRANCH_EXECZ %bb.4, implicit $exec |
| S_BRANCH %bb.3 |
| bb.4: |
| ; L2 latch - back to L2 header or exit to L1 latch |
| successors: %bb.2, %bb.5 |
| S_CBRANCH_SCC1 %bb.5, implicit undef $scc |
| S_BRANCH %bb.2 |
| bb.5: |
| ; L1 latch - back to L1 header or exit |
| successors: %bb.1, %bb.6 |
| S_CBRANCH_SCC1 %bb.6, implicit undef $scc |
| S_BRANCH %bb.1 |
| bb.6: |
| ; Exit |
| S_ENDPGM 0 |
| ... |
| |
| # Test 6: No preheader (loop header has multiple predecessors outside the loop) - cannot hoist |
| --- |
| name: test_no_preheader_no_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_no_preheader_no_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.2(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.2: |
| ; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.2 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.3: |
| ; NOHOIST-NEXT: S_ENDPGM 0 |
| ; |
| ; HOIST-LABEL: name: test_no_preheader_no_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.2(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.2: |
| ; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.2 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.3: |
| ; HOIST-NEXT: S_ENDPGM 0 |
| bb.0: |
| ; WMMA writes vgpr16-23, then control reaches the loop either directly |
| ; (bb.0 -> bb.2) or through bb.1 |
| successors: %bb.1, %bb.2 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_CBRANCH_SCC1 %bb.2, implicit undef $scc |
| S_BRANCH %bb.1 |
| bb.1: |
| ; Second path into the loop header |
| successors: %bb.2 |
| S_BRANCH %bb.2 |
| bb.2: |
| ; Loop header entered from two blocks outside the loop (bb.0 and bb.1), |
| ; so no single block acts as its preheader; VALU reads vgpr16 |
| successors: %bb.2, %bb.3 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_CBRANCH_EXECZ %bb.3, implicit $exec |
| S_BRANCH %bb.2 |
| bb.3: |
| ; Exit block |
| S_ENDPGM 0 |
| ... |
| |
| # Test 7: Preheader without terminator (falls through to the loop header); NOPs should be hoisted to the end of the preheader |
| --- |
| name: test_fallthrough_preheader_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_fallthrough_preheader_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; |
| ; HOIST-LABEL: name: test_fallthrough_preheader_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| bb.0: |
| ; Preheader with no branch instruction: after the WMMA (writes vgpr16-23) |
| ; control falls through into bb.1 |
| successors: %bb.1 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| bb.1: |
| ; Single-block loop: VALU reads vgpr16, written by the WMMA in bb.0 |
| successors: %bb.1 |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| S_BRANCH %bb.1 |
| ... |
| |
| # Test 8: Not in a loop; should NOT hoist |
| --- |
| name: test_not_in_loop_no_hoist |
| body: | |
| bb.0: |
| ; NOHOIST-LABEL: name: test_not_in_loop_no_hoist |
| ; NOHOIST: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; |
| ; HOIST-LABEL: name: test_not_in_loop_no_hoist |
| ; HOIST: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; Straight-line code in a single block with no loop: the WMMA writes |
| ; vgpr16-23 and the VALU immediately after it reads vgpr16 |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ... |
| |
| # Test 9: VALU first, then WMMA in the same loop (hazard via back-edge); should NOT hoist |
| --- |
| name: test_valu_before_wmma_backedge_no_hoist |
| body: | |
| ; NOHOIST-LABEL: name: test_valu_before_wmma_backedge_no_hoist |
| ; NOHOIST: bb.0: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: bb.1: |
| ; NOHOIST-NEXT: successors: %bb.1(0x80000000) |
| ; NOHOIST-NEXT: {{ $}} |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: V_NOP_e32 implicit $exec |
| ; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; NOHOIST-NEXT: S_BRANCH %bb.1 |
| ; |
| ; HOIST-LABEL: name: test_valu_before_wmma_backedge_no_hoist |
| ; HOIST: bb.0: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: bb.1: |
| ; HOIST-NEXT: successors: %bb.1(0x80000000) |
| ; HOIST-NEXT: {{ $}} |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: V_NOP_e32 implicit $exec |
| ; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| ; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| ; HOIST-NEXT: S_BRANCH %bb.1 |
| bb.0: |
| ; Empty preheader: no WMMA executes before the loop is entered |
| successors: %bb.1 |
| S_BRANCH %bb.1 |
| bb.1: |
| successors: %bb.1 |
| ; The VALU reads vgpr16 before the WMMA in the same block writes vgpr16-23, |
| ; so the WMMA result reaches the VALU only via the bb.1 -> bb.1 back-edge |
| $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec |
| S_BRANCH %bb.1 |
| ... |