# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass post-RA-hazard-rec -amdgpu-wmma-vnop-hoisting=false %s -o - | FileCheck -check-prefix=NOHOIST %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=HOIST %s
# Test 1: WMMA outside loop, VALU inside loop.
# bb.0 is the only predecessor of the self-looping block bb.1 besides bb.1
# itself, and the V_ADD in bb.1 reads $vgpr16, which the WMMA in bb.0 writes.
# The NOPs should be hoisted from the loop body to the preheader: with hoisting
# enabled the four V_NOPs are expected at the end of bb.0, before its S_BRANCH;
# with hoisting disabled they stay in front of the V_ADD inside the loop.
---
name: test_simple_loop_hoist
body: |
; NOHOIST-LABEL: name: test_simple_loop_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
;
; HOIST-LABEL: name: test_simple_loop_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
bb.0:
successors: %bb.1
; Preheader: WMMA writes vgpr16-23 before entering the loop
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.1
; Single-block loop: VALU reads vgpr16 written by the WMMA in bb.0
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_BRANCH %bb.1
...
# Test 2: WMMA hazard INSIDE the loop; should NOT hoist.
# The WMMA that writes $vgpr16 and the V_ADD that reads it are both in the
# loop block bb.1, so the four V_NOPs are expected between them in both runs
# and bb.0 is expected to stay NOP-free.
---
name: test_internal_hazard_no_hoist
body: |
; NOHOIST-LABEL: name: test_internal_hazard_no_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
;
; HOIST-LABEL: name: test_internal_hazard_no_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
bb.0:
; Preheader contains only the branch into the loop
successors: %bb.1
S_BRANCH %bb.1
bb.1:
successors: %bb.1
; WMMA inside the loop writes to vgpr16-23, VALU reads vgpr16
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_BRANCH %bb.1
...
# Test 3: WMMA in loop, but it does not conflict with the VALU; should hoist.
# The in-loop WMMA writes $vgpr56-63, which the V_ADD does not read. The V_ADD
# reads $vgpr16, written only by the WMMA in the preheader bb.0, so the hazard
# is still purely external to the loop.
# Expected: three V_NOPs. With hoisting disabled they sit between the in-loop
# WMMA and the V_ADD; with hoisting enabled they sit at the end of bb.0.
# NOTE(review): three rather than four NOPs - presumably the in-loop WMMA
# itself accounts for one wait state; confirm against the hazard recognizer.
---
name: test_wmma_in_loop_no_conflict_hoist
body: |
; NOHOIST-LABEL: name: test_wmma_in_loop_no_conflict_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
;
; HOIST-LABEL: name: test_wmma_in_loop_no_conflict_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
bb.0:
successors: %bb.1
; External WMMA (in the preheader) writes to vgpr16-23
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.1
; Loop WMMA reads vgpr40-55 and writes vgpr56-63 (disjoint from vgpr16-23)
$vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, 8, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, 0, implicit $exec
; This reads vgpr16 from the external WMMA, not from the loop WMMA above
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_BRANCH %bb.1
...
# Test 4: WMMA outside both loops, VALU in inner loop.
# Loop nest: the outer loop is {bb.1, bb.2} with header bb.1 (back-edge from
# bb.2); the inner loop is the self-loop on bb.2. bb.0 is the only block
# outside the outer loop that branches to its header.
# NOPs should be hoisted to the outermost preheader (bb.0): with hoisting
# enabled the four V_NOPs are expected after the WMMA in bb.0 and none in bb.2.
---
name: test_nested_loop_hoist_to_outermost
body: |
; NOHOIST-LABEL: name: test_nested_loop_hoist_to_outermost
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.2:
; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.3:
; NOHOIST-NEXT: S_ENDPGM 0
;
; HOIST-LABEL: name: test_nested_loop_hoist_to_outermost
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.2:
; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.3:
; HOIST-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
; WMMA outside all loops - writes to vgpr16-23
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
; Outer loop header - can exit to bb.3 or continue to bb.2
; Contains no VALU or WMMA instructions, only the two branches
successors: %bb.3, %bb.2
S_CBRANCH_SCC1 %bb.3, implicit undef $scc
S_BRANCH %bb.2
bb.2:
; Inner loop - VALU reads vgpr16 from external WMMA
; Back-edge to bb.2 (inner) or bb.1 (outer)
successors: %bb.2, %bb.1
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.3:
; Exit block
S_ENDPGM 0
...
# Test 5: Triple nested loop - WMMA hazard in outer loop (L1)
# VALU in innermost loop (L3) reads from WMMA in L1's body
# Loop nest, as given by the successor lists below:
#   L1 = {bb.1 .. bb.5}, header bb.1, latch bb.5
#   L2 = {bb.2, bb.3, bb.4}, header bb.2, latch bb.4
#   L3 = {bb.3}, self-loop
# The WMMA sits in bb.1, which is inside L1 but outside L2 and L3, so the NOPs
# cannot leave L1. NOPs should be hoisted to L2's preheader (bb.1): with
# hoisting enabled the four V_NOPs are expected right after the WMMA in bb.1
# and none in bb.2 or bb.3.
---
name: test_triple_nested_hoist_to_intermediate
body: |
; NOHOIST-LABEL: name: test_triple_nested_hoist_to_intermediate
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.2(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.2:
; NOHOIST-NEXT: successors: %bb.3(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_BRANCH %bb.3
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.3:
; NOHOIST-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.3
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.4:
; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.5, implicit undef $scc
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.5:
; NOHOIST-NEXT: successors: %bb.1(0x40000000), %bb.6(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.6:
; NOHOIST-NEXT: S_ENDPGM 0
;
; HOIST-LABEL: name: test_triple_nested_hoist_to_intermediate
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.2(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.2:
; HOIST-NEXT: successors: %bb.3(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_BRANCH %bb.3
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.3:
; HOIST-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.3
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.4:
; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_CBRANCH_SCC1 %bb.5, implicit undef $scc
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.5:
; HOIST-NEXT: successors: %bb.1(0x40000000), %bb.6(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.6:
; HOIST-NEXT: S_ENDPGM 0
bb.0:
; Entry block - only branches to the L1 header
successors: %bb.1
S_BRANCH %bb.1
bb.1:
; L1 header and L2 preheader - WMMA writes vgpr16-23 once per L1 iteration
successors: %bb.2
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.2
bb.2:
; L2 header and L3 preheader - reached from bb.1 and from the L2 latch bb.4
successors: %bb.3
S_BRANCH %bb.3
bb.3:
; L3 (innermost, self-loop) - VALU reads vgpr16 from WMMA in bb.1
successors: %bb.3, %bb.4
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
; L2 latch - back to L2 header or exit to L1 latch
successors: %bb.2, %bb.5
S_CBRANCH_SCC1 %bb.5, implicit undef $scc
S_BRANCH %bb.2
bb.5:
; L1 latch - back to L1 header or exit
successors: %bb.1, %bb.6
S_CBRANCH_SCC1 %bb.6, implicit undef $scc
S_BRANCH %bb.1
bb.6:
; Exit
S_ENDPGM 0
...
# Test 6: No preheader (multiple predecessors) - cannot hoist.
# The loop header bb.2 is entered from both bb.0 and bb.1, so there is no
# single block outside the loop that leads into it. The four V_NOPs are
# expected to stay in front of the V_ADD in bb.2 in both runs.
---
name: test_no_preheader_no_hoist
body: |
; NOHOIST-LABEL: name: test_no_preheader_no_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.2(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.2:
; NOHOIST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.2
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.3:
; NOHOIST-NEXT: S_ENDPGM 0
;
; HOIST-LABEL: name: test_no_preheader_no_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.2(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.2:
; HOIST-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.2
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.3:
; HOIST-NEXT: S_ENDPGM 0
bb.0:
; WMMA writes vgpr16-23, then control goes either straight to the loop
; header (bb.2) or through bb.1
successors: %bb.1, %bb.2
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
S_BRANCH %bb.1
bb.1:
; Second entry path into the loop header
successors: %bb.2
S_BRANCH %bb.2
bb.2:
; Loop header with two predecessors (bb.0 and bb.1)
; VALU reads vgpr16 written by the WMMA in bb.0
successors: %bb.2, %bb.3
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.3:
S_ENDPGM 0
...
# Test 7: Preheader without terminator.
# bb.0 has no branch instruction and falls through into the loop block bb.1.
# With hoisting enabled the four V_NOPs are expected to be appended at the end
# of bb.0 (after the WMMA), and none are expected in bb.1.
---
name: test_fallthrough_preheader_hoist
body: |
; NOHOIST-LABEL: name: test_fallthrough_preheader_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
;
; HOIST-LABEL: name: test_fallthrough_preheader_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
bb.0:
successors: %bb.1
; Preheader: WMMA writes vgpr16-23; no terminator follows, so control falls
; through to bb.1
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
bb.1:
successors: %bb.1
; Single-block loop: VALU reads vgpr16 written by the WMMA in bb.0
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
S_BRANCH %bb.1
...
# Test 8: Not in a loop; should NOT hoist.
# The function is a single straight-line block, so there is nowhere to hoist
# to: the four V_NOPs are expected between the WMMA and the V_ADD in both runs.
---
name: test_not_in_loop_no_hoist
body: |
bb.0:
; NOHOIST-LABEL: name: test_not_in_loop_no_hoist
; NOHOIST: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
;
; HOIST-LABEL: name: test_not_in_loop_no_hoist
; HOIST: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; WMMA writes vgpr16-23 and the VALU immediately after it reads vgpr16
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
...
# Test 9: VALU first, then WMMA in same loop (hazard via back-edge); should
# NOT hoist.
# Within one iteration the V_ADD precedes the WMMA, but the loop branches back
# to itself, so the V_ADD of the next iteration reads $vgpr16 right after the
# WMMA wrote it. The four V_NOPs are expected at the top of bb.1 in both runs,
# and bb.0 is expected to stay NOP-free.
---
name: test_valu_before_wmma_backedge_no_hoist
body: |
; NOHOIST-LABEL: name: test_valu_before_wmma_backedge_no_hoist
; NOHOIST: bb.0:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: S_BRANCH %bb.1
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: bb.1:
; NOHOIST-NEXT: successors: %bb.1(0x80000000)
; NOHOIST-NEXT: {{ $}}
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: V_NOP_e32 implicit $exec
; NOHOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; NOHOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; NOHOIST-NEXT: S_BRANCH %bb.1
;
; HOIST-LABEL: name: test_valu_before_wmma_backedge_no_hoist
; HOIST: bb.0:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: S_BRANCH %bb.1
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: bb.1:
; HOIST-NEXT: successors: %bb.1(0x80000000)
; HOIST-NEXT: {{ $}}
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: V_NOP_e32 implicit $exec
; HOIST-NEXT: $vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
; HOIST-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
; HOIST-NEXT: S_BRANCH %bb.1
bb.0:
; Preheader contains only the branch into the loop
successors: %bb.1
S_BRANCH %bb.1
bb.1:
successors: %bb.1
; VALU reads vgpr16 first; the WMMA after it writes vgpr16-23, and the
; back-edge to bb.1 brings that write in front of the next iteration's read
$vgpr25 = V_ADD_F32_e32 $vgpr24, $vgpr16, implicit $mode, implicit $exec
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
...