| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s |
| |
| --- |
| # Test: DS prefetch with flush points in a single-block loop. |
| # The preheader issues three DS loads. The loop issues DS loads, some of which are used in the same iteration |
| # (creating flush points), while others remain unflushed at the backedge (prefetches). |
| # Expected: S_WAIT_DSCNT 0 in the preheader, non-zero S_WAIT_DSCNT values in the loop. |
| name: ds_prefetch_flushed |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| ; CHECK-LABEL: name: ds_prefetch_flushed |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $vgpr0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = DS_READ_B128 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: $vgpr28 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: $vgpr32 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_DSCNT 0 |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10_vgpr11_vgpr12_vgpr13, $vgpr28, $vgpr32 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $vgpr50 = V_ADD_F32_e32 $vgpr10, $vgpr11, implicit $mode, implicit $exec |
| ; CHECK-NEXT: S_WAIT_DSCNT 1 |
| ; CHECK-NEXT: $vgpr51 = V_ADD_F32_e32 $vgpr28, $vgpr28, implicit $mode, implicit $exec |
| ; CHECK-NEXT: S_BARRIER |
| ; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = DS_READ_B128 $vgpr0, 64, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = DS_READ_B128 $vgpr0, 80, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_DSCNT 2 |
| ; CHECK-NEXT: $vgpr52 = V_ADD_F32_e32 $vgpr32, $vgpr32, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = DS_READ_B128 $vgpr0, 96, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: $vgpr32_vgpr33_vgpr34_vgpr35 = DS_READ_B128 $vgpr0, 112, 0, implicit $m0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_DSCNT 2 |
| ; CHECK-NEXT: $vgpr53 = V_ADD_F32_e32 $vgpr24, $vgpr25, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $sgpr0 = S_ADD_I32 $sgpr0, -1, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc |
| ; CHECK-NEXT: S_BRANCH %bb.2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: S_ENDPGM 0 |
bb.0: |
| successors: %bb.1 |
| liveins: $sgpr0, $vgpr0 |
| |
| ; Preheader: three DS loads (one B128, two B32) whose results are live into the loop |
| $vgpr10_vgpr11_vgpr12_vgpr13 = DS_READ_B128 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| $vgpr28 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| $vgpr32 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| successors: %bb.1, %bb.2 |
| liveins: $sgpr0, $vgpr0, $vgpr10_vgpr11_vgpr12_vgpr13, $vgpr28, $vgpr32 |
| |
| ; Use preheader values $vgpr10/$vgpr11 (from the B128 load in bb.0) |
| $vgpr50 = V_ADD_F32_e32 $vgpr10, $vgpr11, implicit $mode, implicit $exec |
| |
| ; Use $vgpr28: holds the preheader B32 load on loop entry and the offset-96 prefetch (below) after the backedge |
| $vgpr51 = V_ADD_F32_e32 $vgpr28, $vgpr28, implicit $mode, implicit $exec |
| |
| ; Barrier |
| S_BARRIER |
| |
| ; Loads 1-2 (offsets 64 and 80): will be "flushed" by the same-iteration use of $vgpr24/$vgpr25 below |
| $vgpr20_vgpr21_vgpr22_vgpr23 = DS_READ_B128 $vgpr0, 64, 0, implicit $m0, implicit $exec |
| $vgpr24_vgpr25_vgpr26_vgpr27 = DS_READ_B128 $vgpr0, 80, 0, implicit $m0, implicit $exec |
| ; Use $vgpr32: holds the preheader B32 load on loop entry and the offset-112 prefetch (below) after the backedge |
| $vgpr52 = V_ADD_F32_e32 $vgpr32, $vgpr32, implicit $mode, implicit $exec |
| ; Loads 3-4 (offsets 96 and 112): no use follows in this iteration, so they remain unflushed - |
| ; true prefetches whose results ($vgpr28, $vgpr32) are first read in the next iteration |
| $vgpr28_vgpr29_vgpr30_vgpr31 = DS_READ_B128 $vgpr0, 96, 0, implicit $m0, implicit $exec |
| $vgpr32_vgpr33_vgpr34_vgpr35 = DS_READ_B128 $vgpr0, 112, 0, implicit $m0, implicit $exec |
| |
| ; Use $vgpr24/$vgpr25 (from load 2) - creates flush point, flushes loads 1-2. |
| ; The checks expect S_WAIT_DSCNT 2 here, presumably leaving loads 3-4 outstanding. |
| $vgpr53 = V_ADD_F32_e32 $vgpr24, $vgpr25, implicit $mode, implicit $exec |
| |
| ; Loop control: decrement $sgpr0, branch back to bb.1 on SCC1, otherwise exit to bb.2 |
| $sgpr0 = S_ADD_I32 $sgpr0, -1, implicit-def $scc |
| S_CBRANCH_SCC1 %bb.1, implicit $scc |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| S_ENDPGM 0 |
| ... |