# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s
# Test for the fix that removed the incorrect S_BARRIER check for DS stores.
# Previously, the code would reset SeenDSStoreInCurrMBB when encountering an
# S_BARRIER, incorrectly assuming that stores postdominated by a barrier would
# be waited at the barrier. This was wrong because:
# 1. Without AutoWaitcntBeforeBarrier, S_BARRIER does not automatically wait
#    for DS stores to complete.
# 2. With the BackOffBarrier feature, no zero waitcnt is added for S_BARRIER,
#    so outstanding memory ops are not flushed by the barrier either.
#
# This test ensures that when a loop has a DS store followed by S_BARRIER,
# the preheader flush optimization is NOT applied (no S_WAIT_DSCNT in preheader).
# The wait should happen inside the loop instead.
---
# Test: DS store followed by S_BARRIER in a loop.
# The preheader (bb.0) issues a DS load; its result ($vgpr10) is used in the
# loop (bb.1), which also contains a DS store followed by S_BARRIER.
# The preheader should NOT have S_WAIT_DSCNT because SeenDSStoreInLoop = true.
# Instead, the wait should be inside the loop.
name: ds_store_barrier_no_preheader_flush
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; CHECK-LABEL: name: ds_store_barrier_no_preheader_flush
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x80000000)
  ; CHECK-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = DS_READ_B128 $vgpr0, 0, 0, implicit $m0, implicit $exec
  ; Verify NO S_WAIT_DSCNT in preheader - the wait must be inside the loop
  ; CHECK-NOT: S_WAIT_DSCNT
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr10
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_WAIT_DSCNT 0
  ; CHECK-NEXT: $vgpr30 = V_ADD_F32_e32 $vgpr10, $vgpr1, implicit $mode, implicit $exec
  ; CHECK-NEXT: DS_WRITE_B32 $vgpr2, $vgpr30, 0, 0, implicit $m0, implicit $exec
  ; With BackOffBarrier, no S_WAIT_DSCNT needed before S_BARRIER
  ; CHECK-NEXT: S_BARRIER
  ; CHECK-NEXT: $sgpr0 = S_ADD_I32 $sgpr0, -1, implicit-def $scc
  ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
  ; CHECK-NEXT: S_BRANCH %bb.2
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1
    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2

    ; Preheader: DS load whose result is consumed inside the loop
    $vgpr10_vgpr11_vgpr12_vgpr13 = DS_READ_B128 $vgpr0, 0, 0, implicit $m0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr10

    ; Use DS-loaded value from preheader
    $vgpr30 = V_ADD_F32_e32 $vgpr10, $vgpr1, implicit $mode, implicit $exec

    ; DS store followed by barrier - this should NOT reset SeenDSStoreInCurrMBB
    DS_WRITE_B32 $vgpr2, $vgpr30, 0, 0, implicit $m0, implicit $exec
    S_BARRIER

    ; Loop control: decrement the counter in $sgpr0 and branch on SCC
    $sgpr0 = S_ADD_I32 $sgpr0, -1, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit $scc
    S_BRANCH %bb.2

  bb.2:
    ; Loop exit
    S_ENDPGM 0
...