blob: 4a27fa7913a6e64f4a0966a8f6d7af7f8eafb738 [file] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s
# Regression test for mergeAsyncMarks() asserting when OtherMarks is empty.
#
# At a CFG join point where one predecessor has an ASYNCMARK (non-empty
# AsyncMarks) and the other does not (empty OtherMarks), MergeCount becomes
# min(0, N) = 0. Before the fix, seq_inclusive<unsigned>(1, 0) asserted
# Begin <= End. After the fix the function returns early when either side
# is empty.
#
# GLOBAL_LOAD_ASYNC_TO_LDS_B32 is a GFX1250 async LDS DMA instruction tracked
# via ASYNC_CNT. isAsync() returns true for it, so the score is recorded into
# AsyncScore before ASYNCMARK pushes it onto AsyncMarks.
#
# The join block contains WAIT_ASYNCMARK 0 to consume the pending mark.
# Before the fix, mergeAsyncMarks() asserted before reaching the wait.
# After the fix the pass completes without asserting.
#
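# Two patterns are tested. In both, bb.1 is the fallthrough ("then") successor
# of bb.0 and bb.2 is the S_CBRANCH_SCC1 target ("else") successor:
# asyncmark_in_then - ASYNCMARK in the then-successor, else-successor is sync
# asyncmark_in_else - ASYNCMARK in the else-successor, then-successor is sync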
---
# Pattern 1: ASYNCMARK in then-successor, else-successor is sync.
# bb.0 conditionally branches to bb.2 (S_CBRANCH_SCC1) and otherwise falls
# through to bb.1. bb.1 issues the async load followed by ASYNCMARK; bb.2 only
# branches on. Both reach the join block bb.3, which holds WAIT_ASYNCMARK 0.
# The assertion lines at the top of the body are autogenerated (see the note
# at the top of this file); regenerate them instead of editing by hand.
name: asyncmark_in_then
tracksRegLiveness: true
machineFunctionInfo:
occupancy: 8
body: |
; CHECK-LABEL: name: asyncmark_in_then
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
; CHECK-NEXT: ASYNCMARK
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: WAIT_ASYNCMARK 0
; CHECK-NEXT: S_WAIT_ASYNCCNT 0, implicit-def $asynccnt, implicit $asynccnt
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2
S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; bb.1 ("then", fallthrough from bb.0): issues the async LDS DMA, then ASYNCMARK.
bb.1:
liveins: $vgpr0_vgpr1, $vgpr2
GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
ASYNCMARK
S_BRANCH %bb.3
; bb.2 ("else", S_CBRANCH_SCC1 target): sync path with no async operations, so
; this predecessor contributes no marks; OtherMarks is empty at the join.
bb.2:
S_BRANCH %bb.3
; bb.3 (join): mergeAsyncMarks sees non-empty AsyncMarks (then) and empty
; OtherMarks (else). Before the fix: assertion. After the fix: the merge returns
; early and the pass completes; the expected output has S_WAIT_ASYNCCNT 0
; immediately after WAIT_ASYNCMARK 0.
bb.3:
WAIT_ASYNCMARK 0
S_ENDPGM 0
...
---
# Pattern 2: ASYNCMARK in else-successor, then-successor is sync.
# Mirror of asyncmark_in_then: exercises the opposite predecessor ordering.
# Here bb.1 (fallthrough from bb.0) only branches on, while bb.2 (the
# S_CBRANCH_SCC1 target) issues the async load followed by ASYNCMARK. Both
# reach the join block bb.3, which holds WAIT_ASYNCMARK 0.
# The assertion lines at the top of the body are autogenerated (see the note
# at the top of this file); regenerate them instead of editing by hand.
name: asyncmark_in_else
tracksRegLiveness: true
machineFunctionInfo:
occupancy: 8
body: |
; CHECK-LABEL: name: asyncmark_in_else
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
; CHECK-NEXT: ASYNCMARK
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: WAIT_ASYNCMARK 0
; CHECK-NEXT: S_WAIT_ASYNCCNT 0, implicit-def $asynccnt, implicit $asynccnt
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2
S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; bb.1 ("then", fallthrough from bb.0): sync path, no async operations.
bb.1:
S_BRANCH %bb.3
; bb.2 ("else", S_CBRANCH_SCC1 target): issues the async LDS DMA, then ASYNCMARK.
bb.2:
liveins: $vgpr0_vgpr1, $vgpr2
GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
ASYNCMARK
S_BRANCH %bb.3
; bb.3 (join): one predecessor (bb.2) carries an ASYNCMARK and the other (bb.1)
; carries none, so the merge again has one empty side. Which side the pass sees
; as OtherMarks depends on its visitation order, which this file does not show.
; The expected output has S_WAIT_ASYNCCNT 0 immediately after WAIT_ASYNCMARK 0.
bb.3:
WAIT_ASYNCMARK 0
S_ENDPGM 0
...