| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s |
| |
| # Regression test for mergeAsyncMarks() asserting when OtherMarks is empty. |
| # |
| # At a CFG join point where one predecessor has an ASYNCMARK (non-empty |
| # AsyncMarks) and the other does not (empty OtherMarks), MergeCount becomes |
| # min(0, N) = 0. Before the fix, seq_inclusive<unsigned>(1, 0) asserted |
| # Begin <= End. After the fix the function returns early when either side |
| # is empty. |
| # |
| # GLOBAL_LOAD_ASYNC_TO_LDS_B32 is a GFX1250 async LDS DMA instruction tracked |
| # via ASYNC_CNT. isAsync() returns true for it, so the score is recorded into |
| # AsyncScore before ASYNCMARK pushes it onto AsyncMarks. |
| # |
| # The join block contains WAIT_ASYNCMARK 0 to consume the pending mark. |
| # Before the fix, mergeAsyncMarks() asserted before reaching the wait. |
| # After the fix the pass completes without asserting. |
| # |
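| # Two patterns are tested. In both, "then" is the fall-through block bb.1 and |
| # "else" is bb.2, the target of S_CBRANCH_SCC1: |
| # asyncmark_in_then - ASYNCMARK in the then-successor; the else-successor is sync |
| # asyncmark_in_else - ASYNCMARK in the else-successor; the then-successor is sync |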
| |
| --- |
| # Pattern 1: ASYNCMARK in then-successor, else-successor is sync. |
| # bb.1 (fall-through of bb.0) issues the async load followed by ASYNCMARK; |
| # bb.2 (the S_CBRANCH_SCC1 target) is empty apart from its branch. Both |
| # blocks branch to the join block bb.3. |
| name: asyncmark_in_then |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| occupancy: 8 |
| body: | |
| ; CHECK-LABEL: name: asyncmark_in_then |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ; CHECK-NEXT: ASYNCMARK |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: WAIT_ASYNCMARK 0 |
| ; CHECK-NEXT: S_WAIT_ASYNCCNT 0, implicit-def $asynccnt, implicit $asynccnt |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2 |
| |
| S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc |
| S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| |
| ; then branch (fall-through from bb.0) — issues async LDS DMA + ASYNCMARK |
| bb.1: |
| liveins: $vgpr0_vgpr1, $vgpr2 |
| |
| GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ASYNCMARK |
| S_BRANCH %bb.3 |
| |
| ; else branch (S_CBRANCH_SCC1 target) — sync path, no async operations; OtherMarks is empty at join |
| bb.2: |
| S_BRANCH %bb.3 |
| |
| ; join — mergeAsyncMarks sees non-empty AsyncMarks (then) and empty OtherMarks (else). |
| ; Before fix: assertion. After fix: the merge returns early and the pass completes; |
| ; the only wait expected in this block is the S_WAIT_ASYNCCNT 0 that follows |
| ; WAIT_ASYNCMARK 0 in the expected output above. |
| bb.3: |
| WAIT_ASYNCMARK 0 |
| S_ENDPGM 0 |
| ... |
| --- |
| # Pattern 2: ASYNCMARK in else-successor, then-successor is sync. |
| # Mirror of asyncmark_in_then — exercises the opposite predecessor ordering. |
| # Here bb.2 (the S_CBRANCH_SCC1 target) issues the async load followed by |
| # ASYNCMARK, and bb.1 (fall-through of bb.0) is empty apart from its branch. |
| name: asyncmark_in_else |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| occupancy: 8 |
| body: | |
| ; CHECK-LABEL: name: asyncmark_in_else |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: successors: %bb.3(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ; CHECK-NEXT: ASYNCMARK |
| ; CHECK-NEXT: S_BRANCH %bb.3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: WAIT_ASYNCMARK 0 |
| ; CHECK-NEXT: S_WAIT_ASYNCCNT 0, implicit-def $asynccnt, implicit $asynccnt |
| ; CHECK-NEXT: S_ENDPGM 0 |
| bb.0: |
| liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1, $vgpr2 |
| |
| S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc |
| S_CBRANCH_SCC1 %bb.2, implicit killed $scc |
| |
| ; then branch (fall-through from bb.0) — sync path, no async operations |
| bb.1: |
| S_BRANCH %bb.3 |
| |
| ; else branch (S_CBRANCH_SCC1 target) — issues async LDS DMA + ASYNCMARK |
| bb.2: |
| liveins: $vgpr0_vgpr1, $vgpr2 |
| |
| GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ASYNCMARK |
| S_BRANCH %bb.3 |
| |
| ; join — one predecessor (bb.2) carries an ASYNCMARK and the other (bb.1) does not, |
| ; so one side of the merge has no marks. |
| ; NOTE(review): presumably the empty side is AsyncMarks rather than OtherMarks here, |
| ; i.e. the reverse of asyncmark_in_then — confirm against the pass's block visit order. |
| ; As in pattern 1, the expected output has S_WAIT_ASYNCCNT 0 right after WAIT_ASYNCMARK 0. |
| bb.3: |
| WAIT_ASYNCMARK 0 |
| S_ENDPGM 0 |
| ... |