# Source blob: 5068843d170273b93b92dc4c18772b1ff5c714da
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
# Test that we can optimize away s_wait_loadcnt at function boundaries when
# the only pending LOAD_CNT events are from GLOBAL_INV (which doesn't write
# to VGPRs).
#
# When a function contains only GLOBAL_INV with no actual VMEM loads pending
# to VGPRs, we should not need to emit s_wait_loadcnt 0 before the return.
---
# Test 1: the body is only a GLOBAL_INV followed by the return; nothing in it
# loads into a VGPR. The checks expect the block of S_WAIT_* 0 instructions at
# function entry, then GLOBAL_INV, then the return on the very next line.
# No S_WAIT_LOADCNT is expected before the return because GLOBAL_INV doesn't
# write to VGPRs.
#
# The GFX12-NOT line spells that intent out; the GFX12-NEXT that follows it
# already requires the return to come directly after GLOBAL_INV.
name: func_global_inv_only
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $sgpr30_sgpr31
    ; GFX12-LABEL: name: func_global_inv_only
    ; GFX12: liveins: $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NOT: S_WAIT_LOADCNT
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31
...
---
# Test 2: a global load into $vgpr0 is followed by GLOBAL_INV, and $vgpr0 is
# an implicit use of the return. Nothing consumes the load result before the
# return, so the checks require an S_WAIT_LOADCNT 0 between GLOBAL_INV and the
# return: a real VGPR load MUST still be waited on even when a GLOBAL_INV is
# also present.
name: func_global_inv_with_vgpr_load
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-LABEL: name: func_global_inv_with_vgpr_load
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 3: baseline without any GLOBAL_INV. A global load into $vgpr0 is
# immediately followed by a return that implicitly uses $vgpr0, so the checks
# require an S_WAIT_LOADCNT 0 between the load and the return: a pending VGPR
# load MUST be waited on.
name: func_vgpr_load_no_global_inv
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-LABEL: name: func_vgpr_load_no_global_inv
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 4: the load into $vgpr0 is consumed by a V_MOV_B32 before GLOBAL_INV,
# so the checks expect the S_WAIT_LOADCNT 0 in front of that V_MOV_B32. By the
# time the return is reached the load has already been waited on and only the
# GLOBAL_INV is pending, so no further S_WAIT_LOADCNT should be needed at the
# return.
#
# The GFX12-NOT line spells that intent out; the GFX12-NEXT that follows it
# already requires the return to come directly after GLOBAL_INV.
name: func_global_inv_load_already_waited
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-LABEL: name: func_global_inv_load_already_waited
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NOT: S_WAIT_LOADCNT
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...