blob: dfec6bfdce5c1b436bb4a9aee4cd909f90d89c6d [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
; =============================================================================
; atomicrmw or - generic address space (addrspace 0)
; =============================================================================
define i32 @atomicrmw_or_i32_generic(ptr addrspace(0) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_generic:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: flat_atomic_or_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
ret i32 %result
}
define i64 @atomicrmw_or_i64_generic(ptr addrspace(0) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_generic:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !0
ret i64 %result
}
; =============================================================================
; atomicrmw or - local address space (addrspace 3)
; =============================================================================
define i32 @atomicrmw_or_i32_local(ptr addrspace(3) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_local:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: ds_or_rtn_b32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(3) %ptr, i32 %val seq_cst
ret i32 %result
}
define i64 @atomicrmw_or_i64_local(ptr addrspace(3) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_local:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: ds_or_rtn_b64 v[0:1], v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(3) %ptr, i64 %val seq_cst
ret i64 %result
}
; =============================================================================
; atomicrmw or with metadata - global address space (no expansion)
; =============================================================================
define i32 @atomicrmw_or_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_global_no_remote_memory:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
ret i32 %result
}
define i64 @atomicrmw_or_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_global_no_remote_memory:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
ret i64 %result
}
define i32 @atomicrmw_or_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_global_no_fine_grained_memory:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %result
}
define i64 @atomicrmw_or_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_global_no_fine_grained_memory:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
ret i64 %result
}
!0 = !{i32 5, i32 6}