| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s |
| |
| ; ============================================================================= |
| ; atomicrmw or - generic address space (addrspace 0) |
| ; ============================================================================= |
| |
| ; Sequentially consistent i32 `atomicrmw or` through a generic (addrspace 0) |
| ; pointer, with no syncscope given and !amdgpu.no.remote.memory attached. |
| ; Expected code: one returning flat_atomic_or_b32 at SCOPE_SYS in a single |
| ; basic block, preceded by global_wb + s_wait_storecnt and followed by |
| ; s_wait_loadcnt_dscnt + global_inv (both also at SCOPE_SYS). |
| define i32 @atomicrmw_or_i32_generic(ptr addrspace(0) %ptr, i32 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i32_generic: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: flat_atomic_or_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 |
| ret i32 %result |
| } |
| |
| ; Monotonic, workgroup-scope i64 `atomicrmw or` through a generic |
| ; (addrspace 0) pointer, with !noalias.addrspace attached. |
| ; Expected code: one returning flat_atomic_or_b64 at SCOPE_SE with no |
| ; global_wb or global_inv around it; the only wait after the atomic is the |
| ; s_wait_loadcnt_dscnt that precedes the return. |
| define i64 @atomicrmw_or_i64_generic(ptr addrspace(0) %ptr, i64 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i64_generic: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !0 |
| ret i64 %result |
| } |
| |
| ; ============================================================================= |
| ; atomicrmw or - local address space (addrspace 3) |
| ; ============================================================================= |
| |
| ; Sequentially consistent i32 `atomicrmw or` on a local (addrspace 3) |
| ; pointer, with no syncscope and no metadata attached. |
| ; Expected code: s_wait_storecnt, then a returning ds_or_rtn_b32, then |
| ; s_wait_dscnt and global_inv at SCOPE_SE. No global_wb is expected here. |
| define i32 @atomicrmw_or_i32_local(ptr addrspace(3) %ptr, i32 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i32_local: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: ds_or_rtn_b32 v0, v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(3) %ptr, i32 %val seq_cst |
| ret i32 %result |
| } |
| |
| ; 64-bit counterpart of the local (addrspace 3) seq_cst `atomicrmw or` test, |
| ; again with no syncscope and no metadata. |
| ; Expected code: s_wait_storecnt, a returning ds_or_rtn_b64 that writes |
| ; v[0:1] and takes v0 and v[1:2] as sources, then s_wait_dscnt and |
| ; global_inv at SCOPE_SE. |
| define i64 @atomicrmw_or_i64_local(ptr addrspace(3) %ptr, i64 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i64_local: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: ds_or_rtn_b64 v[0:1], v0, v[1:2] |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(3) %ptr, i64 %val seq_cst |
| ret i64 %result |
| } |
| |
| ; ============================================================================= |
| ; atomicrmw or with metadata - global address space (no expansion) |
| ; ============================================================================= |
| |
| ; Sequentially consistent i32 `atomicrmw or` on a global (addrspace 1) |
| ; pointer carrying !amdgpu.no.remote.memory, with no syncscope given. |
| ; Expected code: a single returning global_atomic_or_b32 at SCOPE_SYS in one |
| ; basic block (no expansion into a loop), bracketed by global_wb + |
| ; s_wait_storecnt before and s_wait_loadcnt + global_inv after. |
| define i32 @atomicrmw_or_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i32_global_no_remote_memory: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 |
| ret i32 %result |
| } |
| |
| ; 64-bit variant: seq_cst `atomicrmw or` on a global (addrspace 1) pointer |
| ; carrying !amdgpu.no.remote.memory, with no syncscope given. |
| ; Expected code: a single returning global_atomic_or_b64 at SCOPE_SYS in one |
| ; basic block, with the same global_wb / s_wait_storecnt prologue and |
| ; s_wait_loadcnt / global_inv epilogue as the i32 case. |
| define i64 @atomicrmw_or_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i64_global_no_remote_memory: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 |
| ret i64 %result |
| } |
| |
| ; Sequentially consistent i32 `atomicrmw or` on a global (addrspace 1) |
| ; pointer carrying !amdgpu.no.fine.grained.memory, with no syncscope given. |
| ; Expected code: a single returning global_atomic_or_b32 at SCOPE_SYS in one |
| ; basic block, bracketed by global_wb + s_wait_storecnt before and |
| ; s_wait_loadcnt + global_inv after. The instruction sequence is the same as |
| ; the one expected for the !amdgpu.no.remote.memory i32 test. |
| define i32 @atomicrmw_or_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i32_global_no_fine_grained_memory: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret i32 %result |
| } |
| |
| ; 64-bit variant: seq_cst `atomicrmw or` on a global (addrspace 1) pointer |
| ; carrying !amdgpu.no.fine.grained.memory, with no syncscope given. |
| ; Expected code: a single returning global_atomic_or_b64 at SCOPE_SYS in one |
| ; basic block, bracketed by global_wb + s_wait_storecnt before and |
| ; s_wait_loadcnt + global_inv after. The instruction sequence is the same as |
| ; the one expected for the !amdgpu.no.remote.memory i64 test. |
| define i64 @atomicrmw_or_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { |
| ; GFX12-LABEL: atomicrmw_or_i64_global_no_fine_grained_memory: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret i64 %result |
| } |
| |
| ; Metadata node shared by the annotated atomicrmw instructions above. It is |
| ; attached as !noalias.addrspace on the workgroup-scope i64 generic test and |
| ; as !amdgpu.no.remote.memory / !amdgpu.no.fine.grained.memory on the others. |
| ; Presumably, as a !noalias.addrspace operand, the pair (5, 6) is a range |
| ; of address spaces the pointer is asserted not to be in - TODO confirm |
| ; against the LangRef. |
| ; NOTE(review): the two amdgpu.* kinds are conventionally written with an |
| ; empty node (!{}); presumably only their presence matters, so reusing this |
| ; node is harmless - confirm, or give those annotations their own node. |
| !0 = !{i32 5, i32 6} |