| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None -o - %s | FileCheck %s |
| |
| %S = type <{ float, double }> |
| |
| ; The result of these atomic ops should not be used as a uniform value: each |
| ; lane receives its own old value, so the dependent address computation must |
| ; stay divergent (v_mad_u64_u32 on the returned VGPR in the checks below). |
| |
| define protected amdgpu_kernel void @add(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: add: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_add v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw add ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @sub(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: sub: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_sub v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw sub ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @and(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: and: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_and v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw and ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @or(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: or: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_or v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw or ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @xor(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: xor: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_xor v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw xor ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
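| ; nand has no hardware atomic on gfx90a, so atomicrmw nand is expanded into a |
| ; cmpxchg loop (%atomicrmw.start below); its result is still divergent. |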
| define protected amdgpu_kernel void @nand(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: nand: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: s_mov_b64 s[4:5], 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s6 |
| ; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, v0 |
| ; CHECK-NEXT: v_not_b32_e32 v0, v3 |
| ; CHECK-NEXT: v_or_b32_e32 v2, -2, v0 |
| ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 |
| ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] |
| ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_cbranch_execnz .LBB5_1 |
| ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s3 |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw nand ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
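| ; Signed/unsigned min/max at workgroup and agent scope all select the |
| ; corresponding global_atomic_smax/smin/umax/umin instruction directly. |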
| define protected amdgpu_kernel void @max_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: max_workgroup: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_smax v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @max(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: max: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_smax v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw max ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @min_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: min_workgroup: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_smin v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @min(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: min: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_smin v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw min ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @umax_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: umax_workgroup: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_umax v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @umax(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: umax: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_umax v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw umax ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @umin_workgroup(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: umin_workgroup: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_umin v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 syncscope("workgroup") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @umin(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: umin: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_umin v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw umin ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
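| ; cmpxchg returns {old value, success flag}; only the old value is extracted |
| ; here, and it is divergent just like the atomicrmw results above. |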
| define protected amdgpu_kernel void @cmpxchg(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: cmpxchg: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_cmpswap v2, v2, v[0:1], s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %agg = cmpxchg ptr addrspace(1) %p, i32 1, i32 2 monotonic monotonic |
| %n32 = extractvalue {i32, i1} %agg, 0 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @xchg(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: xchg: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_swap v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw xchg ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @inc(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: inc: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_inc v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw uinc_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @dec(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: dec: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_dec v2, v0, v1, s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = atomicrmw udec_wrap ptr addrspace(1) %p, i32 1 syncscope("agent") monotonic |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
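| ; Without !amdgpu.no.fine.grained.memory, fadd is expanded into a cmpxchg |
| ; loop rather than selecting a hardware FP atomic; fsub additionally has no |
| ; hardware equivalent and is lowered as an add of -1.0 inside the loop. |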
| define protected amdgpu_kernel void @fadd(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: fadd: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: s_mov_b64 s[4:5], 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s6 |
| ; CHECK-NEXT: .LBB18_1: ; %atomicrmw.start |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, v0 |
| ; CHECK-NEXT: v_add_f32_e32 v2, 1.0, v3 |
| ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 |
| ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] |
| ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_cbranch_execnz .LBB18_1 |
| ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f32 = atomicrmw fadd ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic |
| %n32 = fptoui float %f32 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @fsub(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: fsub: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: s_mov_b64 s[4:5], 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s6 |
| ; CHECK-NEXT: .LBB19_1: ; %atomicrmw.start |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, v0 |
| ; CHECK-NEXT: v_add_f32_e32 v2, -1.0, v3 |
| ; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 |
| ; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] |
| ; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_cbranch_execnz .LBB19_1 |
| ; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f32 = atomicrmw fsub ptr addrspace(1) %p, float 1.0 syncscope("agent") monotonic |
| %n32 = fptoui float %f32 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
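| ; With !amdgpu.no.fine.grained.memory (!0), f64 fmin/fmax select the native |
| ; global_atomic_min_f64/global_atomic_max_f64 instructions on gfx90a. |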
| define protected amdgpu_kernel void @fmin(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: fmin: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_min_f64 v[0:1], v2, v[0:1], s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f64 = atomicrmw fmin ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0 |
| %n32 = fptoui double %f64 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @fmax(ptr addrspace(1) %p, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: fmax: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_atomic_max_f64 v[0:1], v2, v[0:1], s[0:1] glc |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3] |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f64 = atomicrmw fmax ptr addrspace(1) %p, double 1.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0 |
| %n32 = fptoui double %f64 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
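| ; The raw buffer atomic intrinsics select to MUBUF buffer_atomic_* |
| ; instructions; their returned values are likewise divergent and feed the |
| ; v_mad_u64_u32 address computation in VGPRs. |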
| define protected amdgpu_kernel void @buffer.ptr.atomic.swap(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.swap: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_swap v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.add(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.add: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_add v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.sub(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.sub: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_sub v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.smin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.smin: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_smin v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.smax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.smax: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_smax v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.umin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.umin: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_umin v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.umax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.umax: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_umax v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.and(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.and: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_and v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.or(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.or: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_or v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.xor(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.xor: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.inc(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.inc: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_inc v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.dec(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.dec: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s6 |
| ; CHECK-NEXT: buffer_atomic_dec v0, v1, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32 1, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.cmpswap(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.cmpswap: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s6 |
| ; CHECK-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %n32 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 1, i32 2, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.fadd(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.fadd: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 1.0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s6 |
| ; CHECK-NEXT: buffer_atomic_add_f32 v1, v0, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f32 = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n32 = fptoui float %f32 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.fmin(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.fmin: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s6 |
| ; CHECK-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f64 = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n32 = fptoui double %f64 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| define protected amdgpu_kernel void @buffer.ptr.atomic.fmax(ptr addrspace(8) %rsrc, i32 %vindex, ptr addrspace(1) %q) { |
| ; CHECK-LABEL: buffer.ptr.atomic.fmax: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x34 |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s6 |
| ; CHECK-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; CHECK-NEXT: global_store_dword v[0:1], v2, off |
| ; CHECK-NEXT: s_endpgm |
| %f64 = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double 1.0, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) |
| %n32 = fptoui double %f64 to i32 |
| %n64 = zext i32 %n32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %q, i64 %n64, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |
| |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) |
| declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) |
| declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32) |
| declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32) |
| declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32) |
| |
| !0 = !{} |