| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -o - -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s |
| ; RUN: %if ptxas-12.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78| %ptxas-verify -arch=sm_90 %} |
| |
| ; Every function in this file is compiled for the nvptx64-nvidia-cuda triple. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Intrinsics called by test_distributed_shared_cluster_common. |
| ; mapa.shared.cluster takes an addrspace(3) pointer plus an i32 and returns an |
| ; addrspace(7) pointer; mapa is the variant that takes and returns a plain ptr. |
| declare ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32) |
| declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) |
| declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() |
| declare ptr @llvm.nvvm.mapa(ptr, i32) |
| |
| ; Cluster addressing test: exercises isspacep.shared.cluster, both mapa |
| ; intrinsics, and a plain load/store through the addrspace(7) pointer that |
| ; mapa.shared.cluster returns. |
| ;   %ptr      - generic pointer fed to isspacep.shared.cluster and mapa. |
| ;   %smem_ptr - addrspace(3) pointer fed to mapa.shared.cluster. |
| ; Returns the sum of the two isspacep results, each zero-extended to i32. |
| define i32 @test_distributed_shared_cluster_common(ptr %ptr, ptr addrspace(3) %smem_ptr) local_unnamed_addr { |
| ; CHECK-LABEL: test_distributed_shared_cluster_common( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<3>; |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.u64 %rd1, [test_distributed_shared_cluster_common_param_0]; |
| ; CHECK-NEXT: ld.param.u64 %rd2, [test_distributed_shared_cluster_common_param_1]; |
| ; CHECK-NEXT: mov.u32 %r1, %ctaid.x; |
| ; CHECK-NEXT: xor.b32 %r2, %r1, 1; |
| ; CHECK-NEXT: isspacep.shared::cluster %p1, %rd1; |
| ; CHECK-NEXT: mapa.u64 %rd3, %rd1, %r2; |
| ; CHECK-NEXT: isspacep.shared::cluster %p2, %rd3; |
| ; CHECK-NEXT: mapa.shared::cluster.u64 %rd4, %rd2, %r2; |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r3, [%rd4]; |
| ; CHECK-NEXT: add.s32 %r4, %r3, 42; |
| ; CHECK-NEXT: st.shared::cluster.u32 [%rd4], %r4; |
| ; CHECK-NEXT: selp.b32 %r5, 1, 0, %p1; |
| ; CHECK-NEXT: selp.b32 %r6, 1, 0, %p2; |
| ; CHECK-NEXT: add.s32 %r7, %r5, %r6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; %1 = ctaid.x ^ 1 is the i32 operand passed to both mapa calls below |
| ; (presumably the rank of a neighbouring CTA in the cluster - TODO confirm). |
| %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() |
| %1 = xor i32 %0, 1 |
| ; Query the address space of the incoming pointer and of its mapa-translated form. |
| %2 = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %ptr) |
| %3 = tail call ptr @llvm.nvvm.mapa(ptr %ptr, i32 %1) |
| %4 = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %3) |
| ; Translate the addrspace(3) pointer into an addrspace(7) pointer. |
| %dsmem_ptr = call ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %smem_ptr, i32 %1) |
| |
| ; Load, add 42, and store back through the addrspace(7) pointer; the assertions |
| ; above expect ld.shared::cluster / st.shared::cluster for these accesses. |
| %loaded_val = load i32, ptr addrspace(7) %dsmem_ptr |
| %updated_val = add i32 %loaded_val, 42 |
| store i32 %updated_val, ptr addrspace(7) %dsmem_ptr |
| |
| ; The return value is only the sum of the two isspacep results; the loaded and |
| ; updated value above does not feed into it. |
| %5 = zext i1 %2 to i32 |
| %6 = zext i1 %4 to i32 |
| %ret = add i32 %5, %6 |
| ret i32 %ret |
| } |
| |
| ; Floating-point atomic add test: issues atomicrmw fadd of 1.0 on the same |
| ; addrspace(7) pointer for half, bfloat, float and double. The results are unused. |
| ;   %dsmem_ptr - addrspace(7) pointer that every atomic operates on. |
| ; NOTE(review): the IR requests seq_cst, but the expected PTX below contains no |
| ; fence and no ordering qualifier on the atom instructions - confirm this is the |
| ; intended lowering. |
| define void @test_distributed_shared_cluster_float_atomic(ptr addrspace(7) %dsmem_ptr) local_unnamed_addr { |
| ; CHECK-LABEL: test_distributed_shared_cluster_float_atomic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %f<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .b64 %fd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.u64 %rd1, [test_distributed_shared_cluster_float_atomic_param_0]; |
| ; CHECK-NEXT: mov.b16 %rs1, 0x3C00; |
| ; CHECK-NEXT: atom.shared::cluster.add.noftz.f16 %rs2, [%rd1], %rs1; |
| ; CHECK-NEXT: mov.b16 %rs3, 0x3F80; |
| ; CHECK-NEXT: atom.shared::cluster.add.noftz.bf16 %rs4, [%rd1], %rs3; |
| ; CHECK-NEXT: atom.shared::cluster.add.f32 %f1, [%rd1], 0f3F800000; |
| ; CHECK-NEXT: atom.shared::cluster.add.f64 %fd1, [%rd1], 0d3FF0000000000000; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; One fadd per floating-point type. In the expected PTX the 16-bit operands are |
| ; first moved into registers (0x3C00 and 0x3F80 are 1.0 in half and bfloat), |
| ; while the f32 and f64 operands appear as immediates. |
| %0 = atomicrmw fadd ptr addrspace(7) %dsmem_ptr, half 1.000000e+00 seq_cst |
| %1 = atomicrmw fadd ptr addrspace(7) %dsmem_ptr, bfloat 1.000000e+00 seq_cst |
| %2 = atomicrmw fadd ptr addrspace(7) %dsmem_ptr, float 1.000000e+00 seq_cst |
| %3 = atomicrmw fadd ptr addrspace(7) %dsmem_ptr, double 1.000000e+00 seq_cst |
| |
| ret void |
| } |
| |
| ; Integer atomic test: issues monotonic atomicrmw add, xchg, min, umin, max and |
| ; umax at i32 and i64, plus uinc_wrap and udec_wrap at i32, all with operand 1 |
| ; on the same addrspace(7) pointer. The results are unused. |
| ;   %dsmem_ptr - addrspace(7) pointer that every atomic operates on. |
| ; Each operation is expected to become a single atom.shared::cluster instruction. |
| define void @test_distributed_shared_cluster_int_atomic(ptr addrspace(7) %dsmem_ptr) local_unnamed_addr { |
| ; CHECK-LABEL: test_distributed_shared_cluster_int_atomic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.u64 %rd1, [test_distributed_shared_cluster_int_atomic_param_0]; |
| ; CHECK-NEXT: atom.shared::cluster.add.u32 %r1, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.add.u64 %rd2, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.exch.b32 %r2, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.exch.b64 %rd3, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.min.s32 %r3, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.min.s64 %rd4, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.min.u32 %r4, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.min.u64 %rd5, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.max.s32 %r5, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.max.s64 %rd6, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.max.u32 %r6, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.max.u64 %rd7, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.inc.u32 %r7, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.dec.u32 %r8, [%rd1], 1; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; Integer add operations (i32 and i64) |
| %0 = atomicrmw add ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %1 = atomicrmw add ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| |
| ; Exchange operations (expected as atom ... exch) |
| %2 = atomicrmw xchg ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %3 = atomicrmw xchg ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| |
| ; Min operations: min is expected as min.s32/min.s64, umin as min.u32/min.u64 |
| %4 = atomicrmw min ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %5 = atomicrmw min ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| %6 = atomicrmw umin ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %7 = atomicrmw umin ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| |
| ; Max operations: max is expected as max.s32/max.s64, umax as max.u32/max.u64 |
| %8 = atomicrmw max ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %9 = atomicrmw max ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| %10 = atomicrmw umax ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %11 = atomicrmw umax ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| |
| ; Wrapping increment/decrement; only the i32 forms are exercised here |
| ; (expected as inc.u32 and dec.u32). |
| %12 = atomicrmw uinc_wrap ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %13 = atomicrmw udec_wrap ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| |
| ret void |
| } |
| |
| ; Bitwise atomic test: issues monotonic atomicrmw and, or and xor at i32 and |
| ; i64, all with operand 1 on the same addrspace(7) pointer. Results are unused. |
| ;   %dsmem_ptr - addrspace(7) pointer that every atomic operates on. |
| ; Each operation is expected to become a single atom.shared::cluster instruction |
| ; with a .b32 or .b64 type suffix. |
| define void @test_distributed_shared_cluster_bitwise_atomic(ptr addrspace(7) %dsmem_ptr) local_unnamed_addr { |
| ; CHECK-LABEL: test_distributed_shared_cluster_bitwise_atomic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.u64 %rd1, [test_distributed_shared_cluster_bitwise_atomic_param_0]; |
| ; CHECK-NEXT: atom.shared::cluster.and.b32 %r1, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.and.b64 %rd2, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.or.b32 %r2, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.or.b64 %rd3, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.xor.b32 %r3, [%rd1], 1; |
| ; CHECK-NEXT: atom.shared::cluster.xor.b64 %rd4, [%rd1], 1; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; Bitwise operations, each in a 32-bit and a 64-bit form |
| %0 = atomicrmw and ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %1 = atomicrmw and ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| %2 = atomicrmw or ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %3 = atomicrmw or ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| %4 = atomicrmw xor ptr addrspace(7) %dsmem_ptr, i32 1 monotonic |
| %5 = atomicrmw xor ptr addrspace(7) %dsmem_ptr, i64 1 monotonic |
| |
| ret void |
| } |
| |
| ; Compare-exchange test: issues cmpxchg (compare value 1, new value 0) on the |
| ; same addrspace(7) pointer at i32 and i64 with nine success/failure ordering |
| ; pairs each, then at i16 with five pairs. The results are unused. |
| ;   %dsmem_ptr - addrspace(7) pointer that every cmpxchg operates on. |
| ; Expected lowering, as pinned by the assertions below: |
| ;   - i32/i64: one atom.<order>.shared::cluster.cas per operation; the seq_cst |
| ;     cases are expected as fence.sc.sys followed by an acquire cas. |
| ;   - i16: a loop around a relaxed 32-bit cas on the enclosing 4-byte-aligned |
| ;     word, with any ordering supplied by separate fence instructions. |
| define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr) local_unnamed_addr { |
| ; CHECK-LABEL: test_distributed_shared_cluster_cmpxchg( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<11>; |
| ; CHECK-NEXT: .reg .b32 %r<53>; |
| ; CHECK-NEXT: .reg .b64 %rd<12>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.u64 %rd2, [test_distributed_shared_cluster_cmpxchg_param_0]; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r24, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b32 %r25, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b32 %r26, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.release.shared::cluster.cas.b32 %r27, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acq_rel.shared::cluster.cas.b32 %r28, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acq_rel.shared::cluster.cas.b32 %r29, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b32 %r30, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b32 %r31, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b32 %r32, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b64 %rd3, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b64 %rd4, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b64 %rd5, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.release.shared::cluster.cas.b64 %rd6, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acq_rel.shared::cluster.cas.b64 %rd7, [%rd2], 1, 0; |
| ; CHECK-NEXT: atom.acq_rel.shared::cluster.cas.b64 %rd8, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b64 %rd9, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b64 %rd10, [%rd2], 1, 0; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: atom.acquire.shared::cluster.cas.b64 %rd11, [%rd2], 1, 0; |
| ; CHECK-NEXT: and.b64 %rd1, %rd2, -4; |
| ; CHECK-NEXT: cvt.u32.u64 %r33, %rd2; |
| ; CHECK-NEXT: and.b32 %r34, %r33, 3; |
| ; CHECK-NEXT: shl.b32 %r1, %r34, 3; |
| ; CHECK-NEXT: mov.b32 %r35, 65535; |
| ; CHECK-NEXT: shl.b32 %r36, %r35, %r1; |
| ; CHECK-NEXT: not.b32 %r2, %r36; |
| ; CHECK-NEXT: mov.b32 %r37, 1; |
| ; CHECK-NEXT: shl.b32 %r3, %r37, %r1; |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r38, [%rd1]; |
| ; CHECK-NEXT: and.b32 %r48, %r38, %r2; |
| ; CHECK-NEXT: $L__BB4_1: // %partword.cmpxchg.loop33 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b32 %r39, %r48, %r3; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r6, [%rd1], %r39, %r48; |
| ; CHECK-NEXT: setp.eq.s32 %p1, %r6, %r39; |
| ; CHECK-NEXT: @%p1 bra $L__BB4_3; |
| ; CHECK-NEXT: // %bb.2: // %partword.cmpxchg.failure32 |
| ; CHECK-NEXT: // in Loop: Header=BB4_1 Depth=1 |
| ; CHECK-NEXT: and.b32 %r7, %r6, %r2; |
| ; CHECK-NEXT: setp.ne.s32 %p2, %r48, %r7; |
| ; CHECK-NEXT: mov.b32 %r48, %r7; |
| ; CHECK-NEXT: @%p2 bra $L__BB4_1; |
| ; CHECK-NEXT: $L__BB4_3: // %partword.cmpxchg.end31 |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r40, [%rd1]; |
| ; CHECK-NEXT: and.b32 %r49, %r40, %r2; |
| ; CHECK-NEXT: $L__BB4_4: // %partword.cmpxchg.loop23 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b32 %r41, %r49, %r3; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r41, %r49; |
| ; CHECK-NEXT: setp.eq.s32 %p3, %r10, %r41; |
| ; CHECK-NEXT: @%p3 bra $L__BB4_6; |
| ; CHECK-NEXT: // %bb.5: // %partword.cmpxchg.failure22 |
| ; CHECK-NEXT: // in Loop: Header=BB4_4 Depth=1 |
| ; CHECK-NEXT: and.b32 %r11, %r10, %r2; |
| ; CHECK-NEXT: setp.ne.s32 %p4, %r49, %r11; |
| ; CHECK-NEXT: mov.b32 %r49, %r11; |
| ; CHECK-NEXT: @%p4 bra $L__BB4_4; |
| ; CHECK-NEXT: $L__BB4_6: // %partword.cmpxchg.end21 |
| ; CHECK-NEXT: fence.acq_rel.sys; |
| ; CHECK-NEXT: fence.acq_rel.sys; |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r42, [%rd1]; |
| ; CHECK-NEXT: and.b32 %r50, %r42, %r2; |
| ; CHECK-NEXT: $L__BB4_7: // %partword.cmpxchg.loop13 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b32 %r43, %r50, %r3; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r43, %r50; |
| ; CHECK-NEXT: setp.eq.s32 %p5, %r14, %r43; |
| ; CHECK-NEXT: @%p5 bra $L__BB4_9; |
| ; CHECK-NEXT: // %bb.8: // %partword.cmpxchg.failure12 |
| ; CHECK-NEXT: // in Loop: Header=BB4_7 Depth=1 |
| ; CHECK-NEXT: and.b32 %r15, %r14, %r2; |
| ; CHECK-NEXT: setp.ne.s32 %p6, %r50, %r15; |
| ; CHECK-NEXT: mov.b32 %r50, %r15; |
| ; CHECK-NEXT: @%p6 bra $L__BB4_7; |
| ; CHECK-NEXT: $L__BB4_9: // %partword.cmpxchg.end11 |
| ; CHECK-NEXT: fence.acq_rel.sys; |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r44, [%rd1]; |
| ; CHECK-NEXT: and.b32 %r51, %r44, %r2; |
| ; CHECK-NEXT: $L__BB4_10: // %partword.cmpxchg.loop3 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b32 %r45, %r51, %r3; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r45, %r51; |
| ; CHECK-NEXT: setp.eq.s32 %p7, %r18, %r45; |
| ; CHECK-NEXT: @%p7 bra $L__BB4_12; |
| ; CHECK-NEXT: // %bb.11: // %partword.cmpxchg.failure2 |
| ; CHECK-NEXT: // in Loop: Header=BB4_10 Depth=1 |
| ; CHECK-NEXT: and.b32 %r19, %r18, %r2; |
| ; CHECK-NEXT: setp.ne.s32 %p8, %r51, %r19; |
| ; CHECK-NEXT: mov.b32 %r51, %r19; |
| ; CHECK-NEXT: @%p8 bra $L__BB4_10; |
| ; CHECK-NEXT: $L__BB4_12: // %partword.cmpxchg.end1 |
| ; CHECK-NEXT: fence.acq_rel.sys; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.shared::cluster.u32 %r46, [%rd1]; |
| ; CHECK-NEXT: and.b32 %r52, %r46, %r2; |
| ; CHECK-NEXT: $L__BB4_13: // %partword.cmpxchg.loop |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b32 %r47, %r52, %r3; |
| ; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r47, %r52; |
| ; CHECK-NEXT: setp.eq.s32 %p9, %r22, %r47; |
| ; CHECK-NEXT: @%p9 bra $L__BB4_15; |
| ; CHECK-NEXT: // %bb.14: // %partword.cmpxchg.failure |
| ; CHECK-NEXT: // in Loop: Header=BB4_13 Depth=1 |
| ; CHECK-NEXT: and.b32 %r23, %r22, %r2; |
| ; CHECK-NEXT: setp.ne.s32 %p10, %r52, %r23; |
| ; CHECK-NEXT: mov.b32 %r52, %r23; |
| ; CHECK-NEXT: @%p10 bra $L__BB4_13; |
| ; CHECK-NEXT: $L__BB4_15: // %partword.cmpxchg.end |
| ; CHECK-NEXT: fence.acq_rel.sys; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; 32-bit compare-exchange: the nine success/failure ordering pairs listed below |
| %0 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 monotonic monotonic |
| %1 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 acquire monotonic |
| %2 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 acquire acquire |
| %3 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 release monotonic |
| %4 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 acq_rel monotonic |
| %5 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 acq_rel acquire |
| %6 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 seq_cst monotonic |
| %7 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 seq_cst acquire |
| %8 = cmpxchg ptr addrspace(7) %dsmem_ptr, i32 1, i32 0 seq_cst seq_cst |
| |
| ; 64-bit compare-exchange: the same nine ordering pairs as the 32-bit group |
| %9 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 monotonic monotonic |
| %10 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 acquire monotonic |
| %11 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 acquire acquire |
| %12 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 release monotonic |
| %13 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 acq_rel monotonic |
| %14 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 acq_rel acquire |
| %15 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 seq_cst monotonic |
| %16 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 seq_cst acquire |
| %17 = cmpxchg ptr addrspace(7) %dsmem_ptr, i64 1, i64 0 seq_cst seq_cst |
| |
| ; 16-bit compare-exchange: five ordering pairs. The expected PTX masks the |
| ; address with -4, builds a 0xFFFF lane mask shifted by (address & 3) * 8, and |
| ; runs one partword.cmpxchg loop per operation below. |
| %18 = cmpxchg ptr addrspace(7) %dsmem_ptr, i16 1, i16 0 monotonic monotonic |
| %19 = cmpxchg ptr addrspace(7) %dsmem_ptr, i16 1, i16 0 acquire acquire |
| %20 = cmpxchg ptr addrspace(7) %dsmem_ptr, i16 1, i16 0 release monotonic |
| %21 = cmpxchg ptr addrspace(7) %dsmem_ptr, i16 1, i16 0 acq_rel acquire |
| %22 = cmpxchg ptr addrspace(7) %dsmem_ptr, i16 1, i16 0 seq_cst seq_cst |
| |
| ret void |
| } |