| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: not llc < %s -mcpu=sm_90 -mattr=+ptx82 2>&1 | FileCheck %s --check-prefix=ERROR |
| ; RUN: not llc < %s -mcpu=sm_80 -mattr=+ptx84 2>&1 | FileCheck %s --check-prefix=ERROR |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx84 | FileCheck %s --check-prefix=CHECK |
| ; RUN: %if ptxas-sm_90 && ptxas-isa-8.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %} |
| |
| ;; TODO: Update cmpxchg.py so that it can automatically generate the IR for |
| ;; these test cases. |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ;; Check that the first couple of error messages are correct. |
| ; ERROR: error: unsupported cmpxchg |
| ; ERROR: error: unsupported cmpxchg |
| |
| ;; i128 atomicrmw xchg through a plain `ptr` (no addrspace), release ordering, |
| ;; no syncscope. The checked output is a single atom.release.sys.exch.b128 |
| ;; with no state-space qualifier; the old value is returned. |
| define i128 @test_xchg_generic(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_generic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_generic_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg through an addrspace(1) pointer, release ordering, no |
| ;; syncscope. The checked instruction carries the `.global` qualifier: |
| ;; atom.release.sys.global.exch.b128. |
| define i128 @test_xchg_global(ptr addrspace(1) %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_global( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_global_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_global_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.global.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr addrspace(1) %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg through an addrspace(3) pointer, release ordering, no |
| ;; syncscope. The checked instruction carries the `.shared` qualifier: |
| ;; atom.release.sys.shared.exch.b128. |
| define i128 @test_xchg_shared(ptr addrspace(3) %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_shared( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.shared.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr addrspace(3) %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg through an addrspace(7) pointer, release ordering, no |
| ;; syncscope. The checked instruction carries the `.shared::cluster` |
| ;; qualifier: atom.release.sys.shared::cluster.exch.b128. |
| define i128 @test_xchg_shared_cluster(ptr addrspace(7) %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_shared_cluster( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_cluster_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_cluster_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.shared::cluster.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr addrspace(7) %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with syncscope("block") and release ordering on a plain |
| ;; `ptr`. The checked instruction uses the `.cta` scope: |
| ;; atom.release.cta.exch.b128. |
| define i128 @test_xchg_block(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_block( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_block_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_block_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.cta.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("block") release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with syncscope("cluster") and release ordering on a |
| ;; plain `ptr`. The checked instruction uses the `.cluster` scope: |
| ;; atom.release.cluster.exch.b128. |
| define i128 @test_xchg_cluster(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_cluster( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_cluster_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_cluster_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.cluster.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("cluster") release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with syncscope("device") and release ordering on a |
| ;; plain `ptr`. The IR scope name "device" appears as `.gpu` in the checked |
| ;; instruction: atom.release.gpu.exch.b128. |
| define i128 @test_xchg_gpu(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_gpu( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_gpu_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_gpu_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.gpu.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("device") release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with no syncscope (release ordering) on a plain `ptr`. |
| ;; With no scope given in the IR, the checked instruction uses `.sys`: |
| ;; atom.release.sys.exch.b128. |
| define i128 @test_xchg_sys(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_sys( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_sys_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_sys_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with monotonic ordering and no syncscope on a plain |
| ;; `ptr`. IR `monotonic` appears as `.relaxed` in the checked instruction: |
| ;; atom.relaxed.sys.exch.b128. |
| define i128 @test_xchg_relaxed(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_relaxed( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_relaxed_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_relaxed_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.relaxed.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt monotonic |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with acquire ordering and no syncscope on a plain |
| ;; `ptr`. The checked instruction is atom.acquire.sys.exch.b128. |
| define i128 @test_xchg_acquire(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acquire_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acquire_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.acquire.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt acquire |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with release ordering and no syncscope on a plain |
| ;; `ptr`. The checked instruction is atom.release.sys.exch.b128. |
| define i128 @test_xchg_release(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_release( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_release_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_release_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt release |
| ret i128 %old |
| } |
| |
| ;; i128 atomicrmw xchg with acq_rel ordering and no syncscope on a plain |
| ;; `ptr`. The checked instruction is atom.acq_rel.sys.exch.b128. |
| define i128 @test_xchg_acq_rel(ptr %addr, i128 %amt) { |
| ; CHECK-LABEL: test_xchg_acq_rel( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acq_rel_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acq_rel_param_1]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; |
| ; CHECK-NEXT: atom.acq_rel.sys.exch.b128 dst, [%rd1], amt; |
| ; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %old = atomicrmw xchg ptr %addr, i128 %amt acq_rel |
| ret i128 %old |
| } |
| |
| ;; i128 cmpxchg through a plain `ptr` (no addrspace), monotonic/monotonic, no |
| ;; syncscope. The checked instruction is atom.relaxed.sys.cas.b128 with no |
| ;; state-space qualifier. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_generic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_generic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_generic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_generic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg through an addrspace(1) pointer, monotonic/monotonic, no |
| ;; syncscope. The checked instruction carries the `.global` qualifier: |
| ;; atom.relaxed.sys.global.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_global(ptr addrspace(1) %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_global( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_global_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_global_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_global_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.sys.global.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i128 %cmp, i128 %new monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg through an addrspace(3) pointer, monotonic/monotonic, no |
| ;; syncscope. The checked instruction carries the `.shared` qualifier: |
| ;; atom.relaxed.sys.shared.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_shared(ptr addrspace(3) %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_shared( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.sys.shared.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i128 %cmp, i128 %new monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with syncscope("block"), monotonic/monotonic, on a plain |
| ;; `ptr`. The checked instruction uses the `.cta` scope: |
| ;; atom.relaxed.cta.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_block(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_block( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_block_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_block_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_block_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.cta.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("block") monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with syncscope("cluster"), monotonic/monotonic, on a plain |
| ;; `ptr`. The checked instruction uses the `.cluster` scope: |
| ;; atom.relaxed.cluster.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_cluster(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_cluster( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_cluster_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_cluster_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_cluster_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.cluster.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("cluster") monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with syncscope("device"), monotonic/monotonic, on a plain |
| ;; `ptr`. The IR scope name "device" appears as `.gpu` in the checked |
| ;; instruction: atom.relaxed.gpu.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_gpu(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_gpu( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_gpu_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_gpu_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_gpu_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.gpu.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("device") monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg through an addrspace(7) pointer, monotonic/monotonic, no |
| ;; syncscope. The checked instruction carries the `.shared::cluster` |
| ;; qualifier: atom.relaxed.sys.shared::cluster.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_shared_cluster(ptr addrspace(7) %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_shared_cluster( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_cluster_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_cluster_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_cluster_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.sys.shared::cluster.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(7) %addr, i128 %cmp, i128 %new monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `monotonic monotonic` (as written in the IR), |
| ;; no syncscope, plain `ptr`. The checked instruction is |
| ;; atom.relaxed.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_monotonic_monotonic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_monotonic_monotonic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_monotonic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_monotonic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_monotonic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `monotonic acquire` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acquire.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_monotonic_acquire(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_monotonic_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_acquire_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_acquire_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_acquire_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic acquire |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `monotonic seq_cst` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_monotonic_seq_cst( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic seq_cst |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acquire monotonic` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acquire.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acquire_monotonic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acquire_monotonic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_monotonic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_monotonic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_monotonic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acquire acquire` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acquire.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acquire_acquire(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acquire_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_acquire_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_acquire_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_acquire_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire acquire |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acquire seq_cst` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acquire_seq_cst( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire seq_cst |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `release monotonic` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.release.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_release_monotonic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_release_monotonic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_monotonic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_monotonic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_monotonic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.release.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `release acquire` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acq_rel.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_release_acquire(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_release_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_acquire_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_acquire_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_acquire_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release acquire |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `release seq_cst` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_release_seq_cst( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release seq_cst |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acq_rel monotonic` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acq_rel.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acq_rel_monotonic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acq_rel_monotonic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_monotonic_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_monotonic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_monotonic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acq_rel acquire` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked instruction is |
| ;; atom.acq_rel.sys.cas.b128 with no fence. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acq_rel_acquire(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acq_rel_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_acquire_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_acquire_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_acquire_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel acquire |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `acq_rel seq_cst` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_acq_rel_seq_cst( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel seq_cst |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `seq_cst monotonic` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_seq_cst_monotonic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst monotonic |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `seq_cst acquire` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_seq_cst_acquire( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst acquire |
| ret i128 %new |
| } |
| |
| ;; i128 cmpxchg with orderings `seq_cst seq_cst` (as written in the IR), no |
| ;; syncscope, plain `ptr`. The checked output places a fence.sc.sys before an |
| ;; atom.acquire.sys.cas.b128. |
| ;; Note: the cmpxchg result (%pairold) is unused and the function returns |
| ;; %new, so the checked st.param stores the registers loaded from param_2. |
| define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) { |
| ; CHECK-LABEL: test_cmpxchg_seq_cst_seq_cst( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0]; |
| ; CHECK-NEXT: fence.sc.sys; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; |
| ; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst seq_cst |
| ret i128 %new |
| } |
| |
| ;; i128 atomicrmw `and` with monotonic ordering on a plain `ptr`. The checked |
| ;; output is a retry loop rather than a single atom instruction: it loads the |
| ;; current 128-bit value, ANDs each 64-bit half with %val, attempts |
| ;; atom.relaxed.sys.cas.b128, and branches back while the value returned by |
| ;; the cas differs from the expected one. The value from the last cas is |
| ;; returned. |
| ;; NOTE(review): the label number in $L__BB34_1 presumably tracks this |
| ;; function's position in the file; regenerate the checks if functions are |
| ;; added or removed above it. |
| define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_and( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_and_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_and_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4; |
| ; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p1 bra $L__BB34_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw and ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw or with monotonic ordering; returns the value produced by |
| ;; the atomicrmw. The expected PTX is a retry loop: one ld.v2.b64 of the |
| ;; current value, or.b64 on each 64-bit half, atom.relaxed.sys.cas.b128, and |
| ;; a branch back to the loop header while the value returned by the cas |
| ;; differs from the value that was expected. |
| define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_or( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_or_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_or_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4; |
| ; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p1 bra $L__BB35_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw or ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw xor with monotonic ordering; returns the value produced by |
| ;; the atomicrmw. The expected PTX is a retry loop: one ld.v2.b64 of the |
| ;; current value, xor.b64 on each 64-bit half, atom.relaxed.sys.cas.b128, and |
| ;; a branch back to the loop header while the value returned by the cas |
| ;; differs from the value that was expected. |
| define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_xor( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_xor_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_xor_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4; |
| ; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p1 bra $L__BB36_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw xor ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw min (signed) with monotonic ordering; returns the value |
| ;; produced by the atomicrmw. The expected PTX is a retry loop around |
| ;; atom.relaxed.sys.cas.b128. The 128-bit comparison is built from the 64-bit |
| ;; halves: the second elements of the loaded/param pairs are compared with |
| ;; setp.lt.s64, and the first elements with setp.lt.u64, which only counts |
| ;; when the second elements are equal. selp.b64 then picks the value to swap |
| ;; in. (The second element is presumably the high half - confirm.) |
| define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_min( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<7>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_min_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_min_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5; |
| ; CHECK-NEXT: and.pred %p3, %p2, %p1; |
| ; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5; |
| ; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p6 bra $L__BB37_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw min ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw max (signed) with monotonic ordering; returns the value |
| ;; produced by the atomicrmw. The expected PTX is a retry loop around |
| ;; atom.relaxed.sys.cas.b128. The 128-bit comparison is built from the 64-bit |
| ;; halves: the second elements of the loaded/param pairs are compared with |
| ;; setp.gt.s64, and the first elements with setp.gt.u64, which only counts |
| ;; when the second elements are equal. selp.b64 then picks the value to swap |
| ;; in. (The second element is presumably the high half - confirm.) |
| define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_max( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<7>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_max_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_max_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5; |
| ; CHECK-NEXT: and.pred %p3, %p2, %p1; |
| ; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5; |
| ; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p6 bra $L__BB38_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw max ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw umin (unsigned) with monotonic ordering; returns the value |
| ;; produced by the atomicrmw. The expected PTX is a retry loop around |
| ;; atom.relaxed.sys.cas.b128. Unlike the signed min test, both 64-bit halves |
| ;; are compared with setp.lt.u64; the first-element comparison only counts |
| ;; when the second elements are equal. selp.b64 then picks the value to swap |
| ;; in. |
| define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_umin( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<7>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umin_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umin_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5; |
| ; CHECK-NEXT: and.pred %p3, %p2, %p1; |
| ; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5; |
| ; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p6 bra $L__BB39_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umin ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| ;; i128 atomicrmw umax (unsigned) with monotonic ordering; returns the value |
| ;; produced by the atomicrmw. The expected PTX is a retry loop around |
| ;; atom.relaxed.sys.cas.b128. Unlike the signed max test, both 64-bit halves |
| ;; are compared with setp.gt.u64; the first-element comparison only counts |
| ;; when the second elements are equal. selp.b64 then picks the value to swap |
| ;; in. |
| define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) { |
| ; CHECK-LABEL: test_atomicrmw_umax( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<7>; |
| ; CHECK-NEXT: .reg .b64 %rd<13>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umax_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umax_param_0]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; |
| ; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5; |
| ; CHECK-NEXT: and.pred %p3, %p2, %p1; |
| ; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5; |
| ; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 cmp, swap, dst; |
| ; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12}; |
| ; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6}; |
| ; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap; |
| ; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12; |
| ; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11; |
| ; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0; |
| ; CHECK-NEXT: mov.b64 %rd11, %rd1; |
| ; CHECK-NEXT: mov.b64 %rd12, %rd2; |
| ; CHECK-NEXT: @%p6 bra $L__BB40_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umax ptr %ptr, i128 %val monotonic |
| ret i128 %ret |
| } |
| |
| |
| ;; Zero-initialized, 16-byte-aligned i128 with internal linkage in |
| ;; addrspace(3); it is the target of the xchg in test_atomicrmw_xchg_const. |
| @si128 = internal addrspace(3) global i128 0, align 16 |
| |
| ;; i128 atomicrmw xchg of the constant 23 into the addrspace(3) global @si128 |
| ;; with monotonic ordering; the result %res is unused and the function returns |
| ;; void. In the expected PTX the constant is materialized as the register pair |
| ;; {23, 0} with two mov.b64, the exchange is |
| ;; atom.relaxed.sys.shared.exch.b128 addressing [si128] directly, and si128 is |
| ;; declared as a .shared array inside the function body ("demoted variable"). |
| define void @test_atomicrmw_xchg_const() { |
| ; CHECK-LABEL: test_atomicrmw_xchg_const( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-NEXT: // demoted variable |
| ; CHECK-NEXT: .shared .align 16 .b8 si128[16]; |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: mov.b64 %rd1, 0; |
| ; CHECK-NEXT: mov.b64 %rd2, 23; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b128 amt, dst; |
| ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1}; |
| ; CHECK-NEXT: atom.relaxed.sys.shared.exch.b128 dst, [si128], amt; |
| ; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: ret; |
| %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 monotonic |
| ret void |
| } |