| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60 |
| ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %} |
| |
| define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = monotonic, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, and no membar.cta.
| ; SM60-LABEL: monotonic_monotonic_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b8 %r9, [monotonic_monotonic_i8_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB0_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB0_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB0_1; |
| ; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = monotonic, failure ordering = acquire.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, followed by a single membar.cta
; after the loop (none before it).
| ; SM60-LABEL: monotonic_acquire_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b8 %r9, [monotonic_acquire_i8_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB1_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB1_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB1_1; |
| ; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = monotonic, failure ordering = seq_cst.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: monotonic_seq_cst_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [monotonic_seq_cst_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB2_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB2_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB2_1; |
| ; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acquire, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, followed by a single membar.cta
; after the loop (none before it).
| ; SM60-LABEL: acquire_monotonic_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b8 %r9, [acquire_monotonic_i8_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB3_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB3_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB3_1; |
| ; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acquire, failure ordering = acquire.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, followed by a single membar.cta
; after the loop (none before it).
| ; SM60-LABEL: acquire_acquire_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b8 %r9, [acquire_acquire_i8_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB4_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB4_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB4_1; |
| ; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acquire, failure ordering = seq_cst.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: acquire_seq_cst_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acquire_seq_cst_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB5_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB5_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB5_1; |
| ; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = release, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, preceded by a single membar.cta
; before the loop (none after it).
| ; SM60-LABEL: release_monotonic_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [release_monotonic_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB6_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB6_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB6_1; |
| ; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = release, failure ordering = acquire.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: release_acquire_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [release_acquire_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [release_acquire_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB7_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB7_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB7_1; |
| ; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = release, failure ordering = seq_cst.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: release_seq_cst_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [release_seq_cst_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB8_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB8_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB8_1; |
| ; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acq_rel, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: acq_rel_monotonic_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_monotonic_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB9_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB9_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB9_1; |
| ; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acq_rel, failure ordering = acquire.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: acq_rel_acquire_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_acquire_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB10_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB10_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB10_1; |
| ; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = acq_rel, failure ordering = seq_cst.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: acq_rel_seq_cst_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_seq_cst_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB11_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB11_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB11_1; |
| ; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = seq_cst, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: seq_cst_monotonic_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [seq_cst_monotonic_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB12_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB12_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB12_1; |
| ; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = seq_cst, failure ordering = acquire.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: seq_cst_acquire_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [seq_cst_acquire_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB13_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB13_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB13_1; |
| ; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
; Byte-wide (i8) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = seq_cst, failure ordering = seq_cst.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word, with a membar.cta both before
; the loop and after it.
| ; SM60-LABEL: seq_cst_seq_cst_i8_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [seq_cst_seq_cst_i8_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB14_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB14_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB14_1; |
| ; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i8 %new |
| } |
| |
| define i16 @monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
; Halfword (i16) cmpxchg on a global (addrspace(1)) pointer at syncscope("block"),
; success ordering = monotonic, failure ordering = monotonic.
; The autogenerated assertions below expect a masked 32-bit atom.cta.global.cas.b32
; retry loop on the containing 4-byte-aligned word (lane mask built from 65535),
; and no membar.cta.
| ; SM60-LABEL: monotonic_monotonic_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b16 %r9, [monotonic_monotonic_i16_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB15_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB15_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB15_1; |
| ; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic |
; The function returns %new; the cmpxchg result %pairold is not used.
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering monotonic and failure ordering acquire. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, followed by a single membar.cta in |
| ; the %partword.cmpxchg.end block (none before the loop). |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: monotonic_acquire_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b16 %r9, [monotonic_acquire_i16_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB16_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB16_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB16_1; |
| ; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering monotonic and failure ordering seq_cst. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: monotonic_seq_cst_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [monotonic_seq_cst_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB17_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB17_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB17_1; |
| ; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering monotonic. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, followed by a single membar.cta in |
| ; the %partword.cmpxchg.end block (none before the loop). |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acquire_monotonic_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b16 %r9, [acquire_monotonic_i16_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB18_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB18_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB18_1; |
| ; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering acquire. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, followed by a single membar.cta in |
| ; the %partword.cmpxchg.end block (none before the loop). |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acquire_acquire_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_global_cta_param_0]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: ld.param.b16 %r9, [acquire_acquire_i16_global_cta_param_1]; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB19_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB19_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB19_1; |
| ; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering seq_cst. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acquire_seq_cst_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [acquire_seq_cst_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB20_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB20_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB20_1; |
| ; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering release and failure ordering monotonic. |
| ; The checks expect a single membar.cta in the entry block, then a loop around |
| ; a 32-bit atom.cta.global.cas.b32 on the 4-byte-aligned word containing the |
| ; i16, with no membar.cta after the loop. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: release_monotonic_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [release_monotonic_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB21_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB21_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB21_1; |
| ; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering release and failure ordering acquire. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: release_acquire_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [release_acquire_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [release_acquire_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB22_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB22_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB22_1; |
| ; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering release and failure ordering seq_cst. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: release_seq_cst_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [release_seq_cst_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB23_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB23_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB23_1; |
| ; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acq_rel and failure ordering monotonic. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acq_rel_monotonic_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [acq_rel_monotonic_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB24_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB24_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB24_1; |
| ; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acq_rel and failure ordering acquire. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [acq_rel_acquire_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB25_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB25_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB25_1; |
| ; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acq_rel and failure ordering seq_cst. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: acq_rel_seq_cst_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [acq_rel_seq_cst_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB26_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB26_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB26_1; |
| ; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering seq_cst and failure ordering monotonic. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: seq_cst_monotonic_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [seq_cst_monotonic_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB27_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB27_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB27_1; |
| ; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering seq_cst and failure ordering acquire. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: seq_cst_acquire_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [seq_cst_acquire_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB28_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB28_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB28_1; |
| ; SM60-NEXT: $L__BB28_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering seq_cst and failure ordering seq_cst. |
| ; The checks expect a loop around a 32-bit atom.cta.global.cas.b32 on the |
| ; 4-byte-aligned word containing the i16, with one membar.cta in the entry |
| ; block before the loop and another in the %partword.cmpxchg.end block. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i16 @seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM60-LABEL: seq_cst_seq_cst_i16_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<20>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_global_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b16 %r9, [seq_cst_seq_cst_i16_global_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 65535; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM60-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM60-NEXT: $L__BB29_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM60-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17; |
| ; SM60-NEXT: @%p1 bra $L__BB29_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8; |
| ; SM60-NEXT: mov.b32 %r19, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB29_1; |
| ; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst |
| ret i16 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering monotonic and failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b32 (no loop) and no |
| ; membar.cta. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: monotonic_monotonic_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering monotonic and failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b32 (no loop) and no |
| ; membar.cta. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: monotonic_acquire_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering monotonic and failure ordering seq_cst. |
| ; The checks expect one membar.cta ahead of a single atom.cta.global.cas.b32 |
| ; (no loop), and no membar.cta after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: monotonic_seq_cst_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b32 (no loop) and no |
| ; membar.cta. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acquire_monotonic_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b32 (no loop) and no |
| ; membar.cta. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acquire_acquire_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer at syncscope("block") with |
| ; success ordering acquire and failure ordering seq_cst. |
| ; The checks expect one membar.cta ahead of a single atom.cta.global.cas.b32 |
| ; (no loop), and no membar.cta after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acquire_seq_cst_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: release_monotonic_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [release_monotonic_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [release_monotonic_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: release_acquire_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_acquire_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [release_acquire_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [release_acquire_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b32 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: release_seq_cst_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_monotonic_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b32 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_seq_cst_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering monotonic. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b32 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: seq_cst_monotonic_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering acquire. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b32 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: seq_cst_acquire_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b32 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: seq_cst_seq_cst_i32_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst |
| ret i32 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering monotonic, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: monotonic_monotonic_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering monotonic, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: monotonic_acquire_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering monotonic, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: monotonic_seq_cst_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acquire, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acquire_monotonic_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acquire, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acquire_acquire_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acquire, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acquire_seq_cst_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: release_monotonic_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: release_acquire_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_acquire_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [release_acquire_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering release, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: release_seq_cst_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering monotonic. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acq_rel_monotonic_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.cta.global.cas.b64 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_global_cta_param_0]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: acq_rel_seq_cst_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering monotonic. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: seq_cst_monotonic_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering acquire. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: seq_cst_acquire_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire |
| ret i64 %new |
| } |
| |
| ; i64 cmpxchg on a global (addrspace(1)) pointer, syncscope("block"), |
| ; success ordering seq_cst, failure ordering seq_cst. |
| ; The checks expect membar.cta ahead of the atom.cta.global.cas.b64 and no |
| ; fence after it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i64 @seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM60-LABEL: seq_cst_seq_cst_i64_global_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<5>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_global_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_global_cta_param_1]; |
| ; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_global_cta_param_2]; |
| ; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM60-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst |
| ret i64 %new |
| } |
| |
| ; i8 cmpxchg on a global (addrspace(1)) pointer with no explicit syncscope, |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect the byte operation to be expanded into a 32-bit CAS loop: |
| ;   - the address is rounded down with "and.b64 ..., -4" and the containing |
| ;     word is read with ld.global.b32; |
| ;   - (addr & 3) << 3 is the bit offset of the byte, and ~(255 << offset) |
| ;     selects the other three bytes of the word; |
| ;   - each iteration ORs the shifted %cmp / %new byte into those other bytes |
| ;     and issues atom.sys.global.cas.b32; |
| ;   - when the CAS does not match, the loop repeats only if the other bytes |
| ;     of the observed word differ from the ones used for the attempt. |
| ; membar.sys is expected both ahead of the loop and at partword.cmpxchg.end. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i8_global( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_param_0]; |
| ; SM60-NEXT: membar.sys; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_acquire_i8_global_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB60_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.sys.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB60_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB60_1; |
| ; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.sys; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer with no explicit syncscope, |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.sys.global.cas.b32 (system scope) with no |
| ; membar around it. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_global( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_param_2]; |
| ; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer with the empty syncscope |
| ; spelled out as syncscope(""), success ordering acq_rel, failure ordering |
| ; acquire. |
| ; The checks expect a single atom.sys.global.cas.b32 with no membar, the |
| ; same instruction sequence expected for acq_rel_acquire_i32_global. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_global_sys( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_sys_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_sys_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_sys_param_2]; |
| ; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a global (addrspace(1)) pointer, syncscope("device"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.gpu.global.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_global_gpu( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_gpu_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_gpu_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_gpu_param_2]; |
| ; SM60-NEXT: atom.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; i8 cmpxchg on a generic pointer (no addrspace), syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect the byte operation to be expanded into a 32-bit CAS loop: |
| ;   - the address is rounded down with "and.b64 ..., -4" and the containing |
| ;     word is read with ld.b32 (no state-space qualifier); |
| ;   - (addr & 3) << 3 is the bit offset of the byte, and ~(255 << offset) |
| ;     selects the other three bytes of the word; |
| ;   - each iteration ORs the shifted %cmp / %new byte into those other bytes |
| ;     and issues atom.cta.cas.b32; |
| ;   - when the CAS does not match, the loop repeats only if the other bytes |
| ;     of the observed word differ from the ones used for the attempt. |
| ; membar.cta is expected both ahead of the loop and at partword.cmpxchg.end. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i8_generic_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_generic_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_generic_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_acquire_i8_generic_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB64_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB64_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB64_1; |
| ; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg on a shared (addrspace(3)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect the byte operation to be expanded into a 32-bit CAS loop: |
| ;   - the address is rounded down with "and.b64 ..., -4" and the containing |
| ;     word is read with ld.shared.b32; |
| ;   - (addr & 3) << 3 is the bit offset of the byte, and ~(255 << offset) |
| ;     selects the other three bytes of the word; |
| ;   - each iteration ORs the shifted %cmp / %new byte into those other bytes |
| ;     and issues atom.cta.shared.cas.b32; |
| ;   - when the CAS does not match, the loop repeats only if the other bytes |
| ;     of the observed word differ from the ones used for the attempt. |
| ; membar.cta is expected both ahead of the loop and at partword.cmpxchg.end. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i8_shared_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .pred %p<3>; |
| ; SM60-NEXT: .reg .b16 %rs<2>; |
| ; SM60-NEXT: .reg .b32 %r<21>; |
| ; SM60-NEXT: .reg .b64 %rd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_shared_cta_param_2]; |
| ; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_shared_cta_param_0]; |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: ld.param.b8 %r9, [acq_rel_acquire_i8_shared_cta_param_1]; |
| ; SM60-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM60-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM60-NEXT: and.b32 %r11, %r10, 3; |
| ; SM60-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM60-NEXT: mov.b32 %r12, 255; |
| ; SM60-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM60-NEXT: not.b32 %r2, %r13; |
| ; SM60-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM60-NEXT: and.b32 %r15, %r14, 255; |
| ; SM60-NEXT: shl.b32 %r3, %r15, %r1; |
| ; SM60-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM60-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM60-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM60-NEXT: $L__BB65_1: // %partword.cmpxchg.loop |
| ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM60-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM60-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM60-NEXT: atom.cta.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18; |
| ; SM60-NEXT: @%p1 bra $L__BB65_3; |
| ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1 |
| ; SM60-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8; |
| ; SM60-NEXT: mov.b32 %r20, %r8; |
| ; SM60-NEXT: @%p2 bra $L__BB65_1; |
| ; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end |
| ; SM60-NEXT: membar.cta; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i32 cmpxchg on a generic pointer (no addrspace), syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.cta.cas.b32 (no state-space qualifier) |
| ; with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_generic_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_generic_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_generic_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_generic_cta_param_2]; |
| ; SM60-NEXT: atom.cta.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg on a shared (addrspace(3)) pointer, syncscope("block"), |
| ; success ordering acq_rel, failure ordering acquire. |
| ; The checks expect a single atom.cta.shared.cas.b32 with no membar. |
| ; The cmpxchg result (%pairold) is unused; the function returns %new. |
| define i32 @acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM60-LABEL: acq_rel_acquire_i32_shared_cta( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<4>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_shared_cta_param_0]; |
| ; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_shared_cta_param_1]; |
| ; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_shared_cta_param_2]; |
| ; SM60-NEXT: atom.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM60-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM60-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire |
| ret i32 %new |
| } |
| |