| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 |
| ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %} |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with monotonic success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX masks the address down to its |
| ; 4-byte-aligned word, runs a partword loop around atom.relaxed.cas.b32, and |
| ; contains no fence instructions. |
| define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_monotonic_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB0_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB0_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB0_1; |
| ; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with monotonic success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX uses the .global qualifier on |
| ; the initial load and on the 32-bit CAS (atom.relaxed.global.cas.b32) inside |
| ; the partword loop, and contains no fence instructions. |
| define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_monotonic_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB1_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB1_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB1_1; |
| ; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new monotonic monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer with monotonic success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX uses the .shared qualifier on |
| ; the initial load and on the 32-bit CAS (atom.relaxed.shared.cas.b32) inside |
| ; the partword loop, and contains no fence instructions. |
| define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_monotonic_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB2_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB2_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB2_1; |
| ; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new monotonic monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with monotonic success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.cas.b32 and places fence.acquire.sys after the loop exit, |
| ; just before the return value is stored. |
| define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_acquire_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_acquire_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB3_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB3_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB3_1; |
| ; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with monotonic success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.global.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_acquire_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_acquire_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB4_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB4_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB4_1; |
| ; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new monotonic acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer with monotonic success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.shared.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_acquire_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_acquire_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB5_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB5_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB5_1; |
| ; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new monotonic acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with monotonic success ordering |
| ; and seq_cst failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX places fence.sc.sys right |
| ; after the parameter loads, runs the partword loop around |
| ; atom.relaxed.cas.b32, and places fence.acquire.sys after the loop exit. |
| define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_seq_cst_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB6_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB6_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB6_1; |
| ; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with monotonic success ordering |
| ; and seq_cst failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX places fence.sc.sys right |
| ; after the parameter loads, runs the partword loop around |
| ; atom.relaxed.global.cas.b32, and places fence.acquire.sys after the loop |
| ; exit. |
| define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_seq_cst_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB7_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB7_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB7_1; |
| ; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new monotonic seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer with monotonic success ordering |
| ; and seq_cst failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX places fence.sc.sys right |
| ; after the parameter loads, runs the partword loop around |
| ; atom.relaxed.shared.cas.b32, and places fence.acquire.sys after the loop |
| ; exit. |
| define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [monotonic_seq_cst_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB8_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB8_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB8_1; |
| ; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new monotonic seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with acquire success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.cas.b32 and places fence.acquire.sys after the loop exit, |
| ; just before the return value is stored. |
| define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_monotonic_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_monotonic_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB9_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB9_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB9_1; |
| ; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acquire monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with acquire success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.global.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_monotonic_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_monotonic_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB10_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB10_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB10_1; |
| ; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acquire monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer with acquire success ordering |
| ; and monotonic failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.shared.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_monotonic_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_monotonic_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB11_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB11_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB11_1; |
| ; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acquire monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with acquire success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.cas.b32 and places fence.acquire.sys after the loop exit, |
| ; just before the return value is stored. |
| define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_acquire_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_acquire_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB12_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB12_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB12_1; |
| ; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acquire acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with acquire success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.global.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_acquire_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_acquire_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB13_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB13_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB13_1; |
| ; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acquire acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer with acquire success ordering |
| ; and acquire failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX runs the partword loop around |
| ; atom.relaxed.shared.cas.b32 and places fence.acquire.sys after the loop |
| ; exit, just before the return value is stored. |
| define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_acquire_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_acquire_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB14_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB14_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB14_1; |
| ; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acquire acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a generic pointer (ptr) with acquire success ordering |
| ; and seq_cst failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX places fence.sc.sys right |
| ; after the parameter loads, runs the partword loop around |
| ; atom.relaxed.cas.b32, and places fence.acquire.sys after the loop exit. |
| define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_seq_cst_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB15_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB15_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB15_1; |
| ; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acquire seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer with acquire success ordering |
| ; and seq_cst failure ordering. The cmpxchg result (%pairold) is unused and |
| ; the function returns %new. The expected PTX places fence.sc.sys right |
| ; after the parameter loads, runs the partword loop around |
| ; atom.relaxed.global.cas.b32, and places fence.acquire.sys after the loop |
| ; exit. |
| define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_seq_cst_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB16_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB16_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB16_1; |
| ; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acquire seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering acquire and failure ordering seq_cst. |
| ; The autogenerated checks below expect a leading fence.sc.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acquire_seq_cst_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB17_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB17_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB17_1; |
| ; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acquire seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a plain ptr (the _generic variant) with success ordering release and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.cas.b32 on the 4-byte-aligned word holding the byte, and no fence after the loop. |
| define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_monotonic_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_monotonic_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB18_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB18_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB18_1; |
| ; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new release monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(1) pointer (the _global variant) with success ordering release and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.global.cas.b32 on the 4-byte-aligned word holding the byte, and no fence after the loop. |
| define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_monotonic_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_monotonic_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB19_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB19_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB19_1; |
| ; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new release monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering release and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and no fence after the loop. |
| define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_monotonic_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_monotonic_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB20_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB20_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB20_1; |
| ; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new release monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a plain ptr (the _generic variant) with success ordering release and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_acquire_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_acquire_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i8_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_acquire_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB21_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB21_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB21_1; |
| ; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new release acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(1) pointer (the _global variant) with success ordering release and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.global.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_acquire_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_acquire_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i8_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_acquire_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB22_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB22_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB22_1; |
| ; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new release acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering release and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_acquire_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_acquire_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i8_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_acquire_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB23_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB23_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB23_1; |
| ; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new release acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a plain ptr (the _generic variant) with success ordering release and failure ordering seq_cst. |
| ; The autogenerated checks below expect a leading fence.sc.sys, a retry loop around a 32-bit atom.relaxed.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_seq_cst_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_seq_cst_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB24_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB24_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB24_1; |
| ; SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new release seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(1) pointer (the _global variant) with success ordering release and failure ordering seq_cst. |
| ; The autogenerated checks below expect a leading fence.sc.sys, a retry loop around a 32-bit atom.relaxed.global.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_seq_cst_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_seq_cst_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB25_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB25_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB25_1; |
| ; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new release seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering release and failure ordering seq_cst. |
| ; The autogenerated checks below expect a leading fence.sc.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: release_seq_cst_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [release_seq_cst_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB26_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB26_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB26_1; |
| ; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new release seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a plain ptr (the _generic variant) with success ordering acq_rel and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_monotonic_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB27_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB27_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB27_1; |
| ; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acq_rel monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(1) pointer (the _global variant) with success ordering acq_rel and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.global.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_monotonic_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB28_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB28_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB28_1; |
| ; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering acq_rel and failure ordering monotonic. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_monotonic_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB29_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB29_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB29_1; |
| ; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acq_rel monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a plain ptr (the _generic variant) with success ordering acq_rel and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_acquire_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB30_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB30_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB30_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB30_1; |
| ; SM90-NEXT: $L__BB30_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(1) pointer (the _global variant) with success ordering acq_rel and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.global.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_acquire_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB31_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB31_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB31_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB31_1; |
| ; SM90-NEXT: $L__BB31_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a ptr addrspace(3) pointer (the _shared variant) with success ordering acq_rel and failure ordering acquire. |
| ; The autogenerated checks below expect a leading fence.release.sys, a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on the 4-byte-aligned word holding the byte, and a trailing fence.acquire.sys. |
| define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_acquire_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB32_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB32_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB32_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB32_1; |
| ; SM90-NEXT: $L__BB32_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acq_rel acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a default-address-space (generic) pointer, success ordering |
| ; acq_rel, failure ordering seq_cst. The cmpxchg result (%pairold) is unused; |
| ; the function returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.cas loop on the enclosing 4-byte-aligned word (the other |
| ; three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_seq_cst_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB33_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB33_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB33_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB33_1; |
| ; SM90-NEXT: $L__BB33_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new acq_rel seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer, success ordering acq_rel, failure |
| ; ordering seq_cst. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.global.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_seq_cst_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB34_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB34_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB34_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB34_1; |
| ; SM90-NEXT: $L__BB34_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer, success ordering acq_rel, failure |
| ; ordering seq_cst. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.shared.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [acq_rel_seq_cst_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB35_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB35_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB35_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB35_1; |
| ; SM90-NEXT: $L__BB35_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new acq_rel seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a default-address-space (generic) pointer, success ordering |
| ; seq_cst, failure ordering monotonic. The cmpxchg result (%pairold) is unused; |
| ; the function returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.cas loop on the enclosing 4-byte-aligned word (the other |
| ; three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_monotonic_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB36_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB36_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB36_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB36_1; |
| ; SM90-NEXT: $L__BB36_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer, success ordering seq_cst, failure |
| ; ordering monotonic. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.global.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_monotonic_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB37_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB37_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB37_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB37_1; |
| ; SM90-NEXT: $L__BB37_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new seq_cst monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer, success ordering seq_cst, failure |
| ; ordering monotonic. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.shared.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_monotonic_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB38_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB38_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB38_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB38_1; |
| ; SM90-NEXT: $L__BB38_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new seq_cst monotonic |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a default-address-space (generic) pointer, success ordering |
| ; seq_cst, failure ordering acquire. The cmpxchg result (%pairold) is unused; |
| ; the function returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.cas loop on the enclosing 4-byte-aligned word (the other |
| ; three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_acquire_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB39_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB39_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB39_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB39_1; |
| ; SM90-NEXT: $L__BB39_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer, success ordering seq_cst, failure |
| ; ordering acquire. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.global.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_acquire_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB40_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB40_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB40_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB40_1; |
| ; SM90-NEXT: $L__BB40_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new seq_cst acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer, success ordering seq_cst, failure |
| ; ordering acquire. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.shared.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_acquire_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB41_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB41_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB41_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB41_1; |
| ; SM90-NEXT: $L__BB41_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new seq_cst acquire |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through a default-address-space (generic) pointer, success ordering |
| ; seq_cst, failure ordering seq_cst. The cmpxchg result (%pairold) is unused; |
| ; the function returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.cas loop on the enclosing 4-byte-aligned word (the other |
| ; three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i8_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_seq_cst_i8_generic_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB42_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB42_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB42_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB42_1; |
| ; SM90-NEXT: $L__BB42_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(1) pointer, success ordering seq_cst, failure |
| ; ordering seq_cst. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.global.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i8_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_seq_cst_i8_global_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.global.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB43_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB43_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB43_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB43_1; |
| ; SM90-NEXT: $L__BB43_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new seq_cst seq_cst |
| ret i8 %new |
| } |
| |
| ; i8 cmpxchg through an addrspace(3) pointer, success ordering seq_cst, failure |
| ; ordering seq_cst. The cmpxchg result (%pairold) is unused; the function |
| ; returns %new. |
| ; Expected PTX: fence.sc.sys first, then the byte exchange is emulated by a |
| ; 32-bit atom.relaxed.shared.cas loop on the enclosing 4-byte-aligned word (the |
| ; other three bytes are carried unchanged in both the compare and the new word), |
| ; followed by fence.acquire.sys. |
| define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i8_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<21>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM90-NEXT: and.b32 %r10, %r9, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM90-NEXT: mov.b32 %r11, 255; |
| ; SM90-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r12; |
| ; SM90-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM90-NEXT: and.b32 %r14, %r13, 255; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: ld.param.b8 %r15, [seq_cst_seq_cst_i8_shared_param_1]; |
| ; SM90-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r16, [%rd1]; |
| ; SM90-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM90-NEXT: $L__BB44_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM90-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM90-NEXT: @%p1 bra $L__BB44_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB44_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM90-NEXT: mov.b32 %r20, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB44_1; |
| ; SM90-NEXT: $L__BB44_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new seq_cst seq_cst |
| ret i8 %new |
| } |
| |
| ; i16 cmpxchg through a default-address-space (generic) pointer, success |
| ; ordering monotonic, failure ordering monotonic. The cmpxchg result (%pairold) |
| ; is unused; the function returns %new. |
| ; Expected PTX: no fences; the halfword exchange is emulated by a 32-bit |
| ; atom.relaxed.cas loop on the enclosing 4-byte-aligned word, using a 65535 |
| ; mask shifted to the halfword's position (the remaining bits of the word are |
| ; carried unchanged in both the compare and the new word). |
| define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_monotonic_i16_generic_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB45_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB45_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB45_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB45_1; |
| ; SM90-NEXT: $L__BB45_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer, success ordering monotonic, |
| ; failure ordering monotonic. The cmpxchg result (%pairold) is unused; the |
| ; function returns %new. |
| ; Expected PTX: no fences; the halfword exchange is emulated by a 32-bit |
| ; atom.relaxed.global.cas loop on the enclosing 4-byte-aligned word, using a |
| ; 65535 mask shifted to the halfword's position (the remaining bits of the word |
| ; are carried unchanged in both the compare and the new word). |
| define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_monotonic_i16_global_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB46_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB46_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB46_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB46_1; |
| ; SM90-NEXT: $L__BB46_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new monotonic monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer, success ordering monotonic, |
| ; failure ordering monotonic. The cmpxchg result (%pairold) is unused; the |
| ; function returns %new. |
| ; Expected PTX: no fences; the halfword exchange is emulated by a 32-bit |
| ; atom.relaxed.shared.cas loop on the enclosing 4-byte-aligned word, using a |
| ; 65535 mask shifted to the halfword's position (the remaining bits of the word |
| ; are carried unchanged in both the compare and the new word). |
| define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_monotonic_i16_shared_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB47_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB47_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB47_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB47_1; |
| ; SM90-NEXT: $L__BB47_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new monotonic monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a default-address-space (generic) pointer, success |
| ; ordering monotonic, failure ordering acquire. The cmpxchg result (%pairold) is |
| ; unused; the function returns %new. |
| ; Expected PTX: no leading fence; the halfword exchange is emulated by a 32-bit |
| ; atom.relaxed.cas loop on the enclosing 4-byte-aligned word, using a 65535 |
| ; mask shifted to the halfword's position (the remaining bits of the word are |
| ; carried unchanged in both the compare and the new word), followed by |
| ; fence.acquire.sys. |
| define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_acquire_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_acquire_i16_generic_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB48_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB48_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB48_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB48_1; |
| ; SM90-NEXT: $L__BB48_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with success ordering monotonic |
| ; and failure ordering acquire. |
| ; Expected PTX: same masked 32-bit retry loop as the plain-`ptr` variant, but |
| ; with .global-qualified accesses (ld.global.b32, atom.relaxed.global.cas.b32), |
| ; followed by fence.acquire.sys at the loop exit. |
| define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_acquire_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_acquire_i16_global_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB49_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB49_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB49_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB49_1; |
| ; SM90-NEXT: $L__BB49_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new monotonic acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with success ordering monotonic |
| ; and failure ordering acquire. |
| ; Expected PTX: masked 32-bit retry loop on the 4-byte-aligned containing word |
| ; using .shared-qualified accesses (ld.shared.b32, atom.relaxed.shared.cas.b32), |
| ; followed by fence.acquire.sys at the loop exit. |
| define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_acquire_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_acquire_i16_shared_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB50_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB50_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB50_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB50_1; |
| ; SM90-NEXT: $L__BB50_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new monotonic acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a plain `ptr` (default address space) with success ordering |
| ; monotonic and failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys is emitted right after the pointer parameter load, |
| ; then the masked retry loop around atom.relaxed.cas.b32 on the 4-byte-aligned |
| ; word holding %addr, then fence.acquire.sys at the loop exit. |
| define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_seq_cst_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB51_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB51_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB51_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB51_1; |
| ; SM90-NEXT: $L__BB51_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with success ordering monotonic |
| ; and failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys after the pointer parameter load, then the masked |
| ; 32-bit retry loop with .global-qualified accesses (ld.global.b32, |
| ; atom.relaxed.global.cas.b32), then fence.acquire.sys at the loop exit. |
| define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_seq_cst_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB52_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB52_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB52_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB52_1; |
| ; SM90-NEXT: $L__BB52_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new monotonic seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with success ordering monotonic |
| ; and failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys after the pointer parameter load, then the masked |
| ; 32-bit retry loop with .shared-qualified accesses (ld.shared.b32, |
| ; atom.relaxed.shared.cas.b32), then fence.acquire.sys at the loop exit. |
| define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [monotonic_seq_cst_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB53_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB53_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB53_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB53_1; |
| ; SM90-NEXT: $L__BB53_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new monotonic seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a plain `ptr` (default address space) with success ordering |
| ; acquire and failure ordering monotonic. |
| ; Expected PTX: masked retry loop around atom.relaxed.cas.b32 on the |
| ; 4-byte-aligned word holding %addr (address masked with -4, lane shift = |
| ; (addr & 3) * 8, lane mask 65535), with no fence before the loop and |
| ; fence.acquire.sys at the loop exit. |
| define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_monotonic_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_monotonic_i16_generic_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB54_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB54_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB54_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB54_1; |
| ; SM90-NEXT: $L__BB54_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acquire monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with success ordering acquire and |
| ; failure ordering monotonic. |
| ; Expected PTX: masked 32-bit retry loop with .global-qualified accesses |
| ; (ld.global.b32, atom.relaxed.global.cas.b32), with no fence before the loop |
| ; and fence.acquire.sys at the loop exit. |
| define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_monotonic_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_monotonic_i16_global_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB55_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB55_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB55_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB55_1; |
| ; SM90-NEXT: $L__BB55_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acquire monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with success ordering acquire and |
| ; failure ordering monotonic. |
| ; Expected PTX: masked 32-bit retry loop with .shared-qualified accesses |
| ; (ld.shared.b32, atom.relaxed.shared.cas.b32), with no fence before the loop |
| ; and fence.acquire.sys at the loop exit. |
| define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_monotonic_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_monotonic_i16_shared_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB56_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB56_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB56_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB56_1; |
| ; SM90-NEXT: $L__BB56_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acquire monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a plain `ptr` (default address space) with acquire ordering |
| ; for both success and failure. |
| ; Expected PTX: masked retry loop around atom.relaxed.cas.b32 on the |
| ; 4-byte-aligned word holding %addr (address masked with -4, lane shift = |
| ; (addr & 3) * 8, lane mask 65535), with no fence before the loop and |
| ; fence.acquire.sys at the loop exit. |
| define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_acquire_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_generic_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_acquire_i16_generic_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB57_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB57_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB57_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB57_1; |
| ; SM90-NEXT: $L__BB57_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acquire acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with acquire ordering for both |
| ; success and failure. |
| ; Expected PTX: masked 32-bit retry loop with .global-qualified accesses |
| ; (ld.global.b32, atom.relaxed.global.cas.b32), with no fence before the loop |
| ; and fence.acquire.sys at the loop exit. |
| define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_acquire_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_global_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_acquire_i16_global_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB58_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB58_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB58_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB58_1; |
| ; SM90-NEXT: $L__BB58_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acquire acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with acquire ordering for both |
| ; success and failure. |
| ; Expected PTX: masked 32-bit retry loop with .shared-qualified accesses |
| ; (ld.shared.b32, atom.relaxed.shared.cas.b32), with no fence before the loop |
| ; and fence.acquire.sys at the loop exit. |
| define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_acquire_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_shared_param_0]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_acquire_i16_shared_param_1]; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB59_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB59_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB59_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB59_1; |
| ; SM90-NEXT: $L__BB59_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acquire acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a plain `ptr` (default address space) with success ordering |
| ; acquire and failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys is emitted right after the pointer parameter load, |
| ; then the masked retry loop around atom.relaxed.cas.b32 on the 4-byte-aligned |
| ; word holding %addr, then fence.acquire.sys at the loop exit. |
| define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_seq_cst_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB60_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB60_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB60_1; |
| ; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acquire seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with success ordering acquire and |
| ; failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys after the pointer parameter load, then the masked |
| ; 32-bit retry loop with .global-qualified accesses (ld.global.b32, |
| ; atom.relaxed.global.cas.b32), then fence.acquire.sys at the loop exit. |
| define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_seq_cst_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB61_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB61_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB61_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB61_1; |
| ; SM90-NEXT: $L__BB61_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acquire seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with success ordering acquire and |
| ; failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys after the pointer parameter load, then the masked |
| ; 32-bit retry loop with .shared-qualified accesses (ld.shared.b32, |
| ; atom.relaxed.shared.cas.b32), then fence.acquire.sys at the loop exit. |
| define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acquire_seq_cst_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB62_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB62_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB62_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB62_1; |
| ; SM90-NEXT: $L__BB62_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acquire seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a plain `ptr` (default address space) with success ordering |
| ; release and failure ordering monotonic. |
| ; Expected PTX: fence.release.sys is emitted right after the pointer parameter |
| ; load, then the masked retry loop around atom.relaxed.cas.b32 on the |
| ; 4-byte-aligned word holding %addr. No fence follows the loop exit. |
| define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_monotonic_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_monotonic_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB63_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB63_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB63_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB63_1; |
| ; SM90-NEXT: $L__BB63_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new release monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(1)` pointer with success ordering release and |
| ; failure ordering monotonic. |
| ; Expected PTX: fence.release.sys after the pointer parameter load, then the |
| ; masked 32-bit retry loop with .global-qualified accesses (ld.global.b32, |
| ; atom.relaxed.global.cas.b32). No fence follows the loop exit. |
| define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_monotonic_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_monotonic_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB64_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB64_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB64_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB64_1; |
| ; SM90-NEXT: $L__BB64_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new release monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg on a `ptr addrspace(3)` pointer with success ordering release and |
| ; failure ordering monotonic. |
| ; Expected PTX: fence.release.sys after the pointer parameter load, then the |
| ; masked 32-bit retry loop with .shared-qualified accesses (ld.shared.b32, |
| ; atom.relaxed.shared.cas.b32). No fence follows the loop exit. |
| define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_monotonic_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_monotonic_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB65_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB65_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB65_1; |
| ; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; %pairold is never read; the function returns its %new argument unchanged. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new release monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering release, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_acquire_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_acquire_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i16_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_acquire_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB66_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB66_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB66_1; |
| ; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new release acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer; success ordering release, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.global.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_acquire_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_acquire_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i16_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_acquire_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB67_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB67_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB67_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB67_1; |
| ; SM90-NEXT: $L__BB67_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new release acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer; success ordering release, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.shared.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_acquire_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_acquire_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i16_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_acquire_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB68_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB68_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB68_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB68_1; |
| ; SM90-NEXT: $L__BB68_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new release acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering release, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_seq_cst_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_seq_cst_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB69_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB69_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB69_1; |
| ; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new release seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer; success ordering release, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.global.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_seq_cst_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_seq_cst_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB70_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB70_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB70_1; |
| ; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new release seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer; success ordering release, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.shared.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: release_seq_cst_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [release_seq_cst_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB71_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB71_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB71_1; |
| ; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new release seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering acq_rel, failure ordering monotonic. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_monotonic_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB72_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB72_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB72_1; |
| ; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acq_rel monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer; success ordering acq_rel, failure ordering monotonic. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.global.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_monotonic_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB73_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB73_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB73_1; |
| ; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acq_rel monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer; success ordering acq_rel, failure ordering monotonic. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.shared.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_monotonic_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB74_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB74_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB74_1; |
| ; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acq_rel monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering acq_rel, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_generic_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_acquire_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB75_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB75_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB75_1; |
| ; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acq_rel acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer; success ordering acq_rel, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.global.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_global_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_acquire_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB76_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB76_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB76_1; |
| ; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acq_rel acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer; success ordering acq_rel, failure ordering acquire. |
| ; Expected SM90 PTX: fence.release.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.shared.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_shared_param_0]; |
| ; SM90-NEXT: fence.release.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_acquire_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB77_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB77_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB77_1; |
| ; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acq_rel acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering acq_rel, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_seq_cst_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB78_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB78_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB78_1; |
| ; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new acq_rel seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(1) pointer; success ordering acq_rel, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.global.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_seq_cst_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB79_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB79_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB79_1; |
| ; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new acq_rel seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through an addrspace(3) pointer; success ordering acq_rel, failure ordering seq_cst. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.shared.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [acq_rel_seq_cst_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB80_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB80_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB80_1; |
| ; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new acq_rel seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg through a plain `ptr`; success ordering seq_cst, failure ordering monotonic. |
| ; Expected SM90 PTX: fence.sc.sys, then the halfword is handled inside its containing |
| ; 4-byte-aligned word (address & -4, 65535 mask shifted left by (address & 3) * 8) using a |
| ; 32-bit atom.relaxed.cas loop, followed by fence.acquire.sys at the loop exit. |
| ; The loop repeats only when the CAS fails and the bits outside the halfword have changed. |
| define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_monotonic_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB81_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB81_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB81_1; |
| ; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| ; The cmpxchg result %pairold is not used; the function returns the %new argument. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering monotonic, on an addrspace(1) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.global.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_monotonic_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB82_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB82_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB82_1; |
| ; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new seq_cst monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering monotonic, on an addrspace(3) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_monotonic_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB83_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB83_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB83_1; |
| ; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new seq_cst monotonic |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering acquire, on a plain `ptr`. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.cas.b32 on the |
| ; enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_acquire_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB84_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB84_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB84_1; |
| ; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering acquire, on an addrspace(1) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.global.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_acquire_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB85_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB85_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB85_1; |
| ; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new seq_cst acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering acquire, on an addrspace(3) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_acquire_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB86_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB86_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB86_1; |
| ; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new seq_cst acquire |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering seq_cst, on a plain `ptr`. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.cas.b32 on the |
| ; enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i16_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_generic_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_seq_cst_i16_generic_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB87_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB87_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB87_1; |
| ; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering seq_cst, on an addrspace(1) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.global.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i16_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_global_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_seq_cst_i16_global_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.global.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB88_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB88_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB88_1; |
| ; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new seq_cst seq_cst |
| ret i16 %new |
| } |
| |
| ; i16 cmpxchg, success ordering seq_cst / failure ordering seq_cst, on an addrspace(3) pointer. |
| ; Expected PTX: fence.sc.sys, then a retry loop around a 32-bit atom.relaxed.shared.cas.b32 on |
| ; the enclosing word (address masked with -4, halfword shifted into place under a 65535 mask), |
| ; then fence.acquire.sys. The cmpxchg result %pairold is unused; the function returns %new. |
| define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i16_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .pred %p<3>; |
| ; SM90-NEXT: .reg .b16 %rs<2>; |
| ; SM90-NEXT: .reg .b32 %r<20>; |
| ; SM90-NEXT: .reg .b64 %rd<3>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_shared_param_2]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b16 %r9, [seq_cst_seq_cst_i16_shared_param_1]; |
| ; SM90-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM90-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM90-NEXT: and.b32 %r11, %r10, 3; |
| ; SM90-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM90-NEXT: mov.b32 %r12, 65535; |
| ; SM90-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM90-NEXT: not.b32 %r2, %r13; |
| ; SM90-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM90-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM90-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM90-NEXT: ld.shared.b32 %r15, [%rd1]; |
| ; SM90-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM90-NEXT: $L__BB89_1: // %partword.cmpxchg.loop |
| ; SM90-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM90-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM90-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM90-NEXT: @%p1 bra $L__BB89_3; |
| ; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1 |
| ; SM90-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM90-NEXT: mov.b32 %r19, %r8; |
| ; SM90-NEXT: @%p2 bra $L__BB89_1; |
| ; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end |
| ; SM90-NEXT: fence.acquire.sys; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i16 %cmp, i16 %new seq_cst seq_cst |
| ret i16 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering monotonic, on a plain `ptr`. |
| ; Expected PTX: a single atom.relaxed.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_monotonic_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_generic_param_2]; |
| ; SM90-NEXT: atom.relaxed.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering monotonic, on an addrspace(1) pointer. |
| ; Expected PTX: a single atom.relaxed.global.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_monotonic_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_global_param_2]; |
| ; SM90-NEXT: atom.relaxed.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new monotonic monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering monotonic, on an addrspace(3) pointer. |
| ; Expected PTX: a single atom.relaxed.shared.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_monotonic_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_shared_param_2]; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new monotonic monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering acquire, on a plain `ptr`. |
| ; Expected PTX: a single atom.acquire.cas.b32 (acquire even though the success ordering is |
| ; monotonic), with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_acquire_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_acquire_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering acquire, on an addrspace(1) pointer. |
| ; Expected PTX: a single atom.acquire.global.cas.b32 (acquire even though the success ordering |
| ; is monotonic), with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_acquire_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new monotonic acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering acquire, on an addrspace(3) pointer. |
| ; Expected PTX: a single atom.acquire.shared.cas.b32 (acquire even though the success ordering |
| ; is monotonic), with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_acquire_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_acquire_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new monotonic acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering seq_cst, on a plain `ptr`. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.cas.b32; no retry loop and no |
| ; trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_seq_cst_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering seq_cst, on an addrspace(1) pointer. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.global.cas.b32; no retry loop and |
| ; no trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_seq_cst_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new monotonic seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering monotonic / failure ordering seq_cst, on an addrspace(3) pointer. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.shared.cas.b32; no retry loop and |
| ; no trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @monotonic_seq_cst_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new monotonic seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering monotonic, on a plain `ptr`. |
| ; Expected PTX: a single atom.acquire.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_monotonic_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_monotonic_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acquire monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering monotonic, on an addrspace(1) pointer. |
| ; Expected PTX: a single atom.acquire.global.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_monotonic_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_monotonic_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acquire monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering monotonic, on an addrspace(3) pointer. |
| ; Expected PTX: a single atom.acquire.shared.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_monotonic_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_monotonic_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acquire monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering acquire, on a plain `ptr`. |
| ; Expected PTX: a single atom.acquire.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_acquire_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_acquire_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acquire acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering acquire, on an addrspace(1) pointer. |
| ; Expected PTX: a single atom.acquire.global.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_acquire_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acquire acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering acquire, on an addrspace(3) pointer. |
| ; Expected PTX: a single atom.acquire.shared.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_acquire_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_acquire_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acquire acquire |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering seq_cst, on a plain `ptr`. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.cas.b32; no retry loop and no |
| ; trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_seq_cst_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acquire seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering seq_cst, on an addrspace(1) pointer. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.global.cas.b32; no retry loop and |
| ; no trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_seq_cst_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acquire seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering acquire / failure ordering seq_cst, on an addrspace(3) pointer. |
| ; Expected PTX: fence.sc.sys followed by a single atom.acquire.shared.cas.b32; no retry loop and |
| ; no trailing fence. The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @acquire_seq_cst_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acquire seq_cst |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering release / failure ordering monotonic, on a plain `ptr`. |
| ; Expected PTX: a single atom.release.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @release_monotonic_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_monotonic_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_monotonic_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_monotonic_i32_generic_param_2]; |
| ; SM90-NEXT: atom.release.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new release monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering release / failure ordering monotonic, on an addrspace(1) pointer. |
| ; Expected PTX: a single atom.release.global.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @release_monotonic_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_monotonic_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_monotonic_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_monotonic_i32_global_param_2]; |
| ; SM90-NEXT: atom.release.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new release monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering release / failure ordering monotonic, on an addrspace(3) pointer. |
| ; Expected PTX: a single atom.release.shared.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @release_monotonic_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_monotonic_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_monotonic_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_monotonic_i32_shared_param_2]; |
| ; SM90-NEXT: atom.release.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new release monotonic |
| ret i32 %new |
| } |
| |
| ; i32 cmpxchg, success ordering release / failure ordering acquire, on a plain `ptr`. |
| ; Expected PTX: a single atom.acq_rel.cas.b32 with no fence and no retry loop. |
| ; The cmpxchg result %pairold is unused; the function returns %new. |
| define i32 @release_acquire_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_acquire_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_acquire_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_acquire_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new release acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering release, failure ordering acquire. |
| ; Expected PTX: a single atom.acq_rel.global.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @release_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_acquire_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_acquire_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_acquire_i32_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new release acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering release, failure ordering acquire. |
| ; Expected PTX: a single atom.acq_rel.shared.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @release_acquire_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_acquire_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_acquire_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_acquire_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new release acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering release, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @release_seq_cst_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_seq_cst_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new release seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering release, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @release_seq_cst_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_seq_cst_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new release seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering release, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @release_seq_cst_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: release_seq_cst_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new release seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering acq_rel, failure ordering monotonic. |
| ; Expected PTX: a single atom.acq_rel.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_monotonic_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acq_rel monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering acq_rel, failure ordering monotonic. |
| ; Expected PTX: a single atom.acq_rel.global.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_monotonic_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering acq_rel, failure ordering monotonic. |
| ; Expected PTX: a single atom.acq_rel.shared.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_monotonic_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acq_rel monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering acq_rel, failure ordering acquire. |
| ; Expected PTX: a single atom.acq_rel.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_acquire_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_generic_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering acq_rel, failure ordering acquire. |
| ; Expected PTX: a single atom.acq_rel.global.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering acq_rel, failure ordering acquire. |
| ; Expected PTX: a single atom.acq_rel.shared.cas.b32 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_acquire_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_shared_param_0]; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acq_rel acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering acq_rel, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_seq_cst_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new acq_rel seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering acq_rel, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_seq_cst_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering acq_rel, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @acq_rel_seq_cst_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new acq_rel seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering seq_cst, failure ordering monotonic. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_monotonic_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering seq_cst, failure ordering monotonic. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_monotonic_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new seq_cst monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering seq_cst, failure ordering monotonic. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_monotonic_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new seq_cst monotonic |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success ordering seq_cst, failure ordering acquire. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_acquire_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success ordering seq_cst, failure ordering acquire. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new seq_cst acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success ordering seq_cst, failure ordering acquire. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_acquire_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new seq_cst acquire |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 through a generic pointer (plain ptr); success and failure orderings are both seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_seq_cst_i32_generic(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i32_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_generic_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in global memory (addrspace(1)); success and failure orderings are both seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_seq_cst_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i32_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_global_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new seq_cst seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i32 in shared memory (addrspace(3)); success and failure orderings are both seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b32. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i32 @seq_cst_seq_cst_i32_shared(ptr addrspace(3) %addr, i32 %cmp, i32 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i32_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b32 %r<4>; |
| ; SM90-NEXT: .reg .b64 %rd<2>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_shared_param_1]; |
| ; SM90-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM90-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new seq_cst seq_cst |
| ret i32 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success and failure orderings are both monotonic. |
| ; Expected PTX: a single atom.relaxed.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_monotonic_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_generic_param_2]; |
| ; SM90-NEXT: atom.relaxed.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success and failure orderings are both monotonic. |
| ; Expected PTX: a single atom.relaxed.global.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_monotonic_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_global_param_2]; |
| ; SM90-NEXT: atom.relaxed.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new monotonic monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success and failure orderings are both monotonic. |
| ; Expected PTX: a single atom.relaxed.shared.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_monotonic_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_monotonic_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_shared_param_2]; |
| ; SM90-NEXT: atom.relaxed.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new monotonic monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success ordering monotonic, failure ordering acquire. |
| ; Expected PTX: a single atom.acquire.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_acquire_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_acquire_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success ordering monotonic, failure ordering acquire. |
| ; Expected PTX: a single atom.acquire.global.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_acquire_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_acquire_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new monotonic acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success ordering monotonic, failure ordering acquire. |
| ; Expected PTX: a single atom.acquire.shared.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_acquire_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_acquire_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new monotonic acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success ordering monotonic, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_seq_cst_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success ordering monotonic, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_seq_cst_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new monotonic seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success ordering monotonic, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @monotonic_seq_cst_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: monotonic_seq_cst_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new monotonic seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success ordering acquire, failure ordering monotonic. |
| ; Expected PTX: a single atom.acquire.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_monotonic_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_monotonic_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acquire monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success ordering acquire, failure ordering monotonic. |
| ; Expected PTX: a single atom.acquire.global.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_monotonic_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_monotonic_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acquire monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success ordering acquire, failure ordering monotonic. |
| ; Expected PTX: a single atom.acquire.shared.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_monotonic_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_monotonic_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acquire monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success and failure orderings are both acquire. |
| ; Expected PTX: a single atom.acquire.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_acquire_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_acquire_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acquire acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success and failure orderings are both acquire. |
| ; Expected PTX: a single atom.acquire.global.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_acquire_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_acquire_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acquire acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success and failure orderings are both acquire. |
| ; Expected PTX: a single atom.acquire.shared.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_acquire_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_acquire_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acquire acquire |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success ordering acquire, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_seq_cst_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acquire seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success ordering acquire, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.global.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_seq_cst_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acquire seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success ordering acquire, failure ordering seq_cst. |
| ; Expected PTX: fence.sc.sys ahead of a single atom.acquire.shared.cas.b64. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @acquire_seq_cst_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acquire_seq_cst_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acquire seq_cst |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 through a generic pointer (plain ptr); success ordering release, failure ordering monotonic. |
| ; Expected PTX: a single atom.release.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @release_monotonic_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_monotonic_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_generic_param_2]; |
| ; SM90-NEXT: atom.release.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new release monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in global memory (addrspace(1)); success ordering release, failure ordering monotonic. |
| ; Expected PTX: a single atom.release.global.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @release_monotonic_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_monotonic_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_global_param_2]; |
| ; SM90-NEXT: atom.release.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new release monotonic |
| ret i64 %new |
| } |
| |
| ; cmpxchg on an i64 in shared memory (addrspace(3)); success ordering release, failure ordering monotonic. |
| ; Expected PTX: a single atom.release.shared.cas.b64 with no fence. |
| ; %pairold is never read; the function returns the %new argument unchanged. |
| define i64 @release_monotonic_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_monotonic_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_shared_param_2]; |
| ; SM90-NEXT: atom.release.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new release monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering release and |
| ; failure ordering acquire. The SM90 checks expect one atom.acq_rel.cas.b64 |
| ; with no fence in the checked sequence. The function returns %new; the |
| ; cmpxchg result %pairold is unused. |
| define i64 @release_acquire_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_acquire_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_acquire_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new release acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering release and failure ordering acquire. The SM90 checks |
| ; expect one atom.acq_rel.global.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @release_acquire_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_acquire_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_acquire_i64_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new release acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering release and failure ordering acquire. The SM90 checks |
| ; expect one atom.acq_rel.shared.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @release_acquire_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_acquire_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_acquire_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new release acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering release and |
| ; failure ordering seq_cst. The SM90 checks expect fence.sc.sys ahead of |
| ; atom.acquire.cas.b64. The function returns %new; the cmpxchg result |
| ; %pairold is unused. |
| define i64 @release_seq_cst_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_seq_cst_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new release seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering release and failure ordering seq_cst. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.global.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @release_seq_cst_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_seq_cst_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new release seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering release and failure ordering seq_cst. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.shared.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @release_seq_cst_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: release_seq_cst_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new release seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering acq_rel and |
| ; failure ordering monotonic. The SM90 checks expect one atom.acq_rel.cas.b64 |
| ; with no fence in the checked sequence. The function returns %new; the |
| ; cmpxchg result %pairold is unused. |
| define i64 @acq_rel_monotonic_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acq_rel monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering acq_rel and failure ordering monotonic. The SM90 checks |
| ; expect one atom.acq_rel.global.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_monotonic_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acq_rel monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering acq_rel and failure ordering monotonic. The SM90 checks |
| ; expect one atom.acq_rel.shared.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_monotonic_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_monotonic_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acq_rel monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering acq_rel and |
| ; failure ordering acquire. The SM90 checks expect one atom.acq_rel.cas.b64 |
| ; with no fence in the checked sequence. The function returns %new; the |
| ; cmpxchg result %pairold is unused. |
| define i64 @acq_rel_acquire_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_generic_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acq_rel.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acq_rel acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering acq_rel and failure ordering acquire. The SM90 checks |
| ; expect one atom.acq_rel.global.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_acquire_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_global_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_global_param_2]; |
| ; SM90-NEXT: atom.acq_rel.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acq_rel acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering acq_rel and failure ordering acquire. The SM90 checks |
| ; expect one atom.acq_rel.shared.cas.b64 with no fence in the checked |
| ; sequence. The function returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_acquire_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_acquire_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_shared_param_0]; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acq_rel.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acq_rel acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering acq_rel and |
| ; failure ordering seq_cst. The SM90 checks expect fence.sc.sys ahead of |
| ; atom.acquire.cas.b64. The function returns %new; the cmpxchg result |
| ; %pairold is unused. |
| define i64 @acq_rel_seq_cst_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new acq_rel seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering acq_rel and failure ordering seq_cst. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.global.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_seq_cst_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new acq_rel seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering acq_rel and failure ordering seq_cst. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.shared.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @acq_rel_seq_cst_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: acq_rel_seq_cst_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new acq_rel seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering seq_cst and |
| ; failure ordering monotonic. The SM90 checks expect fence.sc.sys ahead of |
| ; atom.acquire.cas.b64. The function returns %new; the cmpxchg result |
| ; %pairold is unused. |
| define i64 @seq_cst_monotonic_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering seq_cst and failure ordering monotonic. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.global.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_monotonic_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new seq_cst monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering seq_cst and failure ordering monotonic. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.shared.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_monotonic_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_monotonic_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new seq_cst monotonic |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with success ordering seq_cst and |
| ; failure ordering acquire. The SM90 checks expect fence.sc.sys ahead of |
| ; atom.acquire.cas.b64. The function returns %new; the cmpxchg result |
| ; %pairold is unused. |
| define i64 @seq_cst_acquire_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; success ordering seq_cst and failure ordering acquire. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.global.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_acquire_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new seq_cst acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; success ordering seq_cst and failure ordering acquire. The SM90 checks |
| ; expect fence.sc.sys ahead of atom.acquire.shared.cas.b64. The function |
| ; returns %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_acquire_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_acquire_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new seq_cst acquire |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a generic `ptr` with seq_cst for both the success |
| ; and failure orderings. The SM90 checks expect fence.sc.sys ahead of |
| ; atom.acquire.cas.b64. The function returns %new; the cmpxchg result |
| ; %pairold is unused. |
| define i64 @seq_cst_seq_cst_i64_generic(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i64_generic( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_generic_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_generic_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_generic_param_2]; |
| ; SM90-NEXT: atom.acquire.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(1)` (checked as `.global`) with |
| ; seq_cst for both the success and failure orderings. The SM90 checks expect |
| ; fence.sc.sys ahead of atom.acquire.global.cas.b64. The function returns |
| ; %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_seq_cst_i64_global(ptr addrspace(1) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i64_global( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_global_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_global_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_global_param_2]; |
| ; SM90-NEXT: atom.acquire.global.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new seq_cst seq_cst |
| ret i64 %new |
| } |
| |
| ; 64-bit cmpxchg through a `ptr addrspace(3)` (checked as `.shared`) with |
| ; seq_cst for both the success and failure orderings. The SM90 checks expect |
| ; fence.sc.sys ahead of atom.acquire.shared.cas.b64. The function returns |
| ; %new; the cmpxchg result %pairold is unused. |
| define i64 @seq_cst_seq_cst_i64_shared(ptr addrspace(3) %addr, i64 %cmp, i64 %new) { |
| ; SM90-LABEL: seq_cst_seq_cst_i64_shared( |
| ; SM90: { |
| ; SM90-NEXT: .reg .b64 %rd<5>; |
| ; SM90-EMPTY: |
| ; SM90-NEXT: // %bb.0: |
| ; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_shared_param_0]; |
| ; SM90-NEXT: fence.sc.sys; |
| ; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_shared_param_1]; |
| ; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_shared_param_2]; |
| ; SM90-NEXT: atom.acquire.shared.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM90-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM90-NEXT: ret; |
| %pairold = cmpxchg ptr addrspace(3) %addr, i64 %cmp, i64 %new seq_cst seq_cst |
| ret i64 %new |
| } |
| |