| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %} |
| |
| |
| ; atom0: seq_cst `atomicrmw add` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.add.u32 (no state-space qualifier) whose |
| ; destination register is stored to func_retval0. |
| ; CHECK-LABEL: atom0 |
| define i32 @atom0(ptr %addr, i32 %val) { |
| ; CHECK-LABEL: atom0( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom0_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom0_param_1]; |
| ; CHECK-NEXT: atom.add.u32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw add ptr %addr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom1: 64-bit counterpart of atom0 -- seq_cst `atomicrmw add` of an i64. |
| ; The expected PTX is a single atom.add.u64 and uses only .b64 registers. |
| ; CHECK-LABEL: atom1 |
| define i64 @atom1(ptr %addr, i64 %val) { |
| ; CHECK-LABEL: atom1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom1_param_1]; |
| ; CHECK-NEXT: atom.add.u64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw add ptr %addr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom2: seq_cst `atomicrmw sub` of an i32 through a generic pointer. |
| ; The expected PTX has no atomic subtract: the operand is negated with |
| ; neg.s32 and the negated value is then applied with atom.add.u32. |
| ; CHECK-LABEL: atom2 |
| define i32 @atom2(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom2_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom2_param_1]; |
| ; CHECK-NEXT: neg.s32 %r2, %r1; |
| ; CHECK-NEXT: atom.add.u32 %r3, [%rd1], %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw sub ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom3: seq_cst `atomicrmw sub` of an i64 through a generic pointer. |
| ; As in the 32-bit case, the expected PTX negates the operand (neg.s64) and |
| ; applies it with atom.add.u64 rather than using an atomic subtract. |
| ; CHECK-LABEL: atom3 |
| define i64 @atom3(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom3_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom3_param_1]; |
| ; CHECK-NEXT: neg.s64 %rd3, %rd2; |
| ; CHECK-NEXT: atom.add.u64 %rd4, [%rd1], %rd3; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw sub ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom4: seq_cst `atomicrmw and` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.and.b32 (untyped-bits form). |
| ; CHECK-LABEL: atom4 |
| define i32 @atom4(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom4( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom4_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom4_param_1]; |
| ; CHECK-NEXT: atom.and.b32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw and ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom5: seq_cst `atomicrmw and` of an i64 through a generic pointer. |
| ; The expected PTX is a single atom.and.b64. |
| ; CHECK-LABEL: atom5 |
| define i64 @atom5(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom5( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom5_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom5_param_1]; |
| ; CHECK-NEXT: atom.and.b64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw and ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ;; NAND not yet supported |
| ;define i32 @atom6(ptr %subr, i32 %val) { |
| ; %ret = atomicrmw nand ptr %subr, i32 %val seq_cst |
| ; ret i32 %ret |
| ;} |
| |
| ;define i64 @atom7(ptr %subr, i64 %val) { |
| ; %ret = atomicrmw nand ptr %subr, i64 %val seq_cst |
| ; ret i64 %ret |
| ;} |
| |
| ; atom8: seq_cst `atomicrmw or` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.or.b32. |
| ; CHECK-LABEL: atom8 |
| define i32 @atom8(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom8_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom8_param_1]; |
| ; CHECK-NEXT: atom.or.b32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw or ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom9: seq_cst `atomicrmw or` of an i64 through a generic pointer. |
| ; The expected PTX is a single atom.or.b64. |
| ; CHECK-LABEL: atom9 |
| define i64 @atom9(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom9( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom9_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom9_param_1]; |
| ; CHECK-NEXT: atom.or.b64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw or ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom10: seq_cst `atomicrmw xor` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.xor.b32. |
| ; CHECK-LABEL: atom10 |
| define i32 @atom10(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom10( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom10_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom10_param_1]; |
| ; CHECK-NEXT: atom.xor.b32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw xor ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom11: seq_cst `atomicrmw xor` of an i64 through a generic pointer. |
| ; The expected PTX is a single atom.xor.b64. |
| ; CHECK-LABEL: atom11 |
| define i64 @atom11(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom11( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom11_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom11_param_1]; |
| ; CHECK-NEXT: atom.xor.b64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw xor ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom12: seq_cst `atomicrmw max` (signed maximum) of an i32. |
| ; The expected PTX is a single atom.max.s32 -- note the signed .s32 type, in |
| ; contrast with the .u32 form expected for `umax`. |
| ; CHECK-LABEL: atom12 |
| define i32 @atom12(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom12( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom12_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom12_param_1]; |
| ; CHECK-NEXT: atom.max.s32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw max ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom13: seq_cst `atomicrmw max` (signed maximum) of an i64. |
| ; The expected PTX is a single atom.max.s64. |
| ; CHECK-LABEL: atom13 |
| define i64 @atom13(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom13( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom13_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom13_param_1]; |
| ; CHECK-NEXT: atom.max.s64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw max ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom14: seq_cst `atomicrmw min` (signed minimum) of an i32. |
| ; The expected PTX is a single atom.min.s32. |
| ; CHECK-LABEL: atom14 |
| define i32 @atom14(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom14( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom14_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom14_param_1]; |
| ; CHECK-NEXT: atom.min.s32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw min ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom15: seq_cst `atomicrmw min` (signed minimum) of an i64. |
| ; The expected PTX is a single atom.min.s64. |
| ; CHECK-LABEL: atom15 |
| define i64 @atom15(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom15( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom15_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom15_param_1]; |
| ; CHECK-NEXT: atom.min.s64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw min ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom16: seq_cst `atomicrmw umax` (unsigned maximum) of an i32. |
| ; The expected PTX is a single atom.max.u32 -- the unsigned .u32 type is what |
| ; distinguishes it from the signed `max` case. |
| ; CHECK-LABEL: atom16 |
| define i32 @atom16(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom16_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom16_param_1]; |
| ; CHECK-NEXT: atom.max.u32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umax ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom17: seq_cst `atomicrmw umax` (unsigned maximum) of an i64. |
| ; The expected PTX is a single atom.max.u64. |
| ; CHECK-LABEL: atom17 |
| define i64 @atom17(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom17( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom17_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom17_param_1]; |
| ; CHECK-NEXT: atom.max.u64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umax ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom18: seq_cst `atomicrmw umin` (unsigned minimum) of an i32. |
| ; The expected PTX is a single atom.min.u32. |
| ; CHECK-LABEL: atom18 |
| define i32 @atom18(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom18( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom18_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom18_param_1]; |
| ; CHECK-NEXT: atom.min.u32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umin ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom19: seq_cst `atomicrmw umin` (unsigned minimum) of an i64. |
| ; The expected PTX is a single atom.min.u64. |
| ; CHECK-LABEL: atom19 |
| define i64 @atom19(ptr %subr, i64 %val) { |
| ; CHECK-LABEL: atom19( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom19_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atom19_param_1]; |
| ; CHECK-NEXT: atom.min.u64 %rd3, [%rd1], %rd2; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw umin ptr %subr, i64 %val seq_cst |
| ret i64 %ret |
| } |
| |
| ; atom20: seq_cst `atomicrmw uinc_wrap` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.inc.u32, with %val passed as its operand. |
| define i32 @atom20(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom20( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom20_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom20_param_1]; |
| ; CHECK-NEXT: atom.inc.u32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw uinc_wrap ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| ; atom21: seq_cst `atomicrmw udec_wrap` of an i32 through a generic pointer. |
| ; The expected PTX is a single atom.dec.u32, with %val passed as its operand. |
| define i32 @atom21(ptr %subr, i32 %val) { |
| ; CHECK-LABEL: atom21( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atom21_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atom21_param_1]; |
| ; CHECK-NEXT: atom.dec.u32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw udec_wrap ptr %subr, i32 %val seq_cst |
| ret i32 %ret |
| } |
| |
| declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val) |
| |
| ; atomic_add_f32_generic: float atomic add requested through the |
| ; @llvm.nvvm.atomic.load.add.f32.p0 intrinsic on a generic pointer. |
| ; The expected PTX is atom.add.f32 with no state-space qualifier; the float |
| ; operand and result are carried in .b32 (%r) registers. |
| ; CHECK-LABEL: atomic_add_f32_generic |
| define float @atomic_add_f32_generic(ptr %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_generic_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_generic_param_1]; |
| ; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val) |
| ret float %ret |
| } |
| |
| declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val) |
| |
| ; atomic_add_f32_addrspace1: float atomic add through the |
| ; @llvm.nvvm.atomic.load.add.f32.p1 intrinsic on a `ptr addrspace(1)`. |
| ; The expected PTX carries the .global qualifier: atom.global.add.f32. |
| ; CHECK-LABEL: atomic_add_f32_addrspace1 |
| define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_addrspace1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace1_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace1_param_1]; |
| ; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val) |
| ret float %ret |
| } |
| |
| declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val) |
| |
| ; atomic_add_f32_addrspace3: float atomic add through the |
| ; @llvm.nvvm.atomic.load.add.f32.p3 intrinsic on a `ptr addrspace(3)`. |
| ; The expected PTX carries the .shared qualifier: atom.shared.add.f32. |
| ; CHECK-LABEL: atomic_add_f32_addrspace3 |
| define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_addrspace3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace3_param_1]; |
| ; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val) |
| ret float %ret |
| } |
| |
| ; atomicrmw_add_f32_generic: seq_cst `atomicrmw fadd` of a float through a |
| ; generic pointer (the IR-instruction form rather than the NVVM intrinsic). |
| ; The expected PTX is a single atom.add.f32 with no state-space qualifier. |
| ; CHECK-LABEL: atomicrmw_add_f32_generic |
| define float @atomicrmw_add_f32_generic(ptr %addr, float %val) { |
| ; CHECK-LABEL: atomicrmw_add_f32_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_generic_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_generic_param_1]; |
| ; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw fadd ptr %addr, float %val seq_cst |
| ret float %ret |
| } |
| |
| ; atomicrmw_add_f16_generic: seq_cst `atomicrmw fadd` of a half through a |
| ; generic pointer. The expected PTX contains no 16-bit atomic; it is a |
| ; compare-and-swap retry loop on the enclosing 32-bit word: |
| ;   - the address is rounded down with `and.b64 ..., -4`, and the low two |
| ;     address bits multiplied by 8 give the bit offset of the half in that word; |
| ;   - the constant 65535 shifted to that offset and inverted is the mask that |
| ;     preserves the other bits of the word; |
| ;   - each iteration extracts the half, converts it to f32, adds the (already |
| ;     converted) operand, rounds back to f16, merges the result into the word, |
| ;     then issues membar.sys followed by atom.cas.b32; |
| ;   - the loop repeats while the value returned by atom.cas.b32 differs from |
| ;     the word value the iteration started with. |
| ; NOTE(review): the "24" in $L__BB24_1 looks like this function's zero-based |
| ; position among the definitions in this file; if so, adding or removing a |
| ; definition earlier in the file requires regenerating the assertions. |
| ; CHECK-LABEL: atomicrmw_add_f16_generic |
| define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { |
| ; CHECK-LABEL: atomicrmw_add_f16_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-NEXT: .reg .b32 %r<18>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [atomicrmw_add_f16_generic_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atomicrmw_add_f16_generic_param_0]; |
| ; CHECK-NEXT: and.b64 %rd1, %rd2, -4; |
| ; CHECK-NEXT: cvt.u32.u64 %r4, %rd2; |
| ; CHECK-NEXT: and.b32 %r5, %r4, 3; |
| ; CHECK-NEXT: shl.b32 %r1, %r5, 3; |
| ; CHECK-NEXT: mov.b32 %r6, 65535; |
| ; CHECK-NEXT: shl.b32 %r7, %r6, %r1; |
| ; CHECK-NEXT: not.b32 %r2, %r7; |
| ; CHECK-NEXT: ld.b32 %r17, [%rd1]; |
| ; CHECK-NEXT: cvt.f32.f16 %r10, %rs1; |
| ; CHECK-NEXT: $L__BB24_1: // %atomicrmw.start |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u32 %r8, %r17, %r1; |
| ; CHECK-NEXT: cvt.u16.u32 %rs2, %r8; |
| ; CHECK-NEXT: cvt.f32.f16 %r9, %rs2; |
| ; CHECK-NEXT: add.rn.f32 %r11, %r9, %r10; |
| ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r11; |
| ; CHECK-NEXT: cvt.u32.u16 %r12, %rs3; |
| ; CHECK-NEXT: shl.b32 %r13, %r12, %r1; |
| ; CHECK-NEXT: and.b32 %r14, %r17, %r2; |
| ; CHECK-NEXT: or.b32 %r15, %r14, %r13; |
| ; CHECK-NEXT: membar.sys; |
| ; CHECK-NEXT: atom.cas.b32 %r3, [%rd1], %r17, %r15; |
| ; CHECK-NEXT: setp.ne.b32 %p1, %r3, %r17; |
| ; CHECK-NEXT: mov.b32 %r17, %r3; |
| ; CHECK-NEXT: @%p1 bra $L__BB24_1; |
| ; CHECK-NEXT: // %bb.2: // %atomicrmw.end |
| ; CHECK-NEXT: shr.u32 %r16, %r3, %r1; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %r16; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw fadd ptr %addr, half %val seq_cst |
| ret half %ret |
| } |
| |
| ; atomicrmw_add_f32_addrspace1: seq_cst `atomicrmw fadd` of a float on a |
| ; `ptr addrspace(1)`. The expected PTX carries the .global qualifier: |
| ; atom.global.add.f32. |
| ; CHECK-LABEL: atomicrmw_add_f32_addrspace1 |
| define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) { |
| ; CHECK-LABEL: atomicrmw_add_f32_addrspace1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace1_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_addrspace1_param_1]; |
| ; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw fadd ptr addrspace(1) %addr, float %val seq_cst |
| ret float %ret |
| } |
| |
| ; atomicrmw_add_f32_addrspace3: seq_cst `atomicrmw fadd` of a float on a |
| ; `ptr addrspace(3)`. The expected PTX carries the .shared qualifier: |
| ; atom.shared.add.f32. |
| ; CHECK-LABEL: atomicrmw_add_f32_addrspace3 |
| define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) { |
| ; CHECK-LABEL: atomicrmw_add_f32_addrspace3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_addrspace3_param_1]; |
| ; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %ret = atomicrmw fadd ptr addrspace(3) %addr, float %val seq_cst |
| ret float %ret |
| } |
| |
| ; atomic_cmpxchg_i32: seq_cst/seq_cst `cmpxchg` of an i32 through a generic |
| ; pointer. The expected PTX issues membar.sys ahead of atom.cas.b32. |
| ; The function returns %new rather than anything derived from %pairold, so |
| ; the atom.cas destination (%r3) is never read and func_retval0 receives the |
| ; register that was loaded from param_2. |
| ; NOTE(review): if the intent is to verify the value produced by the cmpxchg, |
| ; the IR would need an extractvalue of %pairold plus regenerated assertions -- |
| ; confirm before changing. |
| ; CHECK-LABEL: atomic_cmpxchg_i32 |
| define i32 @atomic_cmpxchg_i32(ptr %addr, i32 %cmp, i32 %new) { |
| ; CHECK-LABEL: atomic_cmpxchg_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_cmpxchg_i32_param_0]; |
| ; CHECK-NEXT: membar.sys; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_cmpxchg_i32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [atomic_cmpxchg_i32_param_2]; |
| ; CHECK-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst |
| ret i32 %new |
| } |
| |
| ; atomic_cmpxchg_i64: seq_cst/seq_cst `cmpxchg` of an i64 through a generic |
| ; pointer. The expected PTX issues membar.sys ahead of atom.cas.b64. |
| ; The function returns %new rather than anything derived from %pairold, so |
| ; the atom.cas destination (%rd4) is never read and func_retval0 receives the |
| ; register that was loaded from param_2. |
| ; NOTE(review): if the intent is to verify the value produced by the cmpxchg, |
| ; the IR would need an extractvalue of %pairold plus regenerated assertions -- |
| ; confirm before changing. |
| ; CHECK-LABEL: atomic_cmpxchg_i64 |
| define i64 @atomic_cmpxchg_i64(ptr %addr, i64 %cmp, i64 %new) { |
| ; CHECK-LABEL: atomic_cmpxchg_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_cmpxchg_i64_param_0]; |
| ; CHECK-NEXT: membar.sys; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [atomic_cmpxchg_i64_param_1]; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [atomic_cmpxchg_i64_param_2]; |
| ; CHECK-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst |
| ret i64 %new |
| } |