; blob: 6ea02f35e96268a7d86881a582200a6ac789a3a5 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
; CHECK-LABEL: atom0
; 32-bit integer `atomicrmw add` (seq_cst) through an unqualified pointer.
; The assertions below expect a single atom.add.u32 whose result is returned.
define i32 @atom0(ptr %addr, i32 %val) {
; CHECK-LABEL: atom0(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom0_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom0_param_1];
; CHECK-NEXT: atom.add.u32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw add ptr %addr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom1
; 64-bit integer `atomicrmw add` (seq_cst) through an unqualified pointer.
; The assertions below expect a single atom.add.u64 whose result is returned.
define i64 @atom1(ptr %addr, i64 %val) {
; CHECK-LABEL: atom1(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom1_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom1_param_1];
; CHECK-NEXT: atom.add.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw add ptr %addr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom2
; 32-bit integer `atomicrmw sub` (seq_cst).
; The expected PTX has no atomic subtract: the operand is negated (neg.s32)
; and then fed to atom.add.u32.
define i32 @atom2(ptr %subr, i32 %val) {
; CHECK-LABEL: atom2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom2_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom2_param_1];
; CHECK-NEXT: neg.s32 %r2, %r1;
; CHECK-NEXT: atom.add.u32 %r3, [%rd1], %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %old = atomicrmw sub ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom3
; 64-bit integer `atomicrmw sub` (seq_cst).
; The expected PTX negates the operand (neg.s64) and then uses atom.add.u64.
define i64 @atom3(ptr %subr, i64 %val) {
; CHECK-LABEL: atom3(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom3_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom3_param_1];
; CHECK-NEXT: neg.s64 %rd3, %rd2;
; CHECK-NEXT: atom.add.u64 %rd4, [%rd1], %rd3;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %old = atomicrmw sub ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom4
; 32-bit `atomicrmw and` (seq_cst).
; The assertions below expect a single atom.and.b32 whose result is returned.
define i32 @atom4(ptr %subr, i32 %val) {
; CHECK-LABEL: atom4(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom4_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom4_param_1];
; CHECK-NEXT: atom.and.b32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw and ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom5
; 64-bit `atomicrmw and` (seq_cst).
; The assertions below expect a single atom.and.b64 whose result is returned.
define i64 @atom5(ptr %subr, i64 %val) {
; CHECK-LABEL: atom5(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom5_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom5_param_1];
; CHECK-NEXT: atom.and.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw and ptr %subr, i64 %val seq_cst
  ret i64 %old
}
;; NAND not yet supported
;define i32 @atom6(ptr %subr, i32 %val) {
; %ret = atomicrmw nand ptr %subr, i32 %val seq_cst
; ret i32 %ret
;}
;define i64 @atom7(ptr %subr, i64 %val) {
; %ret = atomicrmw nand ptr %subr, i64 %val seq_cst
; ret i64 %ret
;}
; CHECK-LABEL: atom8
; 32-bit `atomicrmw or` (seq_cst).
; The assertions below expect a single atom.or.b32 whose result is returned.
define i32 @atom8(ptr %subr, i32 %val) {
; CHECK-LABEL: atom8(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom8_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom8_param_1];
; CHECK-NEXT: atom.or.b32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw or ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom9
; 64-bit `atomicrmw or` (seq_cst).
; The assertions below expect a single atom.or.b64 whose result is returned.
define i64 @atom9(ptr %subr, i64 %val) {
; CHECK-LABEL: atom9(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom9_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom9_param_1];
; CHECK-NEXT: atom.or.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw or ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom10
; 32-bit `atomicrmw xor` (seq_cst).
; The assertions below expect a single atom.xor.b32 whose result is returned.
define i32 @atom10(ptr %subr, i32 %val) {
; CHECK-LABEL: atom10(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom10_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom10_param_1];
; CHECK-NEXT: atom.xor.b32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw xor ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom11
; 64-bit `atomicrmw xor` (seq_cst).
; The assertions below expect a single atom.xor.b64 whose result is returned.
define i64 @atom11(ptr %subr, i64 %val) {
; CHECK-LABEL: atom11(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom11_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom11_param_1];
; CHECK-NEXT: atom.xor.b64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw xor ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom12
; 32-bit signed `atomicrmw max` (seq_cst).
; The assertions below expect the signed form, atom.max.s32.
define i32 @atom12(ptr %subr, i32 %val) {
; CHECK-LABEL: atom12(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom12_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom12_param_1];
; CHECK-NEXT: atom.max.s32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw max ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom13
; 64-bit signed `atomicrmw max` (seq_cst).
; The assertions below expect the signed form, atom.max.s64.
define i64 @atom13(ptr %subr, i64 %val) {
; CHECK-LABEL: atom13(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom13_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom13_param_1];
; CHECK-NEXT: atom.max.s64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw max ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom14
; 32-bit signed `atomicrmw min` (seq_cst).
; The assertions below expect the signed form, atom.min.s32.
define i32 @atom14(ptr %subr, i32 %val) {
; CHECK-LABEL: atom14(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom14_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom14_param_1];
; CHECK-NEXT: atom.min.s32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw min ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom15
; 64-bit signed `atomicrmw min` (seq_cst).
; The assertions below expect the signed form, atom.min.s64.
define i64 @atom15(ptr %subr, i64 %val) {
; CHECK-LABEL: atom15(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom15_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom15_param_1];
; CHECK-NEXT: atom.min.s64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw min ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom16
; 32-bit unsigned `atomicrmw umax` (seq_cst).
; The assertions below expect the unsigned form, atom.max.u32.
define i32 @atom16(ptr %subr, i32 %val) {
; CHECK-LABEL: atom16(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom16_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom16_param_1];
; CHECK-NEXT: atom.max.u32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw umax ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom17
; 64-bit unsigned `atomicrmw umax` (seq_cst).
; The assertions below expect the unsigned form, atom.max.u64.
define i64 @atom17(ptr %subr, i64 %val) {
; CHECK-LABEL: atom17(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom17_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom17_param_1];
; CHECK-NEXT: atom.max.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw umax ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; CHECK-LABEL: atom18
; 32-bit unsigned `atomicrmw umin` (seq_cst).
; The assertions below expect the unsigned form, atom.min.u32.
define i32 @atom18(ptr %subr, i32 %val) {
; CHECK-LABEL: atom18(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom18_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom18_param_1];
; CHECK-NEXT: atom.min.u32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw umin ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; CHECK-LABEL: atom19
; 64-bit unsigned `atomicrmw umin` (seq_cst).
; The assertions below expect the unsigned form, atom.min.u64.
define i64 @atom19(ptr %subr, i64 %val) {
; CHECK-LABEL: atom19(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom19_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [atom19_param_1];
; CHECK-NEXT: atom.min.u64 %rd3, [%rd1], %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %old = atomicrmw umin ptr %subr, i64 %val seq_cst
  ret i64 %old
}
; 32-bit `atomicrmw uinc_wrap` (seq_cst).
; The assertions below expect it to map directly onto atom.inc.u32.
define i32 @atom20(ptr %subr, i32 %val) {
; CHECK-LABEL: atom20(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom20_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom20_param_1];
; CHECK-NEXT: atom.inc.u32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw uinc_wrap ptr %subr, i32 %val seq_cst
  ret i32 %old
}
; 32-bit `atomicrmw udec_wrap` (seq_cst).
; The assertions below expect it to map directly onto atom.dec.u32.
define i32 @atom21(ptr %subr, i32 %val) {
; CHECK-LABEL: atom21(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atom21_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atom21_param_1];
; CHECK-NEXT: atom.dec.u32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw udec_wrap ptr %subr, i32 %val seq_cst
  ret i32 %old
}
declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
; CHECK-LABEL: atomic_add_f32_generic
; Float atomic add via the llvm.nvvm.atomic.load.add.f32.p0 intrinsic on an
; unqualified pointer. The assertions below expect atom.add.f32 with no
; state-space qualifier.
define float @atomic_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_generic(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_generic_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_generic_param_1];
; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %res = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
  ret float %res
}
declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace1
; Float atomic add via the llvm.nvvm.atomic.load.add.f32.p1 intrinsic on an
; addrspace(1) pointer. The assertions below expect atom.global.add.f32.
define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace1_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace1_param_1];
; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %res = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
  ret float %res
}
declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace3
; Float atomic add via the llvm.nvvm.atomic.load.add.f32.p3 intrinsic on an
; addrspace(3) pointer. The assertions below expect atom.shared.add.f32.
define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace3(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace3_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace3_param_1];
; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %res = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
  ret float %res
}
; CHECK-LABEL: atomicrmw_add_f32_generic
; Float `atomicrmw fadd` (seq_cst) on an unqualified pointer.
; The assertions below expect a native atom.add.f32 with no state-space
; qualifier.
define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_generic(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_generic_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_generic_param_1];
; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw fadd ptr %addr, float %val seq_cst
  ret float %old
}
; CHECK-LABEL: atomicrmw_add_f16_generic
; Half-precision `atomicrmw fadd` (seq_cst) on an unqualified pointer.
; The expected PTX for this configuration is a compare-and-swap loop rather
; than a single atom instruction:
;   * the address is rounded down to a 4-byte boundary (and.b64 ..., -4) and
;     the low two address bits, times 8, give the shift of the half within
;     that 32-bit word;
;   * each iteration extracts the half, widens it to f32, adds, narrows back,
;     merges it into the word under a 16-bit mask, and attempts atom.cas.b32
;     (preceded by membar.sys);
;   * the loop repeats while the value returned by the CAS differs from the
;     value that was expected.
define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-LABEL: atomicrmw_add_f16_generic(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<18>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [atomicrmw_add_f16_generic_param_1];
; CHECK-NEXT: ld.param.b64 %rd2, [atomicrmw_add_f16_generic_param_0];
; CHECK-NEXT: and.b64 %rd1, %rd2, -4;
; CHECK-NEXT: cvt.u32.u64 %r4, %rd2;
; CHECK-NEXT: and.b32 %r5, %r4, 3;
; CHECK-NEXT: shl.b32 %r1, %r5, 3;
; CHECK-NEXT: mov.b32 %r6, 65535;
; CHECK-NEXT: shl.b32 %r7, %r6, %r1;
; CHECK-NEXT: not.b32 %r2, %r7;
; CHECK-NEXT: ld.b32 %r17, [%rd1];
; CHECK-NEXT: cvt.f32.f16 %r10, %rs1;
; CHECK-NEXT: $L__BB24_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u32 %r8, %r17, %r1;
; CHECK-NEXT: cvt.u16.u32 %rs2, %r8;
; CHECK-NEXT: cvt.f32.f16 %r9, %rs2;
; CHECK-NEXT: add.rn.f32 %r11, %r9, %r10;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r11;
; CHECK-NEXT: cvt.u32.u16 %r12, %rs3;
; CHECK-NEXT: shl.b32 %r13, %r12, %r1;
; CHECK-NEXT: and.b32 %r14, %r17, %r2;
; CHECK-NEXT: or.b32 %r15, %r14, %r13;
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: atom.cas.b32 %r3, [%rd1], %r17, %r15;
; CHECK-NEXT: setp.ne.b32 %p1, %r3, %r17;
; CHECK-NEXT: mov.b32 %r17, %r3;
; CHECK-NEXT: @%p1 bra $L__BB24_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: shr.u32 %r16, %r3, %r1;
; CHECK-NEXT: st.param.b16 [func_retval0], %r16;
; CHECK-NEXT: ret;
  %old = atomicrmw fadd ptr %addr, half %val seq_cst
  ret half %old
}
; CHECK-LABEL: atomicrmw_add_f32_addrspace1
; Float `atomicrmw fadd` (seq_cst) on an addrspace(1) pointer.
; The assertions below expect atom.global.add.f32.
define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace1_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_addrspace1_param_1];
; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw fadd ptr addrspace(1) %addr, float %val seq_cst
  ret float %old
}
; CHECK-LABEL: atomicrmw_add_f32_addrspace3
; Float `atomicrmw fadd` (seq_cst) on an addrspace(3) pointer.
; The assertions below expect atom.shared.add.f32.
define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace3(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomicrmw_add_f32_addrspace3_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomicrmw_add_f32_addrspace3_param_1];
; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %old = atomicrmw fadd ptr addrspace(3) %addr, float %val seq_cst
  ret float %old
}
; CHECK-LABEL: atomic_cmpxchg_i32
; 32-bit `cmpxchg` with seq_cst success and failure orderings.
; The assertions below expect membar.sys followed by atom.cas.b32.
; NOTE(review): the function returns %new, not the value produced by the
; cmpxchg, so the expected output stores the incoming parameter (%r2) and the
; CAS result register (%r3) is never read. Confirm this is intentional before
; changing it; returning the loaded value would require regenerating the
; assertions.
define i32 @atomic_cmpxchg_i32(ptr %addr, i32 %cmp, i32 %new) {
; CHECK-LABEL: atomic_cmpxchg_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_cmpxchg_i32_param_0];
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: ld.param.b32 %r1, [atomic_cmpxchg_i32_param_1];
; CHECK-NEXT: ld.param.b32 %r2, [atomic_cmpxchg_i32_param_2];
; CHECK-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %pair = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
  ret i32 %new
}
; CHECK-LABEL: atomic_cmpxchg_i64
; 64-bit `cmpxchg` with seq_cst success and failure orderings.
; The assertions below expect membar.sys followed by atom.cas.b64.
; NOTE(review): the function returns %new, not the value produced by the
; cmpxchg, so the expected output stores the incoming parameter (%rd3) and the
; CAS result register (%rd4) is never read. Confirm this is intentional before
; changing it; returning the loaded value would require regenerating the
; assertions.
define i64 @atomic_cmpxchg_i64(ptr %addr, i64 %cmp, i64 %new) {
; CHECK-LABEL: atomic_cmpxchg_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_cmpxchg_i64_param_0];
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: ld.param.b64 %rd2, [atomic_cmpxchg_i64_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [atomic_cmpxchg_i64_param_2];
; CHECK-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pair = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
  ret i64 %new
}