; blob: cd35949ab290a2ce1440cf8f8d4654443d6b902a [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.
; TODO: add test for vectors that exceed 128-bit length
; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
; vectors cannot exceed 128-bit in length, i.e., .v4.u64 is not allowed.
; TODO: generate PTX that preserves Concurrent Forward Progress
; for atomic operations to local statespace
; by generating atomic or volatile operations.
; TODO: design exposure for atomic operations on vector types.
; TODO: add weak,atomic,volatile,atomic volatile tests
; for .const and .param statespaces.
; TODO: optimize .sys.shared into .cta.shared or .cluster.shared .
;; generic statespace
; generic
; Weak (non-atomic, non-volatile) i8 load, add 1, store through a generic
; pointer; both run lines expect plain ld.u8 / st.u8.
define void @generic_i8(ptr %a) {
; CHECK-LABEL: generic_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_i8_param_0];
; CHECK-NEXT: ld.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
  %val = load i8, ptr %a
  %inc = add i8 %val, 1
  store i8 %inc, ptr %a
  ret void
}
; Weak i16 load, add 1, store through a generic pointer; both run lines
; expect plain ld.u16 / st.u16.
define void @generic_i16(ptr %a) {
; CHECK-LABEL: generic_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_i16_param_0];
; CHECK-NEXT: ld.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
  %val = load i16, ptr %a
  %inc = add i16 %val, 1
  store i16 %inc, ptr %a
  ret void
}
; Weak i32 load, add 1, store through a generic pointer; both run lines
; expect plain ld.u32 / st.u32.
define void @generic_i32(ptr %a) {
; CHECK-LABEL: generic_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_i32_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %val = load i32, ptr %a
  %inc = add i32 %val, 1
  store i32 %inc, ptr %a
  ret void
}
; Weak i64 load, add 1, store through a generic pointer; both run lines
; expect plain ld.u64 / st.u64.
define void @generic_i64(ptr %a) {
; CHECK-LABEL: generic_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_i64_param_0];
; CHECK-NEXT: ld.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
  %val = load i64, ptr %a
  %inc = add i64 %val, 1
  store i64 %inc, ptr %a
  ret void
}
; Weak float load, fadd 1.0, store through a generic pointer; both run
; lines expect plain ld.f32 / st.f32.
define void @generic_float(ptr %a) {
; CHECK-LABEL: generic_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_float_param_0];
; CHECK-NEXT: ld.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
  %val = load float, ptr %a
  %inc = fadd float %val, 1.0
  store float %inc, ptr %a
  ret void
}
; Weak double load, fadd 1.0, store through a generic pointer; both run
; lines expect plain ld.f64 / st.f64.
define void @generic_double(ptr %a) {
; CHECK-LABEL: generic_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_double_param_0];
; CHECK-NEXT: ld.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
  %val = load double, ptr %a
  %inc = fadd double %val, 1.0
  store double %inc, ptr %a
  ret void
}
; TODO: make the lowering of these weak vector ops consistent with
; the ones of the next tests. This test lowers to a weak PTX
; vector op, but the next test lowers to a scalar PTX op.
; Weak <2 x i8> load, add 1 to each lane, store through a generic pointer;
; both run lines expect the vector forms ld.v2.u8 / st.v2.u8.
define void @generic_2xi8(ptr %a) {
; CHECK-LABEL: generic_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi8_param_0];
; CHECK-NEXT: ld.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
  %vec = load <2 x i8>, ptr %a
  %vec.inc = add <2 x i8> %vec, <i8 1, i8 1>
  store <2 x i8> %vec.inc, ptr %a
  ret void
}
; TODO: make the lowering of these weak vector ops consistent with
; the ones of the previous test. This test lowers to a weak
; PTX scalar op, but the prior test lowers to a vector PTX op.
; Weak <4 x i8> load, add 1 to each lane, store through a generic pointer.
; Both run lines expect one scalar ld.u32, per-byte extraction with bfe,
; repacking with prmt, and one scalar st.u32.
define void @generic_4xi8(ptr %a) {
; CHECK-LABEL: generic_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi8_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
  %vec = load <4 x i8>, ptr %a
  %vec.inc = add <4 x i8> %vec, <i8 1, i8 1, i8 1, i8 1>
  store <4 x i8> %vec.inc, ptr %a
  ret void
}
; Weak <2 x i16> load, add 1 to each lane, store through a generic pointer.
; Both run lines expect a scalar ld.u32 unpacked/repacked with mov.b32 and
; a scalar st.u32.
define void @generic_2xi16(ptr %a) {
; CHECK-LABEL: generic_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi16_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %vec = load <2 x i16>, ptr %a
  %vec.inc = add <2 x i16> %vec, <i16 1, i16 1>
  store <2 x i16> %vec.inc, ptr %a
  ret void
}
; Weak <4 x i16> load, add 1 to each lane, store through a generic pointer;
; both run lines expect ld.v4.u16 / st.v4.u16.
define void @generic_4xi16(ptr %a) {
; CHECK-LABEL: generic_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi16_param_0];
; CHECK-NEXT: ld.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
  %vec = load <4 x i16>, ptr %a
  %vec.inc = add <4 x i16> %vec, <i16 1, i16 1, i16 1, i16 1>
  store <4 x i16> %vec.inc, ptr %a
  ret void
}
; Weak <2 x i32> load, add 1 to each lane, store through a generic pointer;
; both run lines expect ld.v2.u32 / st.v2.u32.
define void @generic_2xi32(ptr %a) {
; CHECK-LABEL: generic_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi32_param_0];
; CHECK-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
  %vec = load <2 x i32>, ptr %a
  %vec.inc = add <2 x i32> %vec, <i32 1, i32 1>
  store <2 x i32> %vec.inc, ptr %a
  ret void
}
; Weak <4 x i32> load, add 1 to each lane, store through a generic pointer;
; both run lines expect ld.v4.u32 / st.v4.u32.
define void @generic_4xi32(ptr %a) {
; CHECK-LABEL: generic_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi32_param_0];
; CHECK-NEXT: ld.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
  %vec = load <4 x i32>, ptr %a
  %vec.inc = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %vec.inc, ptr %a
  ret void
}
; Weak <2 x i64> load, add 1 to each lane, store through a generic pointer;
; both run lines expect ld.v2.u64 / st.v2.u64.
define void @generic_2xi64(ptr %a) {
; CHECK-LABEL: generic_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi64_param_0];
; CHECK-NEXT: ld.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
  %vec = load <2 x i64>, ptr %a
  %vec.inc = add <2 x i64> %vec, <i64 1, i64 1>
  store <2 x i64> %vec.inc, ptr %a
  ret void
}
; Weak <2 x float> load, fadd 1.0 to each lane, store through a generic
; pointer; both run lines expect ld.v2.f32 / st.v2.f32.
define void @generic_2xfloat(ptr %a) {
; CHECK-LABEL: generic_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xfloat_param_0];
; CHECK-NEXT: ld.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
  %vec = load <2 x float>, ptr %a
  %vec.inc = fadd <2 x float> %vec, <float 1.0, float 1.0>
  store <2 x float> %vec.inc, ptr %a
  ret void
}
; Weak <4 x float> load, fadd 1.0 to each lane, store through a generic
; pointer; both run lines expect ld.v4.f32 / st.v4.f32.
define void @generic_4xfloat(ptr %a) {
; CHECK-LABEL: generic_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xfloat_param_0];
; CHECK-NEXT: ld.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
  %vec = load <4 x float>, ptr %a
  %vec.inc = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
  store <4 x float> %vec.inc, ptr %a
  ret void
}
; Weak <2 x double> load, fadd 1.0 to each lane, store through a generic
; pointer; both run lines expect ld.v2.f64 / st.v2.f64.
define void @generic_2xdouble(ptr %a) {
; CHECK-LABEL: generic_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xdouble_param_0];
; CHECK-NEXT: ld.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
  %vec = load <2 x double>, ptr %a
  %vec.inc = fadd <2 x double> %vec, <double 1.0, double 1.0>
  store <2 x double> %vec.inc, ptr %a
  ret void
}
; generic_volatile
; Volatile i8 load, add 1, volatile store through a generic pointer; both
; run lines expect ld.volatile.u8 / st.volatile.u8.
define void @generic_volatile_i8(ptr %a) {
; CHECK-LABEL: generic_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
  %val = load volatile i8, ptr %a
  %inc = add i8 %val, 1
  store volatile i8 %inc, ptr %a
  ret void
}
; Volatile i16 load, add 1, volatile store through a generic pointer; both
; run lines expect ld.volatile.u16 / st.volatile.u16.
define void @generic_volatile_i16(ptr %a) {
; CHECK-LABEL: generic_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
  %val = load volatile i16, ptr %a
  %inc = add i16 %val, 1
  store volatile i16 %inc, ptr %a
  ret void
}
; Volatile i32 load, add 1, volatile store through a generic pointer; both
; run lines expect ld.volatile.u32 / st.volatile.u32.
define void @generic_volatile_i32(ptr %a) {
; CHECK-LABEL: generic_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %val = load volatile i32, ptr %a
  %inc = add i32 %val, 1
  store volatile i32 %inc, ptr %a
  ret void
}
; Volatile i64 load, add 1, volatile store through a generic pointer; both
; run lines expect ld.volatile.u64 / st.volatile.u64.
define void @generic_volatile_i64(ptr %a) {
; CHECK-LABEL: generic_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
  %val = load volatile i64, ptr %a
  %inc = add i64 %val, 1
  store volatile i64 %inc, ptr %a
  ret void
}
; Volatile float load, fadd 1.0, volatile store through a generic pointer;
; both run lines expect ld.volatile.f32 / st.volatile.f32.
define void @generic_volatile_float(ptr %a) {
; CHECK-LABEL: generic_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
  %val = load volatile float, ptr %a
  %inc = fadd float %val, 1.0
  store volatile float %inc, ptr %a
  ret void
}
; Volatile double load, fadd 1.0, volatile store through a generic pointer;
; both run lines expect ld.volatile.f64 / st.volatile.f64.
define void @generic_volatile_double(ptr %a) {
; CHECK-LABEL: generic_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
  %val = load volatile double, ptr %a
  %inc = fadd double %val, 1.0
  store volatile double %inc, ptr %a
  ret void
}
; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
; Currently, LLVM:
; - does not allow atomic operations on vectors.
; - it allows volatile operations but not clear what that means.
; Both of the following semantics make sense in general, and PTX supports both:
; - volatile/atomic/volatile atomic applies to the whole vector
; - volatile/atomic/volatile atomic applies elementwise
; Actions required:
; - clarify LLVM semantics for volatile on vectors and align the NVPTX backend with those
; Below tests show that the current implementation picks the semantics in an inconsistent way
; * volatile <2 x i8> lowers to "elementwise volatile"
; * volatile <4 x i8> lowers to "full vector volatile"
; - provide support for vector atomics, e.g., by extending LLVM IR or via intrinsics
; - update tests in load-store-sm70.ll as well.
; TODO: make this operation consistent with the one for <4 x i8>
; This operation lowers to an "elementwise volatile PTX operation".
; Volatile <2 x i8> load, add 1 to each lane, volatile store through a
; generic pointer; both run lines expect ld.volatile.v2.u8 /
; st.volatile.v2.u8.
define void @generic_volatile_2xi8(ptr %a) {
; CHECK-LABEL: generic_volatile_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi8_param_0];
; CHECK-NEXT: ld.volatile.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.volatile.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
  %vec = load volatile <2 x i8>, ptr %a
  %vec.inc = add <2 x i8> %vec, <i8 1, i8 1>
  store volatile <2 x i8> %vec.inc, ptr %a
  ret void
}
; TODO: make this operation consistent with the one for <2 x i8>
; This operation lowers to a "full vector volatile PTX operation".
; Volatile <4 x i8> load, add 1 to each lane, volatile store through a
; generic pointer. Both run lines expect one scalar ld.volatile.u32,
; per-byte bfe extraction, prmt repacking, and one scalar st.volatile.u32.
define void @generic_volatile_4xi8(ptr %a) {
; CHECK-LABEL: generic_volatile_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
  %vec = load volatile <4 x i8>, ptr %a
  %vec.inc = add <4 x i8> %vec, <i8 1, i8 1, i8 1, i8 1>
  store volatile <4 x i8> %vec.inc, ptr %a
  ret void
}
; Volatile <2 x i16> load, add 1 to each lane, volatile store through a
; generic pointer. Both run lines expect a scalar ld.volatile.u32
; unpacked/repacked with mov.b32 and a scalar st.volatile.u32.
define void @generic_volatile_2xi16(ptr %a) {
; CHECK-LABEL: generic_volatile_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi16_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.volatile.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %vec = load volatile <2 x i16>, ptr %a
  %vec.inc = add <2 x i16> %vec, <i16 1, i16 1>
  store volatile <2 x i16> %vec.inc, ptr %a
  ret void
}
; Volatile <4 x i16> load, add 1 to each lane, volatile store through a
; generic pointer; both run lines expect ld.volatile.v4.u16 /
; st.volatile.v4.u16.
define void @generic_volatile_4xi16(ptr %a) {
; CHECK-LABEL: generic_volatile_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi16_param_0];
; CHECK-NEXT: ld.volatile.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.volatile.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
  %vec = load volatile <4 x i16>, ptr %a
  %vec.inc = add <4 x i16> %vec, <i16 1, i16 1, i16 1, i16 1>
  store volatile <4 x i16> %vec.inc, ptr %a
  ret void
}
; Volatile <2 x i32> load, add 1 to each lane, volatile store through a
; generic pointer; both run lines expect ld.volatile.v2.u32 /
; st.volatile.v2.u32.
define void @generic_volatile_2xi32(ptr %a) {
; CHECK-LABEL: generic_volatile_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi32_param_0];
; CHECK-NEXT: ld.volatile.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.volatile.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
  %vec = load volatile <2 x i32>, ptr %a
  %vec.inc = add <2 x i32> %vec, <i32 1, i32 1>
  store volatile <2 x i32> %vec.inc, ptr %a
  ret void
}
; Volatile <4 x i32> load, add 1 to each lane, volatile store through a
; generic pointer; both run lines expect ld.volatile.v4.u32 /
; st.volatile.v4.u32.
define void @generic_volatile_4xi32(ptr %a) {
; CHECK-LABEL: generic_volatile_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi32_param_0];
; CHECK-NEXT: ld.volatile.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
  %vec = load volatile <4 x i32>, ptr %a
  %vec.inc = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
  store volatile <4 x i32> %vec.inc, ptr %a
  ret void
}
; Volatile <2 x i64> load, add 1 to each lane, volatile store through a
; generic pointer; both run lines expect ld.volatile.v2.u64 /
; st.volatile.v2.u64.
define void @generic_volatile_2xi64(ptr %a) {
; CHECK-LABEL: generic_volatile_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi64_param_0];
; CHECK-NEXT: ld.volatile.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.volatile.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
  %vec = load volatile <2 x i64>, ptr %a
  %vec.inc = add <2 x i64> %vec, <i64 1, i64 1>
  store volatile <2 x i64> %vec.inc, ptr %a
  ret void
}
; Volatile <2 x float> load, fadd 1.0 to each lane, volatile store through
; a generic pointer; both run lines expect ld.volatile.v2.f32 /
; st.volatile.v2.f32.
define void @generic_volatile_2xfloat(ptr %a) {
; CHECK-LABEL: generic_volatile_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xfloat_param_0];
; CHECK-NEXT: ld.volatile.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
  %vec = load volatile <2 x float>, ptr %a
  %vec.inc = fadd <2 x float> %vec, <float 1.0, float 1.0>
  store volatile <2 x float> %vec.inc, ptr %a
  ret void
}
; Volatile <4 x float> load, fadd 1.0 to each lane, volatile store through
; a generic pointer; both run lines expect ld.volatile.v4.f32 /
; st.volatile.v4.f32.
define void @generic_volatile_4xfloat(ptr %a) {
; CHECK-LABEL: generic_volatile_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xfloat_param_0];
; CHECK-NEXT: ld.volatile.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
  %vec = load volatile <4 x float>, ptr %a
  %vec.inc = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
  store volatile <4 x float> %vec.inc, ptr %a
  ret void
}
; Volatile <2 x double> load, fadd 1.0 to each lane, volatile store through
; a generic pointer; both run lines expect ld.volatile.v2.f64 /
; st.volatile.v2.f64.
define void @generic_volatile_2xdouble(ptr %a) {
; CHECK-LABEL: generic_volatile_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xdouble_param_0];
; CHECK-NEXT: ld.volatile.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
  %vec = load volatile <2 x double>, ptr %a
  %vec.inc = fadd <2 x double> %vec, <double 1.0, double 1.0>
  store volatile <2 x double> %vec.inc, ptr %a
  ret void
}
; generic_unordered_sys
; Atomic unordered i8 load, add 1, atomic unordered store through a generic
; pointer (no syncscope given). The sm_20 run expects ld.volatile.u8 /
; st.volatile.u8; the sm_70 +ptx82 run expects ld.relaxed.sys.u8 /
; st.relaxed.sys.u8.
define void @generic_unordered_sys_i8(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
  %val = load atomic i8, ptr %a unordered, align 1
  %inc = add i8 %val, 1
  store atomic i8 %inc, ptr %a unordered, align 1
  ret void
}
; Atomic unordered i16 load, add 1, atomic unordered store through a
; generic pointer (no syncscope given). The sm_20 run expects
; ld.volatile.u16 / st.volatile.u16; the sm_70 +ptx82 run expects
; ld.relaxed.sys.u16 / st.relaxed.sys.u16.
define void @generic_unordered_sys_i16(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
  %val = load atomic i16, ptr %a unordered, align 2
  %inc = add i16 %val, 1
  store atomic i16 %inc, ptr %a unordered, align 2
  ret void
}
; Atomic unordered i32 load, add 1, atomic unordered store through a
; generic pointer (no syncscope given). The sm_20 run expects
; ld.volatile.u32 / st.volatile.u32; the sm_70 +ptx82 run expects
; ld.relaxed.sys.u32 / st.relaxed.sys.u32.
define void @generic_unordered_sys_i32(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.u32 [%rd1], %r2;
; SM70-NEXT: ret;
  %val = load atomic i32, ptr %a unordered, align 4
  %inc = add i32 %val, 1
  store atomic i32 %inc, ptr %a unordered, align 4
  ret void
}
; Atomic unordered i64 load, add 1, atomic unordered store through a
; generic pointer (no syncscope given). The sm_20 run expects
; ld.volatile.u64 / st.volatile.u64; the sm_70 +ptx82 run expects
; ld.relaxed.sys.u64 / st.relaxed.sys.u64.
define void @generic_unordered_sys_i64(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
  %val = load atomic i64, ptr %a unordered, align 8
  %inc = add i64 %val, 1
  store atomic i64 %inc, ptr %a unordered, align 8
  ret void
}
; Atomic unordered float load, fadd 1.0, atomic unordered store through a
; generic pointer (no syncscope given). The sm_20 run expects
; ld.volatile.f32 / st.volatile.f32; the sm_70 +ptx82 run expects
; ld.relaxed.sys.f32 / st.relaxed.sys.f32.
define void @generic_unordered_sys_float(ptr %a) {
; SM60-LABEL: generic_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.f32 [%rd1], %f2;
; SM70-NEXT: ret;
  %val = load atomic float, ptr %a unordered, align 4
  %inc = fadd float %val, 1.0
  store atomic float %inc, ptr %a unordered, align 4
  ret void
}
; Atomic unordered double load, fadd 1.0, atomic unordered store through a
; generic pointer (no syncscope given). The sm_20 run expects
; ld.volatile.f64 / st.volatile.f64; the sm_70 +ptx82 run expects
; ld.relaxed.sys.f64 / st.relaxed.sys.f64.
define void @generic_unordered_sys_double(ptr %a) {
; SM60-LABEL: generic_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
  %val = load atomic double, ptr %a unordered, align 8
  %inc = fadd double %val, 1.0
  store atomic double %inc, ptr %a unordered, align 8
  ret void
}
; generic_unordered_volatile_sys
define void @generic_unordered_volatile_sys_i8(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr %a unordered, align 1
ret void
}
; Atomic volatile unordered i16 load, add 1, atomic volatile unordered
; store through a generic pointer; both run lines expect ld.volatile.u16 /
; st.volatile.u16.
define void @generic_unordered_volatile_sys_i16(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
  %val = load atomic volatile i16, ptr %a unordered, align 2
  %inc = add i16 %val, 1
  store atomic volatile i16 %inc, ptr %a unordered, align 2
  ret void
}
; Atomic volatile unordered i32 load, add 1, atomic volatile unordered
; store through a generic pointer; both run lines expect ld.volatile.u32 /
; st.volatile.u32.
define void @generic_unordered_volatile_sys_i32(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %val = load atomic volatile i32, ptr %a unordered, align 4
  %inc = add i32 %val, 1
  store atomic volatile i32 %inc, ptr %a unordered, align 4
  ret void
}
; Atomic volatile unordered i64 load, add 1, atomic volatile unordered
; store through a generic pointer; both run lines expect ld.volatile.u64 /
; st.volatile.u64.
define void @generic_unordered_volatile_sys_i64(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
  %val = load atomic volatile i64, ptr %a unordered, align 8
  %inc = add i64 %val, 1
  store atomic volatile i64 %inc, ptr %a unordered, align 8
  ret void
}
; Atomic volatile unordered float load, fadd 1.0, atomic volatile unordered
; store through a generic pointer; both run lines expect ld.volatile.f32 /
; st.volatile.f32.
define void @generic_unordered_volatile_sys_float(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
  %val = load atomic volatile float, ptr %a unordered, align 4
  %inc = fadd float %val, 1.0
  store atomic volatile float %inc, ptr %a unordered, align 4
  ret void
}
; Atomic volatile unordered double load, fadd 1.0, atomic volatile
; unordered store through a generic pointer; both run lines expect
; ld.volatile.f64 / st.volatile.f64.
define void @generic_unordered_volatile_sys_double(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
  %val = load atomic volatile double, ptr %a unordered, align 8
  %inc = fadd double %val, 1.0
  store atomic volatile double %inc, ptr %a unordered, align 8
  ret void
}
; generic_monotonic_sys
; Atomic monotonic i8 load, add 1, atomic monotonic store through a generic
; pointer (no syncscope given). The sm_20 run expects ld.volatile.u8 /
; st.volatile.u8; the sm_70 +ptx82 run expects ld.relaxed.sys.u8 /
; st.relaxed.sys.u8.
define void @generic_monotonic_sys_i8(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
  %val = load atomic i8, ptr %a monotonic, align 1
  %inc = add i8 %val, 1
  store atomic i8 %inc, ptr %a monotonic, align 1
  ret void
}
; Monotonic atomic i16 load, add 1, and store through a generic pointer.
; The sm_20 invocation expects ld.volatile.u16 / st.volatile.u16; the
; sm_70 (+ptx82) invocation expects ld.relaxed.sys.u16 / st.relaxed.sys.u16.
define void @generic_monotonic_sys_i16(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr %a monotonic, align 2
ret void
}
; Monotonic atomic i32 load, add 1, and store through a generic pointer.
; The sm_20 invocation expects ld.volatile.u32 / st.volatile.u32; the
; sm_70 (+ptx82) invocation expects ld.relaxed.sys.u32 / st.relaxed.sys.u32.
define void @generic_monotonic_sys_i32(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr %a monotonic, align 4
ret void
}
; Monotonic atomic i64 load, add 1, and store through a generic pointer.
; The sm_20 invocation expects ld.volatile.u64 / st.volatile.u64; the
; sm_70 (+ptx82) invocation expects ld.relaxed.sys.u64 / st.relaxed.sys.u64.
define void @generic_monotonic_sys_i64(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr %a monotonic, align 8
ret void
}
; Monotonic atomic float load, fadd 1.0, and store through a generic pointer.
; The sm_20 invocation expects ld.volatile.f32 / st.volatile.f32; the
; sm_70 (+ptx82) invocation expects ld.relaxed.sys.f32 / st.relaxed.sys.f32.
define void @generic_monotonic_sys_float(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr %a monotonic, align 4
ret void
}
; Monotonic atomic double load, fadd 1.0, and store through a generic pointer.
; The sm_20 invocation expects ld.volatile.f64 / st.volatile.f64; the
; sm_70 (+ptx82) invocation expects ld.relaxed.sys.f64 / st.relaxed.sys.f64.
define void @generic_monotonic_sys_double(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr %a monotonic, align 8
ret void
}
; generic_monotonic_volatile_sys
; Volatile monotonic-atomic i8 load, add 1, and store through a generic
; pointer. Both llc invocations share one expectation: plain
; ld.volatile.u8 / st.volatile.u8, with the add checked as add.s16.
define void @generic_monotonic_volatile_sys_i8(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr %a monotonic, align 1
ret void
}
; Volatile monotonic-atomic i16 load, add 1, and store through a generic
; pointer. Both llc invocations expect ld.volatile.u16 / st.volatile.u16.
define void @generic_monotonic_volatile_sys_i16(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr %a monotonic, align 2
ret void
}
; Volatile monotonic-atomic i32 load, add 1, and store through a generic
; pointer. Both llc invocations expect ld.volatile.u32 / st.volatile.u32.
define void @generic_monotonic_volatile_sys_i32(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr %a monotonic, align 4
ret void
}
; Volatile monotonic-atomic i64 load, add 1, and store through a generic
; pointer. Both llc invocations expect ld.volatile.u64 / st.volatile.u64.
define void @generic_monotonic_volatile_sys_i64(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr %a monotonic, align 8
ret void
}
; Volatile monotonic-atomic float load, fadd 1.0, and store through a generic
; pointer. Both llc invocations expect ld.volatile.f32 / st.volatile.f32.
define void @generic_monotonic_volatile_sys_float(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr %a monotonic, align 4
ret void
}
; Volatile monotonic-atomic double load, fadd 1.0, and store through a
; generic pointer. Both llc invocations expect
; ld.volatile.f64 / st.volatile.f64.
define void @generic_monotonic_volatile_sys_double(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr %a monotonic, align 8
ret void
}
;; global statespace
; global
; Plain (non-atomic, non-volatile) i8 load, add 1, and store through an
; addrspace(1) pointer. Both llc invocations expect ld.global.u8 /
; st.global.u8, with the add checked as a 16-bit add.s16.
define void @global_i8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_i8_param_0];
; CHECK-NEXT: ld.global.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.global.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(1) %a
ret void
}
; Plain i16 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.global.u16 / st.global.u16.
define void @global_i16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_i16_param_0];
; CHECK-NEXT: ld.global.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.global.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(1) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(1) %a
ret void
}
; Plain i32 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.global.u32 / st.global.u32.
define void @global_i32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_i32_param_0];
; CHECK-NEXT: ld.global.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.global.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(1) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(1) %a
ret void
}
; Plain i64 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.global.u64 / st.global.u64.
define void @global_i64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_i64_param_0];
; CHECK-NEXT: ld.global.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.global.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(1) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(1) %a
ret void
}
; Plain float load, fadd 1.0, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.global.f32 / st.global.f32.
define void @global_float(ptr addrspace(1) %a) {
; CHECK-LABEL: global_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_float_param_0];
; CHECK-NEXT: ld.global.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.global.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(1) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(1) %a
ret void
}
; Plain double load, fadd 1.0, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.global.f64 / st.global.f64.
define void @global_double(ptr addrspace(1) %a) {
; CHECK-LABEL: global_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_double_param_0];
; CHECK-NEXT: ld.global.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.global.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(1) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x i8> load, add 1 to each lane, and store through an addrspace(1)
; pointer. Both llc invocations expect a single vector access
; (ld.global.v2.u8 / st.global.v2.u8) with per-lane add.s16.
define void @global_2xi8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi8_param_0];
; CHECK-NEXT: ld.global.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.global.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load <2 x i8>, ptr addrspace(1) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store <2 x i8> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <4 x i8> load, add 1 to each lane, and store through an addrspace(1)
; pointer. The expected PTX moves the whole vector as one 32-bit word
; (ld.global.u32 / st.global.u32): each byte is extracted with bfe.u32,
; incremented with a 16-bit add.s16, and the four results are repacked
; with prmt.b32.
define void @global_4xi8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi8_param_0];
; CHECK-NEXT: ld.global.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(1) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store <4 x i8> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x i16> load, add 1 to each lane, and store through an addrspace(1)
; pointer. The expected PTX moves the vector as one 32-bit word
; (ld.global.u32 / st.global.u32) and uses mov.b32 to split it into two
; 16-bit registers and to rejoin them after the adds.
define void @global_2xi16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi16_param_0];
; CHECK-NEXT: ld.global.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.global.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load <2 x i16>, ptr addrspace(1) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store <2 x i16> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <4 x i16> load, add 1 to each lane, and store through an addrspace(1)
; pointer. Both llc invocations expect a single vector access
; (ld.global.v4.u16 / st.global.v4.u16) with one add.s16 per lane.
define void @global_4xi16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi16_param_0];
; CHECK-NEXT: ld.global.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.global.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load <4 x i16>, ptr addrspace(1) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store <4 x i16> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x i32> load, add 1 to each lane, and store through an addrspace(1)
; pointer. Both llc invocations expect a single vector access
; (ld.global.v2.u32 / st.global.v2.u32).
define void @global_2xi32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi32_param_0];
; CHECK-NEXT: ld.global.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.global.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load <2 x i32>, ptr addrspace(1) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store <2 x i32> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <4 x i32> load, add 1 to each lane, and store through an addrspace(1)
; pointer. Both llc invocations expect a single vector access
; (ld.global.v4.u32 / st.global.v4.u32).
define void @global_4xi32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi32_param_0];
; CHECK-NEXT: ld.global.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.global.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load <4 x i32>, ptr addrspace(1) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store <4 x i32> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x i64> load, add 1 to each lane, and store through an addrspace(1)
; pointer. Both llc invocations expect a single vector access
; (ld.global.v2.u64 / st.global.v2.u64).
define void @global_2xi64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi64_param_0];
; CHECK-NEXT: ld.global.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.global.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load <2 x i64>, ptr addrspace(1) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store <2 x i64> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x float> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single vector access
; (ld.global.v2.f32 / st.global.v2.f32).
define void @global_2xfloat(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xfloat_param_0];
; CHECK-NEXT: ld.global.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.global.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load <2 x float>, ptr addrspace(1) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store <2 x float> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <4 x float> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single vector access
; (ld.global.v4.f32 / st.global.v4.f32).
define void @global_4xfloat(ptr addrspace(1) %a) {
; CHECK-LABEL: global_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_4xfloat_param_0];
; CHECK-NEXT: ld.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.global.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load <4 x float>, ptr addrspace(1) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store <4 x float> %a.add, ptr addrspace(1) %a
ret void
}
; Plain <2 x double> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single vector access
; (ld.global.v2.f64 / st.global.v2.f64).
define void @global_2xdouble(ptr addrspace(1) %a) {
; CHECK-LABEL: global_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_2xdouble_param_0];
; CHECK-NEXT: ld.global.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.global.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load <2 x double>, ptr addrspace(1) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store <2 x double> %a.add, ptr addrspace(1) %a
ret void
}
; global_volatile
; Volatile (non-atomic) i8 load, add 1, and store through an addrspace(1)
; pointer. Both llc invocations expect ld.volatile.global.u8 /
; st.volatile.global.u8, with the add checked as a 16-bit add.s16.
define void @global_volatile_i8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.global.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(1) %a
ret void
}
; Volatile i16 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.volatile.global.u16 /
; st.volatile.global.u16.
define void @global_volatile_i16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.global.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.global.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(1) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(1) %a
ret void
}
; Volatile i32 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.volatile.global.u32 /
; st.volatile.global.u32.
define void @global_volatile_i32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(1) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(1) %a
ret void
}
; Volatile i64 load, add 1, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.volatile.global.u64 /
; st.volatile.global.u64.
define void @global_volatile_i64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.global.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.global.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(1) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(1) %a
ret void
}
; Volatile float load, fadd 1.0, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.volatile.global.f32 /
; st.volatile.global.f32.
define void @global_volatile_float(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.global.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.global.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(1) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(1) %a
ret void
}
; Volatile double load, fadd 1.0, and store through an addrspace(1) pointer.
; Both llc invocations expect ld.volatile.global.f64 /
; st.volatile.global.f64.
define void @global_volatile_double(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.global.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.global.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(1) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x i8> load, add 1 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v2.u8 / st.volatile.global.v2.u8).
define void @global_volatile_2xi8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi8_param_0];
; CHECK-NEXT: ld.volatile.global.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.volatile.global.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i8>, ptr addrspace(1) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store volatile <2 x i8> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <4 x i8> load, add 1 to each lane, and store through an
; addrspace(1) pointer. The expected PTX moves the whole vector as one
; volatile 32-bit word (ld.volatile.global.u32 / st.volatile.global.u32):
; each byte is extracted with bfe.u32, incremented with a 16-bit add.s16,
; and the four results are repacked with prmt.b32.
define void @global_volatile_4xi8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(1) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store volatile <4 x i8> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x i16> load, add 1 to each lane, and store through an
; addrspace(1) pointer. The expected PTX moves the vector as one volatile
; 32-bit word (ld.volatile.global.u32 / st.volatile.global.u32) and uses
; mov.b32 to split it into two 16-bit registers and to rejoin them.
define void @global_volatile_2xi16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi16_param_0];
; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i16>, ptr addrspace(1) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store volatile <2 x i16> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <4 x i16> load, add 1 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v4.u16 / st.volatile.global.v4.u16).
define void @global_volatile_4xi16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi16_param_0];
; CHECK-NEXT: ld.volatile.global.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.volatile.global.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i16>, ptr addrspace(1) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store volatile <4 x i16> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x i32> load, add 1 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v2.u32 / st.volatile.global.v2.u32).
define void @global_volatile_2xi32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi32_param_0];
; CHECK-NEXT: ld.volatile.global.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.volatile.global.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i32>, ptr addrspace(1) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store volatile <2 x i32> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <4 x i32> load, add 1 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v4.u32 / st.volatile.global.v4.u32).
define void @global_volatile_4xi32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi32_param_0];
; CHECK-NEXT: ld.volatile.global.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.volatile.global.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i32>, ptr addrspace(1) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store volatile <4 x i32> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x i64> load, add 1 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v2.u64 / st.volatile.global.v2.u64).
define void @global_volatile_2xi64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi64_param_0];
; CHECK-NEXT: ld.volatile.global.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.volatile.global.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i64>, ptr addrspace(1) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store volatile <2 x i64> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x float> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v2.f32 / st.volatile.global.v2.f32).
define void @global_volatile_2xfloat(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xfloat_param_0];
; CHECK-NEXT: ld.volatile.global.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.global.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x float>, ptr addrspace(1) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store volatile <2 x float> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <4 x float> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v4.f32 / st.volatile.global.v4.f32).
define void @global_volatile_4xfloat(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xfloat_param_0];
; CHECK-NEXT: ld.volatile.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.global.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x float>, ptr addrspace(1) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store volatile <4 x float> %a.add, ptr addrspace(1) %a
ret void
}
; Volatile <2 x double> load, fadd 1.0 to each lane, and store through an
; addrspace(1) pointer. Both llc invocations expect a single volatile vector
; access (ld.volatile.global.v2.f64 / st.volatile.global.v2.f64).
define void @global_volatile_2xdouble(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xdouble_param_0];
; CHECK-NEXT: ld.volatile.global.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.global.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x double>, ptr addrspace(1) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store volatile <2 x double> %a.add, ptr addrspace(1) %a
ret void
}
; global_unordered_sys
; Unordered atomic i8 load, add 1, and store through an addrspace(1) pointer.
; The expected PTX differs per target: the sm_20 invocation expects
; ld.volatile.global.u8 / st.volatile.global.u8, while the sm_70 (+ptx82)
; invocation expects ld.relaxed.sys.global.u8 / st.relaxed.sys.global.u8.
; The add itself is checked as a 16-bit add.s16 in both cases.
define void @global_unordered_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
ret void
}
; Unordered atomic i16 load, add 1, and store through an addrspace(1) pointer.
; The sm_20 invocation expects ld.volatile.global.u16 /
; st.volatile.global.u16; the sm_70 (+ptx82) invocation expects
; ld.relaxed.sys.global.u16 / st.relaxed.sys.global.u16.
define void @global_unordered_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(1) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(1) %a unordered, align 2
ret void
}
; Unordered atomic i32 load, add 1, unordered atomic i32 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u32 for the sm_20 run and
; ld/st.relaxed.sys.global.u32 for the sm_70 (+ptx82) run.
define void @global_unordered_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.global.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(1) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Unordered atomic i64 load, add 1, unordered atomic i64 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u64 for the sm_20 run and
; ld/st.relaxed.sys.global.u64 for the sm_70 (+ptx82) run. Address and data
; share the single .b64 %rd register class, hence %rd<4>.
define void @global_unordered_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.global.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(1) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; Unordered atomic float load, fadd 1.0, unordered atomic float store through
; a global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.f32 for the sm_20 run and
; ld/st.relaxed.sys.global.f32 for the sm_70 (+ptx82) run. The constant 1.0
; appears in the expected output as the hex literal 0f3F800000.
define void @global_unordered_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.global.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(1) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Unordered atomic double load, fadd 1.0, unordered atomic double store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f64 for the sm_20 run and
; ld/st.relaxed.sys.global.f64 for the sm_70 (+ptx82) run. The constant 1.0
; appears in the expected output as the hex literal 0d3FF0000000000000.
define void @global_unordered_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.global.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(1) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; global_unordered_volatile_sys
; Unordered atomic *volatile* i8 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u8 for the sm_20 run (same as the
; non-volatile unordered case) and ld/st.mmio.relaxed.sys.global.u8 for the
; sm_70 (+ptx82) run. The i8 add is expected as a 16-bit add.s16.
define void @global_unordered_volatile_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i8_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
ret void
}
; Unordered atomic *volatile* i16 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u16 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u16 for the sm_70 (+ptx82) run.
define void @global_unordered_volatile_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i16_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(1) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(1) %a unordered, align 2
ret void
}
; Unordered atomic *volatile* i32 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u32 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u32 for the sm_70 (+ptx82) run.
define void @global_unordered_volatile_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i32_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(1) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Unordered atomic *volatile* i64 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u64 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u64 for the sm_70 (+ptx82) run. Address and
; data share the single .b64 %rd register class, hence %rd<4>.
define void @global_unordered_volatile_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i64_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(1) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; Unordered atomic *volatile* float load, fadd 1.0, and matching store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f32 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.f32 for the sm_70 (+ptx82) run.
define void @global_unordered_volatile_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_float_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(1) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Unordered atomic *volatile* double load, fadd 1.0, and matching store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f64 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.f64 for the sm_70 (+ptx82) run.
define void @global_unordered_volatile_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_double_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(1) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; global_monotonic_sys
; Monotonic atomic i8 load, add 1, monotonic atomic i8 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX matches the unordered variant: ld/st.volatile.global.u8 for
; the sm_20 run and ld/st.relaxed.sys.global.u8 for the sm_70 (+ptx82) run.
; The i8 add is expected as a 16-bit add.s16.
define void @global_monotonic_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
ret void
}
; Monotonic atomic i16 load, add 1, monotonic atomic i16 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u16 for the sm_20 run and
; ld/st.relaxed.sys.global.u16 for the sm_70 (+ptx82) run.
define void @global_monotonic_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(1) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(1) %a monotonic, align 2
ret void
}
; Monotonic atomic i32 load, add 1, monotonic atomic i32 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u32 for the sm_20 run and
; ld/st.relaxed.sys.global.u32 for the sm_70 (+ptx82) run.
define void @global_monotonic_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.global.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(1) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Monotonic atomic i64 load, add 1, monotonic atomic i64 store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u64 for the sm_20 run and
; ld/st.relaxed.sys.global.u64 for the sm_70 (+ptx82) run. Address and data
; share the single .b64 %rd register class, hence %rd<4>.
define void @global_monotonic_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.global.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.global.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(1) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; Monotonic atomic float load, fadd 1.0, monotonic atomic float store through
; a global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.f32 for the sm_20 run and
; ld/st.relaxed.sys.global.f32 for the sm_70 (+ptx82) run.
define void @global_monotonic_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.global.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(1) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Monotonic atomic double load, fadd 1.0, monotonic atomic double store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f64 for the sm_20 run and
; ld/st.relaxed.sys.global.f64 for the sm_70 (+ptx82) run.
define void @global_monotonic_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.global.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(1) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; global_monotonic_volatile_sys
; Monotonic atomic *volatile* i8 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u8 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u8 for the sm_70 (+ptx82) run. The i8 add is
; expected as a 16-bit add.s16.
define void @global_monotonic_volatile_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i8_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
ret void
}
; Monotonic atomic *volatile* i16 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u16 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u16 for the sm_70 (+ptx82) run.
define void @global_monotonic_volatile_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i16_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(1) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(1) %a monotonic, align 2
ret void
}
; Monotonic atomic *volatile* i32 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u32 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u32 for the sm_70 (+ptx82) run.
define void @global_monotonic_volatile_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i32_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(1) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Monotonic atomic *volatile* i64 load, add 1, and matching store through a
; global-statespace (addrspace(1)) pointer; the IR names no syncscope.
; Expected PTX: ld/st.volatile.global.u64 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.u64 for the sm_70 (+ptx82) run. Address and
; data share the single .b64 %rd register class, hence %rd<4>.
define void @global_monotonic_volatile_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i64_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(1) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; Monotonic atomic *volatile* float load, fadd 1.0, and matching store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f32 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.f32 for the sm_70 (+ptx82) run.
define void @global_monotonic_volatile_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_float_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(1) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Monotonic atomic *volatile* double load, fadd 1.0, and matching store
; through a global-statespace (addrspace(1)) pointer; the IR names no
; syncscope. Expected PTX: ld/st.volatile.global.f64 for the sm_20 run and
; ld/st.mmio.relaxed.sys.global.f64 for the sm_70 (+ptx82) run.
define void @global_monotonic_volatile_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_double_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(1) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
;; shared statespace
; shared
; Plain (non-atomic, non-volatile) i8 load, add 1, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same sequence: ld.shared.u8, a 16-bit add.s16, then st.shared.u8.
define void @shared_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_i8_param_0];
; CHECK-NEXT: ld.shared.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.shared.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i16 load, add 1, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same ld.shared.u16 / add.s16 / st.shared.u16 sequence.
define void @shared_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_i16_param_0];
; CHECK-NEXT: ld.shared.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.shared.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(3) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i32 load, add 1, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same ld.shared.u32 / add.s32 / st.shared.u32 sequence.
define void @shared_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_i32_param_0];
; CHECK-NEXT: ld.shared.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(3) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i64 load, add 1, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same ld.shared.u64 / add.s64 / st.shared.u64 sequence; address and data
; share the single .b64 %rd register class, hence %rd<4>.
define void @shared_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_i64_param_0];
; CHECK-NEXT: ld.shared.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.shared.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(3) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) float load, fadd 1.0, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same ld.shared.f32 / add.rn.f32 / st.shared.f32 sequence, with 1.0
; written as the hex literal 0f3F800000.
define void @shared_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_float_param_0];
; CHECK-NEXT: ld.shared.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.shared.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(3) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) double load, fadd 1.0, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; the same ld.shared.f64 / add.rn.f64 / st.shared.f64 sequence, with 1.0
; written as the hex literal 0d3FF0000000000000.
define void @shared_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_double_param_0];
; CHECK-NEXT: ld.shared.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.shared.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(3) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(3) %a
ret void
}
; Plain <2 x i8> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Both RUN configurations expect a single
; ld.shared.v2.u8 / st.shared.v2.u8 pair with one add.s16 per lane. The
; expected output computes the second lane first but stores the lanes in
; their original order.
define void @shared_2xi8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi8_param_0];
; CHECK-NEXT: ld.shared.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.shared.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load <2 x i8>, ptr addrspace(3) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store <2 x i8> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <4 x i8> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Unlike the <2 x i8> case, the expected PTX moves
; the whole vector as one 32-bit word (ld.shared.u32 / st.shared.u32):
;   - each byte is extracted with bfe.u32 (bit offsets 24, 16, 8, 0),
;   - narrowed to 16 bits, incremented with add.s16, and widened again,
;   - byte pairs are recombined with prmt.b32 (selector 0x3340U) and the two
;     halves merged with a final prmt.b32 (selector 0x5410U).
define void @shared_4xi8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi8_param_0];
; CHECK-NEXT: ld.shared.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(3) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store <4 x i8> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <2 x i16> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. The expected PTX moves the vector as one 32-bit
; word (ld.shared.u32 / st.shared.u32), unpacking it into two 16-bit
; registers with mov.b32, adding 1 to each, and repacking with mov.b32.
define void @shared_2xi16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi16_param_0];
; CHECK-NEXT: ld.shared.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load <2 x i16>, ptr addrspace(3) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store <2 x i16> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <4 x i16> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Both RUN configurations expect one
; ld.shared.v4.u16 / st.shared.v4.u16 pair with an add.s16 per lane; the adds
; are emitted from the last lane to the first, and the store lists the lanes
; in their original order.
define void @shared_4xi16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi16_param_0];
; CHECK-NEXT: ld.shared.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.shared.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load <4 x i16>, ptr addrspace(3) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store <4 x i16> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <2 x i32> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Both RUN configurations expect one
; ld.shared.v2.u32 / st.shared.v2.u32 pair with an add.s32 per lane.
define void @shared_2xi32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi32_param_0];
; CHECK-NEXT: ld.shared.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.shared.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load <2 x i32>, ptr addrspace(3) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store <2 x i32> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <4 x i32> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Both RUN configurations expect one
; ld.shared.v4.u32 / st.shared.v4.u32 pair with an add.s32 per lane; the adds
; are emitted from the last lane to the first.
define void @shared_4xi32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi32_param_0];
; CHECK-NEXT: ld.shared.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.shared.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load <4 x i32>, ptr addrspace(3) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store <4 x i32> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <2 x i64> load, per-lane add 1, store through a shared-statespace
; (addrspace(3)) pointer. Both RUN configurations expect one
; ld.shared.v2.u64 / st.shared.v2.u64 pair with an add.s64 per lane; address
; and data share the single .b64 %rd register class, hence %rd<6>.
define void @shared_2xi64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi64_param_0];
; CHECK-NEXT: ld.shared.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.shared.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load <2 x i64>, ptr addrspace(3) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store <2 x i64> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <2 x float> load, per-lane fadd 1.0, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; one ld.shared.v2.f32 / st.shared.v2.f32 pair with an add.rn.f32 per lane.
define void @shared_2xfloat(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xfloat_param_0];
; CHECK-NEXT: ld.shared.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.shared.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load <2 x float>, ptr addrspace(3) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store <2 x float> %a.add, ptr addrspace(3) %a
ret void
}
; Plain <4 x float> load, per-lane fadd 1.0, store through a
; shared-statespace (addrspace(3)) pointer. Both RUN configurations expect
; one ld.shared.v4.f32 / st.shared.v4.f32 pair with an add.rn.f32 per lane;
; the adds are emitted from the last lane to the first.
define void @shared_4xfloat(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xfloat_param_0];
; CHECK-NEXT: ld.shared.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.shared.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load <4 x float>, ptr addrspace(3) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store <4 x float> %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) <2 x double> round trip through addrspace(3).
; Each lane gets 1.0 added with fadd before the vector is stored back.
; Both llc configurations expect one ld.shared.v2.f64 / st.shared.v2.f64 pair
; and one add.rn.f64 per lane using the immediate 0d3FF0000000000000.
define void @shared_2xdouble(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xdouble_param_0];
; CHECK-NEXT: ld.shared.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.shared.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load <2 x double>, ptr addrspace(3) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store <2 x double> %a.add, ptr addrspace(3) %a
ret void
}
; shared_volatile
; Volatile (non-atomic) i8 load, increment by 1, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect the .volatile
; qualifier on the ld.shared.u8 / st.shared.u8 pair; the add itself is
; expected in a 16-bit register (add.s16).
define void @shared_volatile_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i16 load, increment by 1, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect
; ld.volatile.shared.u16 / st.volatile.shared.u16 around a single add.s16.
define void @shared_volatile_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(3) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i32 load, increment by 1, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect
; ld.volatile.shared.u32 / st.volatile.shared.u32 around a single add.s32.
define void @shared_volatile_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(3) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i64 load, increment by 1, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect
; ld.volatile.shared.u64 / st.volatile.shared.u64 around a single add.s64.
define void @shared_volatile_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(3) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) float load, fadd of 1.0, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect
; ld.volatile.shared.f32 / st.volatile.shared.f32 around a single add.rn.f32.
define void @shared_volatile_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(3) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) double load, fadd of 1.0, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect
; ld.volatile.shared.f64 / st.volatile.shared.f64 around a single add.rn.f64.
define void @shared_volatile_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(3) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x i8> load, per-lane increment by 1, and volatile store through
; an addrspace(3) pointer. Both llc configurations expect a vectorized
; ld.volatile.shared.v2.u8 / st.volatile.shared.v2.u8 pair, with each lane
; incremented by an add.s16 on a 16-bit register.
define void @shared_volatile_2xi8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi8_param_0];
; CHECK-NEXT: ld.volatile.shared.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i8>, ptr addrspace(3) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store volatile <2 x i8> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <4 x i8> load, per-lane increment by 1, and volatile store through
; an addrspace(3) pointer. Unlike the <2 x i8> case, the expected PTX moves
; the whole vector as one 32-bit word (ld/st.volatile.shared.u32). Each byte
; is extracted with bfe.u32, narrowed to 16 bits, incremented with add.s16,
; widened again, and the four results are recombined with three prmt.b32
; instructions before the single 32-bit store.
define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(3) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store volatile <4 x i8> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x i16> load, per-lane increment by 1, and volatile store
; through an addrspace(3) pointer. The expected PTX moves the vector as one
; 32-bit word (ld/st.volatile.shared.u32), unpacks it into two 16-bit
; registers with mov.b32, increments each with add.s16, and repacks with
; mov.b32 before the store.
define void @shared_volatile_2xi16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi16_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i16>, ptr addrspace(3) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store volatile <2 x i16> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <4 x i16> load, per-lane increment by 1, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v4.u16 / st.volatile.shared.v4.u16 pair
; with four add.s16 instructions in between.
define void @shared_volatile_4xi16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi16_param_0];
; CHECK-NEXT: ld.volatile.shared.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i16>, ptr addrspace(3) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store volatile <4 x i16> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x i32> load, per-lane increment by 1, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v2.u32 / st.volatile.shared.v2.u32 pair
; with two add.s32 instructions in between.
define void @shared_volatile_2xi32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi32_param_0];
; CHECK-NEXT: ld.volatile.shared.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.volatile.shared.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i32>, ptr addrspace(3) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store volatile <2 x i32> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <4 x i32> load, per-lane increment by 1, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v4.u32 / st.volatile.shared.v4.u32 pair
; with four add.s32 instructions in between.
define void @shared_volatile_4xi32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi32_param_0];
; CHECK-NEXT: ld.volatile.shared.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.volatile.shared.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i32>, ptr addrspace(3) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store volatile <4 x i32> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x i64> load, per-lane increment by 1, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v2.u64 / st.volatile.shared.v2.u64 pair
; with two add.s64 instructions in between.
define void @shared_volatile_2xi64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi64_param_0];
; CHECK-NEXT: ld.volatile.shared.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i64>, ptr addrspace(3) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store volatile <2 x i64> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x float> load, per-lane fadd of 1.0, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v2.f32 / st.volatile.shared.v2.f32 pair
; with two add.rn.f32 instructions in between.
define void @shared_volatile_2xfloat(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xfloat_param_0];
; CHECK-NEXT: ld.volatile.shared.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x float>, ptr addrspace(3) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store volatile <2 x float> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <4 x float> load, per-lane fadd of 1.0, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v4.f32 / st.volatile.shared.v4.f32 pair
; with four add.rn.f32 instructions in between.
define void @shared_volatile_4xfloat(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xfloat_param_0];
; CHECK-NEXT: ld.volatile.shared.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x float>, ptr addrspace(3) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store volatile <4 x float> %a.add, ptr addrspace(3) %a
ret void
}
; Volatile <2 x double> load, per-lane fadd of 1.0, and volatile store
; through an addrspace(3) pointer. Both llc configurations expect a
; vectorized ld.volatile.shared.v2.f64 / st.volatile.shared.v2.f64 pair
; with two add.rn.f64 instructions in between.
define void @shared_volatile_2xdouble(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xdouble_param_0];
; CHECK-NEXT: ld.volatile.shared.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x double>, ptr addrspace(3) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store volatile <2 x double> %a.add, ptr addrspace(3) %a
ret void
}
; shared_unordered_sys
; Atomic i8 load and store with unordered ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The two llc configurations expect different PTX. The sm_20 configuration
; expects ld.volatile.shared.u8 / st.volatile.shared.u8, while the
; sm_70 +ptx82 configuration expects ld.relaxed.sys.shared.u8 /
; st.relaxed.sys.shared.u8.
define void @shared_unordered_sys_i8(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
ret void
}
; Atomic i16 load and store with unordered ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u16 /
; st.volatile.shared.u16, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u16 / st.relaxed.sys.shared.u16.
define void @shared_unordered_sys_i16(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.shared.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(3) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(3) %a unordered, align 2
ret void
}
; Atomic i32 load and store with unordered ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u32 /
; st.volatile.shared.u32, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u32 / st.relaxed.sys.shared.u32.
define void @shared_unordered_sys_i32(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(3) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Atomic i64 load and store with unordered ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u64 /
; st.volatile.shared.u64, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u64 / st.relaxed.sys.shared.u64.
define void @shared_unordered_sys_i64(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.shared.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.shared.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(3) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; Atomic float load and store with unordered ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an fadd of 1.0 in between.
; The sm_20 configuration expects ld.volatile.shared.f32 /
; st.volatile.shared.f32, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.f32 / st.relaxed.sys.shared.f32.
define void @shared_unordered_sys_float(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.shared.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(3) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Atomic double load and store with unordered ordering (no syncscope given
; in the IR) through an addrspace(3) pointer, with an fadd of 1.0 in between.
; The sm_20 configuration expects ld.volatile.shared.f64 /
; st.volatile.shared.f64, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.f64 / st.relaxed.sys.shared.f64.
define void @shared_unordered_sys_double(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.shared.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(3) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; shared_unordered_volatile_sys
; Atomic volatile i8 load and store with unordered ordering through an
; addrspace(3) pointer, with an increment by 1 in between. Unlike the
; non-volatile unordered case, a single expectation covers both llc
; configurations, namely ld.volatile.shared.u8 / st.volatile.shared.u8.
define void @shared_unordered_volatile_sys_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
ret void
}
; Atomic volatile i16 load and store with unordered ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u16 / st.volatile.shared.u16.
define void @shared_unordered_volatile_sys_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(3) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(3) %a unordered, align 2
ret void
}
; Atomic volatile i32 load and store with unordered ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u32 / st.volatile.shared.u32.
define void @shared_unordered_volatile_sys_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(3) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Atomic volatile i64 load and store with unordered ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u64 / st.volatile.shared.u64.
define void @shared_unordered_volatile_sys_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(3) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; Atomic volatile float load and store with unordered ordering through an
; addrspace(3) pointer, with an fadd of 1.0 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.f32 / st.volatile.shared.f32.
define void @shared_unordered_volatile_sys_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(3) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Atomic volatile double load and store with unordered ordering through an
; addrspace(3) pointer, with an fadd of 1.0 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.f64 / st.volatile.shared.f64.
define void @shared_unordered_volatile_sys_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(3) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; shared_monotonic_sys
; Atomic i8 load and store with monotonic ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The expected PTX matches the unordered variant of this test. The sm_20
; configuration expects ld.volatile.shared.u8 / st.volatile.shared.u8, while
; the sm_70 +ptx82 configuration expects ld.relaxed.sys.shared.u8 /
; st.relaxed.sys.shared.u8.
define void @shared_monotonic_sys_i8(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
ret void
}
; Atomic i16 load and store with monotonic ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u16 /
; st.volatile.shared.u16, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u16 / st.relaxed.sys.shared.u16.
define void @shared_monotonic_sys_i16(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.shared.u16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.u16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(3) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(3) %a monotonic, align 2
ret void
}
; Atomic i32 load and store with monotonic ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u32 /
; st.volatile.shared.u32, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u32 / st.relaxed.sys.shared.u32.
define void @shared_monotonic_sys_i32(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(3) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Atomic i64 load and store with monotonic ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an increment by 1 in between.
; The sm_20 configuration expects ld.volatile.shared.u64 /
; st.volatile.shared.u64, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.u64 / st.relaxed.sys.shared.u64.
define void @shared_monotonic_sys_i64(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.shared.u64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.shared.u64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(3) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; Atomic float load and store with monotonic ordering (no syncscope given in
; the IR) through an addrspace(3) pointer, with an fadd of 1.0 in between.
; The sm_20 configuration expects ld.volatile.shared.f32 /
; st.volatile.shared.f32, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.f32 / st.relaxed.sys.shared.f32.
define void @shared_monotonic_sys_float(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .f32 %f<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd1];
; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM60-NEXT: st.volatile.shared.f32 [%rd1], %f2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .f32 %f<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd1];
; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd1], %f2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(3) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Atomic double load and store with monotonic ordering (no syncscope given
; in the IR) through an addrspace(3) pointer, with an fadd of 1.0 in between.
; The sm_20 configuration expects ld.volatile.shared.f64 /
; st.volatile.shared.f64, while the sm_70 +ptx82 configuration expects
; ld.relaxed.sys.shared.f64 / st.relaxed.sys.shared.f64.
define void @shared_monotonic_sys_double(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-NEXT: .reg .f64 %fd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd1];
; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.shared.f64 [%rd1], %fd2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .f64 %fd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd1];
; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd1], %fd2;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(3) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; shared_monotonic_volatile_sys
; Atomic volatile i8 load and store with monotonic ordering through an
; addrspace(3) pointer, with an increment by 1 in between. Unlike the
; non-volatile monotonic case, a single expectation covers both llc
; configurations, namely ld.volatile.shared.u8 / st.volatile.shared.u8.
define void @shared_monotonic_volatile_sys_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
ret void
}
; Atomic volatile i16 load and store with monotonic ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u16 / st.volatile.shared.u16.
define void @shared_monotonic_volatile_sys_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(3) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(3) %a monotonic, align 2
ret void
}
; Atomic volatile i32 load and store with monotonic ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u32 / st.volatile.shared.u32.
define void @shared_monotonic_volatile_sys_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(3) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Atomic volatile i64 load and store with monotonic ordering through an
; addrspace(3) pointer, with an increment by 1 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.u64 / st.volatile.shared.u64.
define void @shared_monotonic_volatile_sys_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(3) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; Atomic volatile float load and store with monotonic ordering through an
; addrspace(3) pointer, with an fadd of 1.0 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.f32 / st.volatile.shared.f32.
define void @shared_monotonic_volatile_sys_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(3) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Atomic volatile double load and store with monotonic ordering through an
; addrspace(3) pointer, with an fadd of 1.0 in between. A single
; expectation covers both llc configurations, namely
; ld.volatile.shared.f64 / st.volatile.shared.f64.
define void @shared_monotonic_volatile_sys_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(3) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
;; local statespace
; local
; Plain (non-atomic, non-volatile) i8 load, add 1, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.u8 / st.local.u8, with
; the add performed on a 16-bit register (add.s16).
define void @local_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i16 load, add 1, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.u16 / st.local.u16.
define void @local_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(5) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i32 load, add 1, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.u32 / st.local.u32.
define void @local_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(5) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i64 load, add 1, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.u64 / st.local.u64.
define void @local_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(5) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) float load, fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.f32 / st.local.f32.
define void @local_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(5) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) double load, fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses ld.local.f64 / st.local.f64.
define void @local_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(5) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x i8> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX uses one ld.local.v2.u8 / st.local.v2.u8 pair, with
; each element incremented in a 16-bit register (add.s16).
define void @local_2xi8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi8_param_0];
; CHECK-NEXT: ld.local.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.local.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load <2 x i8>, ptr addrspace(5) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store <2 x i8> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <4 x i8> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX loads the whole vector as one 32-bit word
; (ld.local.u32), extracts each byte with bfe.u32, increments it as a 16-bit
; value, repacks the bytes with prmt.b32, and writes back one st.local.u32.
define void @local_4xi8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi8_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(5) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store <4 x i8> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x i16> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX loads the vector as one 32-bit word (ld.local.u32),
; splits it into two 16-bit registers with mov.b32, increments each, repacks
; with mov.b32, and writes back one st.local.u32.
define void @local_2xi16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi16_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load <2 x i16>, ptr addrspace(5) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store <2 x i16> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <4 x i16> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX uses one ld.local.v4.u16 / st.local.v4.u16 pair.
define void @local_4xi16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi16_param_0];
; CHECK-NEXT: ld.local.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.local.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load <4 x i16>, ptr addrspace(5) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store <4 x i16> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x i32> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX uses one ld.local.v2.u32 / st.local.v2.u32 pair.
define void @local_2xi32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi32_param_0];
; CHECK-NEXT: ld.local.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.local.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load <2 x i32>, ptr addrspace(5) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store <2 x i32> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <4 x i32> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX uses one ld.local.v4.u32 / st.local.v4.u32 pair.
define void @local_4xi32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi32_param_0];
; CHECK-NEXT: ld.local.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.local.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load <4 x i32>, ptr addrspace(5) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store <4 x i32> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x i64> load, element-wise add 1, and store through an addrspace(5)
; pointer. The expected PTX uses one ld.local.v2.u64 / st.local.v2.u64 pair.
define void @local_2xi64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi64_param_0];
; CHECK-NEXT: ld.local.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.local.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load <2 x i64>, ptr addrspace(5) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store <2 x i64> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x float> load, element-wise fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v2.f32 /
; st.local.v2.f32 pair.
define void @local_2xfloat(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xfloat_param_0];
; CHECK-NEXT: ld.local.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.local.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load <2 x float>, ptr addrspace(5) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store <2 x float> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <4 x float> load, element-wise fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v4.f32 /
; st.local.v4.f32 pair.
define void @local_4xfloat(ptr addrspace(5) %a) {
; CHECK-LABEL: local_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_4xfloat_param_0];
; CHECK-NEXT: ld.local.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.local.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load <4 x float>, ptr addrspace(5) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store <4 x float> %a.add, ptr addrspace(5) %a
ret void
}
; Plain <2 x double> load, element-wise fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v2.f64 /
; st.local.v2.f64 pair.
define void @local_2xdouble(ptr addrspace(5) %a) {
; CHECK-LABEL: local_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_2xdouble_param_0];
; CHECK-NEXT: ld.local.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load <2 x double>, ptr addrspace(5) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store <2 x double> %a.add, ptr addrspace(5) %a
ret void
}
; local_volatile
; Volatile i8 load, add 1, and volatile store through an addrspace(5) pointer.
; The expected PTX uses ld.local.u8 / st.local.u8 and carries no .volatile
; qualifier.
define void @local_volatile_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i16 load, add 1, and volatile store through an addrspace(5) pointer.
; The expected PTX uses ld.local.u16 / st.local.u16 and carries no .volatile
; qualifier.
define void @local_volatile_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(5) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i32 load, add 1, and volatile store through an addrspace(5) pointer.
; The expected PTX uses ld.local.u32 / st.local.u32 and carries no .volatile
; qualifier.
define void @local_volatile_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(5) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i64 load, add 1, and volatile store through an addrspace(5) pointer.
; The expected PTX uses ld.local.u64 / st.local.u64 and carries no .volatile
; qualifier.
define void @local_volatile_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(5) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile float load, fadd 1.0, and volatile store through an addrspace(5)
; pointer. The expected PTX uses ld.local.f32 / st.local.f32 and carries no
; .volatile qualifier.
define void @local_volatile_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(5) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(5) %a
ret void
}
; Volatile double load, fadd 1.0, and volatile store through an addrspace(5)
; pointer. The expected PTX uses ld.local.f64 / st.local.f64 and carries no
; .volatile qualifier.
define void @local_volatile_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(5) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x i8> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v2.u8 /
; st.local.v2.u8 pair and carries no .volatile qualifier.
define void @local_volatile_2xi8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi8_param_0];
; CHECK-NEXT: ld.local.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: st.local.v2.u8 [%rd1], {%rs4, %rs3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i8>, ptr addrspace(5) %a
%a.add = add <2 x i8> %a.load, <i8 1, i8 1>
store volatile <2 x i8> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <4 x i8> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX loads the whole vector as one 32-bit
; word (ld.local.u32), extracts each byte with bfe.u32, increments it as a
; 16-bit value, repacks the bytes with prmt.b32, and writes back one
; st.local.u32; neither memory instruction carries a .volatile qualifier.
define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_4xi8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi8_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(5) %a
%a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1>
store volatile <4 x i8> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x i16> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX loads the vector as one 32-bit word
; (ld.local.u32), splits and repacks it with mov.b32, and writes back one
; st.local.u32; neither memory instruction carries a .volatile qualifier.
define void @local_volatile_2xi16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi16_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: add.s16 %rs3, %rs2, 1;
; CHECK-NEXT: add.s16 %rs4, %rs1, 1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i16>, ptr addrspace(5) %a
%a.add = add <2 x i16> %a.load, <i16 1, i16 1>
store volatile <2 x i16> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <4 x i16> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v4.u16 /
; st.local.v4.u16 pair and carries no .volatile qualifier.
define void @local_volatile_4xi16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_4xi16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi16_param_0];
; CHECK-NEXT: ld.local.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
; CHECK-NEXT: add.s16 %rs5, %rs4, 1;
; CHECK-NEXT: add.s16 %rs6, %rs3, 1;
; CHECK-NEXT: add.s16 %rs7, %rs2, 1;
; CHECK-NEXT: add.s16 %rs8, %rs1, 1;
; CHECK-NEXT: st.local.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i16>, ptr addrspace(5) %a
%a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1>
store volatile <4 x i16> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x i32> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v2.u32 /
; st.local.v2.u32 pair and carries no .volatile qualifier.
define void @local_volatile_2xi32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi32_param_0];
; CHECK-NEXT: ld.local.v2.u32 {%r1, %r2}, [%rd1];
; CHECK-NEXT: add.s32 %r3, %r2, 1;
; CHECK-NEXT: add.s32 %r4, %r1, 1;
; CHECK-NEXT: st.local.v2.u32 [%rd1], {%r4, %r3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i32>, ptr addrspace(5) %a
%a.add = add <2 x i32> %a.load, <i32 1, i32 1>
store volatile <2 x i32> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <4 x i32> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v4.u32 /
; st.local.v4.u32 pair and carries no .volatile qualifier.
define void @local_volatile_4xi32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_4xi32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi32_param_0];
; CHECK-NEXT: ld.local.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];
; CHECK-NEXT: add.s32 %r5, %r4, 1;
; CHECK-NEXT: add.s32 %r6, %r3, 1;
; CHECK-NEXT: add.s32 %r7, %r2, 1;
; CHECK-NEXT: add.s32 %r8, %r1, 1;
; CHECK-NEXT: st.local.v4.u32 [%rd1], {%r8, %r7, %r6, %r5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i32>, ptr addrspace(5) %a
%a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1>
store volatile <4 x i32> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x i64> load, element-wise add 1, and volatile store through an
; addrspace(5) pointer. The expected PTX uses one ld.local.v2.u64 /
; st.local.v2.u64 pair and carries no .volatile qualifier.
define void @local_volatile_2xi64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xi64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi64_param_0];
; CHECK-NEXT: ld.local.v2.u64 {%rd2, %rd3}, [%rd1];
; CHECK-NEXT: add.s64 %rd4, %rd3, 1;
; CHECK-NEXT: add.s64 %rd5, %rd2, 1;
; CHECK-NEXT: st.local.v2.u64 [%rd1], {%rd5, %rd4};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x i64>, ptr addrspace(5) %a
%a.add = add <2 x i64> %a.load, <i64 1, i64 1>
store volatile <2 x i64> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x float> load, element-wise fadd 1.0, and volatile store through
; an addrspace(5) pointer. The expected PTX uses one ld.local.v2.f32 /
; st.local.v2.f32 pair and carries no .volatile qualifier.
define void @local_volatile_2xfloat(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xfloat_param_0];
; CHECK-NEXT: ld.local.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000;
; CHECK-NEXT: st.local.v2.f32 [%rd1], {%f4, %f3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x float>, ptr addrspace(5) %a
%a.add = fadd <2 x float> %a.load, <float 1., float 1.>
store volatile <2 x float> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <4 x float> load, element-wise fadd 1.0, and volatile store through
; an addrspace(5) pointer. The expected PTX uses one ld.local.v4.f32 /
; st.local.v4.f32 pair and carries no .volatile qualifier.
define void @local_volatile_4xfloat(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_4xfloat(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<9>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xfloat_param_0];
; CHECK-NEXT: ld.local.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000;
; CHECK-NEXT: st.local.v4.f32 [%rd1], {%f8, %f7, %f6, %f5};
; CHECK-NEXT: ret;
%a.load = load volatile <4 x float>, ptr addrspace(5) %a
%a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.>
store volatile <4 x float> %a.add, ptr addrspace(5) %a
ret void
}
; Volatile <2 x double> load, element-wise fadd 1.0, and volatile store through
; an addrspace(5) pointer. The expected PTX uses one ld.local.v2.f64 /
; st.local.v2.f64 pair and carries no .volatile qualifier.
define void @local_volatile_2xdouble(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_2xdouble(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xdouble_param_0];
; CHECK-NEXT: ld.local.v2.f64 {%fd1, %fd2}, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.v2.f64 [%rd1], {%fd4, %fd3};
; CHECK-NEXT: ret;
%a.load = load volatile <2 x double>, ptr addrspace(5) %a
%a.add = fadd <2 x double> %a.load, <double 1., double 1.>
store volatile <2 x double> %a.add, ptr addrspace(5) %a
ret void
}
; local_unordered_sys
; Unordered atomic i8 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u8 / st.local.u8
; with no further qualifiers.
define void @local_unordered_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
ret void
}
; Unordered atomic i16 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u16 / st.local.u16
; with no further qualifiers.
define void @local_unordered_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(5) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(5) %a unordered, align 2
ret void
}
; Unordered atomic i32 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u32 / st.local.u32
; with no further qualifiers.
define void @local_unordered_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(5) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic i64 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u64 / st.local.u64
; with no further qualifiers.
define void @local_unordered_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(5) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; Unordered atomic float load, fadd 1.0, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f32 / st.local.f32
; with no further qualifiers.
define void @local_unordered_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic float, ptr addrspace(5) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic double load, fadd 1.0, and unordered atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f64 / st.local.f64
; with no further qualifiers.
define void @local_unordered_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic double, ptr addrspace(5) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; local_unordered_volatile_sys
; Unordered atomic volatile i8 load, add 1, and store through an addrspace(5)
; pointer. The expected PTX uses plain ld.local.u8 / st.local.u8 and carries no
; .volatile qualifier.
define void @local_unordered_volatile_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
ret void
}
; Unordered atomic volatile i16 load, add 1, and store through an addrspace(5)
; pointer. The expected PTX uses plain ld.local.u16 / st.local.u16 and carries
; no .volatile qualifier.
define void @local_unordered_volatile_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(5) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(5) %a unordered, align 2
ret void
}
; Unordered atomic volatile i32 load, add 1, and store through an addrspace(5)
; pointer. The expected PTX uses plain ld.local.u32 / st.local.u32 and carries
; no .volatile qualifier.
define void @local_unordered_volatile_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(5) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic volatile i64 load, add 1, and store through an addrspace(5)
; pointer. The expected PTX uses plain ld.local.u64 / st.local.u64 and carries
; no .volatile qualifier.
define void @local_unordered_volatile_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(5) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; Unordered atomic volatile float load, fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f32 / st.local.f32
; and carries no .volatile qualifier.
define void @local_unordered_volatile_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(5) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic volatile double load, fadd 1.0, and store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f64 / st.local.f64
; and carries no .volatile qualifier.
define void @local_unordered_volatile_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(5) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; local_monotonic_sys
; Monotonic atomic i8 load, add 1, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u8 / st.local.u8
; with no further qualifiers.
define void @local_monotonic_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
ret void
}
; Monotonic atomic i16 load, add 1, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u16 / st.local.u16
; with no further qualifiers.
define void @local_monotonic_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(5) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(5) %a monotonic, align 2
ret void
}
; Monotonic atomic i32 load, add 1, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u32 / st.local.u32
; with no further qualifiers.
define void @local_monotonic_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(5) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic i64 load, add 1, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.u64 / st.local.u64
; with no further qualifiers.
define void @local_monotonic_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(5) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; Monotonic atomic float load, fadd 1.0, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f32 / st.local.f32
; with no further qualifiers.
define void @local_monotonic_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic float, ptr addrspace(5) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic double load, fadd 1.0, and monotonic atomic store through an
; addrspace(5) pointer. The expected PTX uses plain ld.local.f64 / st.local.f64
; with no further qualifiers.
define void @local_monotonic_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic double, ptr addrspace(5) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; local_monotonic_volatile_sys
; Monotonic atomic volatile i8 load, add 1, and store through an addrspace(5)
; pointer. The expected PTX uses plain ld.local.u8 / st.local.u8 and carries no
; .volatile qualifier.
define void @local_monotonic_volatile_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
ret void
}
; Monotonic atomic volatile i16 load, increment, and store through an
; addrspace(5) pointer, with no explicit syncscope.
; Expected PTX: unqualified ld.local.u16 / st.local.u16 around an add.s16; the
; volatile marker in the IR does not show up as a qualifier on the memory
; instructions. The assertions use the prefix shared by both llc invocations
; at the top of the file.
define void @local_monotonic_volatile_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.local.u16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.u16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(5) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(5) %a monotonic, align 2
ret void
}
; Monotonic atomic volatile i32 load, increment, and store through an
; addrspace(5) pointer, with no explicit syncscope.
; Expected PTX: unqualified ld.local.u32 / st.local.u32 around an add.s32; the
; volatile marker in the IR does not show up as a qualifier on the memory
; instructions. The assertions use the prefix shared by both llc invocations
; at the top of the file.
define void @local_monotonic_volatile_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.u32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(5) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic volatile i64 load, increment, and store through an
; addrspace(5) pointer, with no explicit syncscope.
; Expected PTX: unqualified ld.local.u64 / st.local.u64 around an add.s64. The
; address and both data values are 64-bit, so only the %rd register class is
; declared. The volatile marker in the IR does not show up as a qualifier on
; the memory instructions.
define void @local_monotonic_volatile_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.local.u64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.u64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(5) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; Monotonic atomic volatile float load, fadd of 1.0, and store through an
; addrspace(5) pointer, with no explicit syncscope.
; Expected PTX: unqualified ld.local.f32 / st.local.f32 around a
; round-to-nearest add with immediate 0f3F800000 (single-precision 1.0). The
; volatile marker in the IR does not show up as a qualifier on the memory
; instructions.
define void @local_monotonic_volatile_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.local.f32 %f1, [%rd1];
; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
; CHECK-NEXT: st.local.f32 [%rd1], %f2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(5) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic volatile double load, fadd of 1.0, and store through an
; addrspace(5) pointer, with no explicit syncscope.
; Expected PTX: unqualified ld.local.f64 / st.local.f64 around a
; round-to-nearest add with immediate 0d3FF0000000000000 (double-precision
; 1.0). The volatile marker in the IR does not show up as a qualifier on the
; memory instructions.
define void @local_monotonic_volatile_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-NEXT: .reg .f64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.local.f64 %fd1, [%rd1];
; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
; CHECK-NEXT: st.local.f64 [%rd1], %fd2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(5) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}