| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s |
| ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70 |
| ; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %} |
| |
| ; TODO: add i1, <8 x i8>, and <6 x i8> vector tests. |
| |
| ; TODO: add tests for vectors that exceed 128 bits in length. |
| ; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors, |
| ; vectors cannot exceed 128 bits in length; i.e., .v4.u64 is not allowed. |
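| ; A hypothetical sketch of such a test (the function name is illustrative and the |
| ; test is not yet enabled): a <4 x i64> access is 256 bits wide, so it would have |
| ; to be split into multiple accesses rather than lowered to a single .v4.u64. |
| ;   define void @generic_4xi64(ptr %a) { |
| ;     %a.load = load <4 x i64>, ptr %a |
| ;     %a.add = add <4 x i64> %a.load, <i64 1, i64 1, i64 1, i64 1> |
| ;     store <4 x i64> %a.add, ptr %a |
| ;     ret void |
| ;   } |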
| |
| ; TODO: generate PTX that preserves Concurrent Forward Progress |
| ; for atomic operations to the local statespace |
| ; by generating atomic or volatile operations. |
| |
| ; TODO: design exposure for atomic operations on vector types. |
| |
| ; TODO: add weak, atomic, volatile, and atomic volatile tests |
| ; for the .const and .param statespaces. |
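| ; A hypothetical sketch of a weak .const test (illustrative only; in NVPTX, |
| ; addrspace(4) is the constant statespace, which is read-only, so only the |
| ; load side would be exercised): |
| ;   define i32 @const_i32(ptr addrspace(4) %a) { |
| ;     %a.load = load i32, ptr addrspace(4) %a |
| ;     ret i32 %a.load |
| ;   } |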
| |
| ; TODO: optimize .sys.shared into .cta.shared or .cluster.shared. |
| |
| ;; generic statespace |
| |
| ; generic |
| |
| define void @generic_i8(ptr %a) { |
| ; CHECK-LABEL: generic_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_i8_param_0]; |
| ; CHECK-NEXT: ld.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i8, ptr %a |
| %a.add = add i8 %a.load, 1 |
| store i8 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_i16(ptr %a) { |
| ; CHECK-LABEL: generic_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_i16_param_0]; |
| ; CHECK-NEXT: ld.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i16, ptr %a |
| %a.add = add i16 %a.load, 1 |
| store i16 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_i32(ptr %a) { |
| ; CHECK-LABEL: generic_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_i32_param_0]; |
| ; CHECK-NEXT: ld.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i32, ptr %a |
| %a.add = add i32 %a.load, 1 |
| store i32 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_i64(ptr %a) { |
| ; CHECK-LABEL: generic_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_i64_param_0]; |
| ; CHECK-NEXT: ld.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load i64, ptr %a |
| %a.add = add i64 %a.load, 1 |
| store i64 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_float(ptr %a) { |
| ; CHECK-LABEL: generic_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_float_param_0]; |
| ; CHECK-NEXT: ld.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load float, ptr %a |
| %a.add = fadd float %a.load, 1. |
| store float %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_double(ptr %a) { |
| ; CHECK-LABEL: generic_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_double_param_0]; |
| ; CHECK-NEXT: ld.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load double, ptr %a |
| %a.add = fadd double %a.load, 1. |
| store double %a.add, ptr %a |
| ret void |
| } |
| |
| ; TODO: make the lowering of these weak vector ops consistent with |
| ; that of the next test. This test lowers to a weak PTX |
| ; vector op, but the next test lowers to a scalar PTX op. |
| define void @generic_2xi8(ptr %a) { |
| ; CHECK-LABEL: generic_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi8_param_0]; |
| ; CHECK-NEXT: ld.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i8>, ptr %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store <2 x i8> %a.add, ptr %a |
| ret void |
| } |
| |
| ; TODO: make the lowering of these weak vector ops consistent with |
| ; that of the previous test. This test lowers to a weak |
| ; PTX scalar op, but the prior test lowers to a vector PTX op. |
| define void @generic_4xi8(ptr %a) { |
| ; CHECK-LABEL: generic_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi8_param_0]; |
| ; CHECK-NEXT: ld.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i8>, ptr %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store <4 x i8> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_2xi16(ptr %a) { |
| ; CHECK-LABEL: generic_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi16_param_0]; |
| ; CHECK-NEXT: ld.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i16>, ptr %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store <2 x i16> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_4xi16(ptr %a) { |
| ; CHECK-LABEL: generic_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi16_param_0]; |
| ; CHECK-NEXT: ld.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i16>, ptr %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store <4 x i16> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_2xi32(ptr %a) { |
| ; CHECK-LABEL: generic_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi32_param_0]; |
| ; CHECK-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i32>, ptr %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store <2 x i32> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_4xi32(ptr %a) { |
| ; CHECK-LABEL: generic_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi32_param_0]; |
| ; CHECK-NEXT: ld.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i32>, ptr %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store <4 x i32> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_2xi64(ptr %a) { |
| ; CHECK-LABEL: generic_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xi64_param_0]; |
| ; CHECK-NEXT: ld.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i64>, ptr %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store <2 x i64> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_2xfloat(ptr %a) { |
| ; CHECK-LABEL: generic_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x float>, ptr %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store <2 x float> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_4xfloat(ptr %a) { |
| ; CHECK-LABEL: generic_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x float>, ptr %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store <4 x float> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_2xdouble(ptr %a) { |
| ; CHECK-LABEL: generic_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x double>, ptr %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store <2 x double> %a.add, ptr %a |
| ret void |
| } |
| |
| ; generic_volatile |
| |
| define void @generic_volatile_i8(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i8, ptr %a |
| %a.add = add i8 %a.load, 1 |
| store volatile i8 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_i16(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i16, ptr %a |
| %a.add = add i16 %a.load, 1 |
| store volatile i16 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_i32(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i32, ptr %a |
| %a.add = add i32 %a.load, 1 |
| store volatile i32 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_i64(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i64, ptr %a |
| %a.add = add i64 %a.load, 1 |
| store volatile i64 %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_float(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile float, ptr %a |
| %a.add = fadd float %a.load, 1. |
| store volatile float %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_double(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile double, ptr %a |
| %a.add = fadd double %a.load, 1. |
| store volatile double %a.add, ptr %a |
| ret void |
| } |
| |
| ; TODO: add volatile, atomic, and volatile atomic memory operations on vector types. |
| ; Currently, LLVM: |
| ; - does not allow atomic operations on vectors. |
| ; - allows volatile operations on vectors, but it is not clear what they mean. |
| ; Both of the following semantics make sense in general, and PTX supports both: |
| ; - volatile/atomic/volatile atomic applies to the whole vector |
| ; - volatile/atomic/volatile atomic applies elementwise |
| ; Actions required: |
| ; - clarify the LLVM semantics of volatile on vectors and align the NVPTX backend with them. |
| ;   The tests below show that the current implementation picks the semantics inconsistently: |
| ;   * volatile <2 x i8> lowers to "elementwise volatile" |
| ;   * volatile <4 x i8> lowers to "full vector volatile" |
| ; - provide support for vector atomics, e.g., by extending LLVM IR or via intrinsics. |
| ; - update the tests in load-store-sm70.ll as well. |
| |
| ; TODO: make this operation consistent with the one for <4 x i8>. |
| ; This operation lowers to an "elementwise volatile" PTX operation. |
| define void @generic_volatile_2xi8(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i8>, ptr %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store volatile <2 x i8> %a.add, ptr %a |
| ret void |
| } |
| |
| ; TODO: make this operation consistent with the one for <2 x i8>. |
| ; This operation lowers to a "full vector volatile" PTX operation. |
| define void @generic_volatile_4xi8(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.volatile.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i8>, ptr %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store volatile <4 x i8> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_2xi16(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i16>, ptr %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store volatile <2 x i16> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_4xi16(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i16>, ptr %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store volatile <4 x i16> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_2xi32(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.volatile.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i32>, ptr %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store volatile <2 x i32> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_4xi32(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i32>, ptr %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store volatile <4 x i32> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_2xi64(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xi64_param_0]; |
| ; CHECK-NEXT: ld.volatile.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i64>, ptr %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store volatile <2 x i64> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_2xfloat(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x float>, ptr %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store volatile <2 x float> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_4xfloat(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x float>, ptr %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store volatile <4 x float> %a.add, ptr %a |
| ret void |
| } |
| |
| define void @generic_volatile_2xdouble(ptr %a) { |
| ; CHECK-LABEL: generic_volatile_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.volatile.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x double>, ptr %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store volatile <2 x double> %a.add, ptr %a |
| ret void |
| } |
| |
| ; generic_unordered_sys |
| |
| define void @generic_unordered_sys_i8(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr %a unordered, align 1 |
| ret void |
| } |
| |
| define void @generic_unordered_sys_i16(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr %a unordered, align 2 |
| ret void |
| } |
| |
| define void @generic_unordered_sys_i32(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr %a unordered, align 4 |
| ret void |
| } |
| |
| define void @generic_unordered_sys_i64(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr %a unordered, align 8 |
| ret void |
| } |
| |
| define void @generic_unordered_sys_float(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr %a unordered, align 4 |
| ret void |
| } |
| |
| define void @generic_unordered_sys_double(ptr %a) { |
| ; SM60-LABEL: generic_unordered_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_unordered_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr %a unordered, align 8 |
| ret void |
| } |
| |
| ; generic_unordered_volatile_sys |
| |
| define void @generic_unordered_volatile_sys_i8(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr %a unordered, align 1 |
| ret void |
| } |
| |
| define void @generic_unordered_volatile_sys_i16(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr %a unordered, align 2 |
| ret void |
| } |
| |
| define void @generic_unordered_volatile_sys_i32(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr %a unordered, align 4 |
| ret void |
| } |
| |
| define void @generic_unordered_volatile_sys_i64(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr %a unordered, align 8 |
| ret void |
| } |
| |
| define void @generic_unordered_volatile_sys_float(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr %a unordered, align 4 |
| ret void |
| } |
| |
| define void @generic_unordered_volatile_sys_double(ptr %a) { |
| ; CHECK-LABEL: generic_unordered_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr %a unordered, align 8 |
| ret void |
| } |
| |
| ; generic_monotonic_sys |
| |
| define void @generic_monotonic_sys_i8(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @generic_monotonic_sys_i16(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @generic_monotonic_sys_i32(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @generic_monotonic_sys_i64(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @generic_monotonic_sys_float(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @generic_monotonic_sys_double(ptr %a) { |
| ; SM60-LABEL: generic_monotonic_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: generic_monotonic_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr %a monotonic, align 8 |
| ret void |
| } |
| |
| ; generic_monotonic_volatile_sys |
| |
| define void @generic_monotonic_volatile_sys_i8(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @generic_monotonic_volatile_sys_i16(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @generic_monotonic_volatile_sys_i32(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @generic_monotonic_volatile_sys_i64(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @generic_monotonic_volatile_sys_float(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @generic_monotonic_volatile_sys_double(ptr %a) { |
| ; CHECK-LABEL: generic_monotonic_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr %a monotonic, align 8 |
| ret void |
| } |
| |
| ;; global statespace |
| |
| ; global |
| |
| define void @global_i8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_i8_param_0]; |
| ; CHECK-NEXT: ld.global.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.global.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i8, ptr addrspace(1) %a |
| %a.add = add i8 %a.load, 1 |
| store i8 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_i16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_i16_param_0]; |
| ; CHECK-NEXT: ld.global.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.global.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i16, ptr addrspace(1) %a |
| %a.add = add i16 %a.load, 1 |
| store i16 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_i32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_i32_param_0]; |
| ; CHECK-NEXT: ld.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.global.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i32, ptr addrspace(1) %a |
| %a.add = add i32 %a.load, 1 |
| store i32 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_i64(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_i64_param_0]; |
| ; CHECK-NEXT: ld.global.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.global.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load i64, ptr addrspace(1) %a |
| %a.add = add i64 %a.load, 1 |
| store i64 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_float(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_float_param_0]; |
| ; CHECK-NEXT: ld.global.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.global.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load float, ptr addrspace(1) %a |
| %a.add = fadd float %a.load, 1. |
| store float %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_double(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_double_param_0]; |
| ; CHECK-NEXT: ld.global.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.global.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load double, ptr addrspace(1) %a |
| %a.add = fadd double %a.load, 1. |
| store double %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xi8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi8_param_0]; |
| ; CHECK-NEXT: ld.global.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.global.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i8>, ptr addrspace(1) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store <2 x i8> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_4xi8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi8_param_0]; |
| ; CHECK-NEXT: ld.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.global.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i8>, ptr addrspace(1) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store <4 x i8> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xi16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi16_param_0]; |
| ; CHECK-NEXT: ld.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.global.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i16>, ptr addrspace(1) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store <2 x i16> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_4xi16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi16_param_0]; |
| ; CHECK-NEXT: ld.global.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.global.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i16>, ptr addrspace(1) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store <4 x i16> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xi32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi32_param_0]; |
| ; CHECK-NEXT: ld.global.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.global.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i32>, ptr addrspace(1) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store <2 x i32> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_4xi32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi32_param_0]; |
| ; CHECK-NEXT: ld.global.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.global.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i32>, ptr addrspace(1) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store <4 x i32> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xi64(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xi64_param_0]; |
| ; CHECK-NEXT: ld.global.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.global.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i64>, ptr addrspace(1) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store <2 x i64> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xfloat(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.global.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.global.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x float>, ptr addrspace(1) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store <2 x float> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_4xfloat(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.global.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x float>, ptr addrspace(1) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store <4 x float> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_2xdouble(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.global.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.global.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x double>, ptr addrspace(1) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store <2 x double> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| ; global_volatile |
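| ; Volatile loads and stores to the global statespace lower to ld/st.volatile.global, as the checks below show. |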
| |
| define void @global_volatile_i8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.global.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i8, ptr addrspace(1) %a |
| %a.add = add i8 %a.load, 1 |
| store volatile i8 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_i16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.global.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i16, ptr addrspace(1) %a |
| %a.add = add i16 %a.load, 1 |
| store volatile i16 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_i32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i32, ptr addrspace(1) %a |
| %a.add = add i32 %a.load, 1 |
| store volatile i32 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_i64(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.global.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i64, ptr addrspace(1) %a |
| %a.add = add i64 %a.load, 1 |
| store volatile i64 %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_float(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.global.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile float, ptr addrspace(1) %a |
| %a.add = fadd float %a.load, 1. |
| store volatile float %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_double(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.global.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile double, ptr addrspace(1) %a |
| %a.add = fadd double %a.load, 1. |
| store volatile double %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xi8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.global.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i8>, ptr addrspace(1) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store volatile <2 x i8> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
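| ; <4 x i8> is accessed as a single 32-bit value: the lowering below unpacks the bytes with bfe, increments them, and repacks with prmt. |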
| define void @global_volatile_4xi8(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i8>, ptr addrspace(1) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store volatile <4 x i8> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xi16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i16>, ptr addrspace(1) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store volatile <2 x i16> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_4xi16(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.global.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i16>, ptr addrspace(1) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store volatile <4 x i16> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xi32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.volatile.global.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i32>, ptr addrspace(1) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store volatile <2 x i32> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_4xi32(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.volatile.global.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i32>, ptr addrspace(1) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store volatile <4 x i32> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xi64(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xi64_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.global.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i64>, ptr addrspace(1) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store volatile <2 x i64> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xfloat(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.global.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x float>, ptr addrspace(1) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store volatile <2 x float> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_4xfloat(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.global.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x float>, ptr addrspace(1) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store volatile <4 x float> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| define void @global_volatile_2xdouble(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: global_volatile_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.volatile.global.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.global.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x double>, ptr addrspace(1) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store volatile <2 x double> %a.add, ptr addrspace(1) %a |
| ret void |
| } |
| |
| ; global_unordered_sys |
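| ; Unordered system-scope atomics: SM60 lowers them to volatile ld/st.global, while SM70+ uses ld/st.relaxed.sys.global. |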
| |
| define void @global_unordered_sys_i8(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @global_unordered_sys_i16(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(1) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(1) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @global_unordered_sys_i32(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(1) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(1) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @global_unordered_sys_i64(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(1) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(1) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @global_unordered_sys_float(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.global.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(1) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(1) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @global_unordered_sys_double(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.global.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(1) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(1) %a unordered, align 8 |
| ret void |
| } |
| |
| ; global_unordered_volatile_sys |
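| ; Unordered volatile system-scope atomics: SM60 lowers them to volatile ld/st.global, while SM70+ uses ld/st.mmio.relaxed.sys.global. |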
| |
| define void @global_unordered_volatile_sys_i8(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i8_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @global_unordered_volatile_sys_i16(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i16_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(1) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(1) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @global_unordered_volatile_sys_i32(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i32_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(1) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(1) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @global_unordered_volatile_sys_i64(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_i64_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(1) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(1) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @global_unordered_volatile_sys_float(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_float_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(1) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(1) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @global_unordered_volatile_sys_double(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_unordered_volatile_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_unordered_volatile_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_double_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(1) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(1) %a unordered, align 8 |
| ret void |
| } |
| |
| ; global_monotonic_sys |
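| ; Monotonic system-scope atomics lower the same way as unordered: volatile ld/st.global on SM60, ld/st.relaxed.sys.global on SM70+. |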
| |
| define void @global_monotonic_sys_i8(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @global_monotonic_sys_i16(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(1) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(1) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @global_monotonic_sys_i32(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(1) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(1) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @global_monotonic_sys_i64(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.global.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(1) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(1) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @global_monotonic_sys_float(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.global.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(1) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(1) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @global_monotonic_sys_double(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.global.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(1) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(1) %a monotonic, align 8 |
| ret void |
| } |
| |
| ; global_monotonic_volatile_sys |
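| ; Monotonic volatile system-scope atomics: volatile ld/st.global on SM60, ld/st.mmio.relaxed.sys.global on SM70+. |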
| |
| define void @global_monotonic_volatile_sys_i8(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i8_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @global_monotonic_volatile_sys_i16(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.global.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i16_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(1) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(1) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @global_monotonic_volatile_sys_i32(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.global.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i32_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(1) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(1) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @global_monotonic_volatile_sys_i64(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.global.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.global.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_i64_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(1) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(1) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @global_monotonic_volatile_sys_float(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.global.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_float_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(1) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(1) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @global_monotonic_volatile_sys_double(ptr addrspace(1) %a) { |
| ; SM60-LABEL: global_monotonic_volatile_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.global.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: global_monotonic_volatile_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_double_param_0]; |
| ; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(1) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(1) %a monotonic, align 8 |
| ret void |
| } |
| |
| ;; shared statespace |
| |
| ; shared |
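| ; Weak (non-atomic, non-volatile) accesses to the shared statespace lower to plain ld/st.shared. |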
| |
| define void @shared_i8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_i8_param_0]; |
| ; CHECK-NEXT: ld.shared.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.shared.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i8, ptr addrspace(3) %a |
| %a.add = add i8 %a.load, 1 |
| store i8 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_i16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_i16_param_0]; |
| ; CHECK-NEXT: ld.shared.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.shared.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i16, ptr addrspace(3) %a |
| %a.add = add i16 %a.load, 1 |
| store i16 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_i32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_i32_param_0]; |
| ; CHECK-NEXT: ld.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i32, ptr addrspace(3) %a |
| %a.add = add i32 %a.load, 1 |
| store i32 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_i64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_i64_param_0]; |
| ; CHECK-NEXT: ld.shared.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.shared.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load i64, ptr addrspace(3) %a |
| %a.add = add i64 %a.load, 1 |
| store i64 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_float(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_float_param_0]; |
| ; CHECK-NEXT: ld.shared.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.shared.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load float, ptr addrspace(3) %a |
| %a.add = fadd float %a.load, 1. |
| store float %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_double(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_double_param_0]; |
| ; CHECK-NEXT: ld.shared.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.shared.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load double, ptr addrspace(3) %a |
| %a.add = fadd double %a.load, 1. |
| store double %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xi8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi8_param_0]; |
| ; CHECK-NEXT: ld.shared.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.shared.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i8>, ptr addrspace(3) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store <2 x i8> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_4xi8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi8_param_0]; |
| ; CHECK-NEXT: ld.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.shared.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i8>, ptr addrspace(3) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store <4 x i8> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xi16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi16_param_0]; |
| ; CHECK-NEXT: ld.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i16>, ptr addrspace(3) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store <2 x i16> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_4xi16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi16_param_0]; |
| ; CHECK-NEXT: ld.shared.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.shared.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i16>, ptr addrspace(3) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store <4 x i16> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xi32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi32_param_0]; |
| ; CHECK-NEXT: ld.shared.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.shared.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i32>, ptr addrspace(3) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store <2 x i32> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_4xi32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi32_param_0]; |
| ; CHECK-NEXT: ld.shared.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.shared.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i32>, ptr addrspace(3) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store <4 x i32> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xi64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xi64_param_0]; |
| ; CHECK-NEXT: ld.shared.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.shared.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i64>, ptr addrspace(3) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store <2 x i64> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xfloat(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.shared.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.shared.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x float>, ptr addrspace(3) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store <2 x float> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_4xfloat(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.shared.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.shared.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x float>, ptr addrspace(3) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store <4 x float> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_2xdouble(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.shared.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.shared.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x double>, ptr addrspace(3) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store <2 x double> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| ; shared_volatile |
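| ; Volatile accesses to the shared statespace lower to ld/st.volatile.shared. |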
| |
| define void @shared_volatile_i8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i8, ptr addrspace(3) %a |
| %a.add = add i8 %a.load, 1 |
| store volatile i8 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_i16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i16, ptr addrspace(3) %a |
| %a.add = add i16 %a.load, 1 |
| store volatile i16 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_i32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i32, ptr addrspace(3) %a |
| %a.add = add i32 %a.load, 1 |
| store volatile i32 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_i64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i64, ptr addrspace(3) %a |
| %a.add = add i64 %a.load, 1 |
| store volatile i64 %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_float(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile float, ptr addrspace(3) %a |
| %a.add = fadd float %a.load, 1. |
| store volatile float %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_double(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile double, ptr addrspace(3) %a |
| %a.add = fadd double %a.load, 1. |
| store volatile double %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xi8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i8>, ptr addrspace(3) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store volatile <2 x i8> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_4xi8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi8_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i8>, ptr addrspace(3) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store volatile <4 x i8> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xi16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i16>, ptr addrspace(3) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store volatile <2 x i16> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_4xi16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi16_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i16>, ptr addrspace(3) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store volatile <4 x i16> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xi32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.volatile.shared.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i32>, ptr addrspace(3) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store volatile <2 x i32> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_4xi32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi32_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.volatile.shared.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i32>, ptr addrspace(3) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store volatile <4 x i32> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xi64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xi64_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.shared.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i64>, ptr addrspace(3) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store volatile <2 x i64> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xfloat(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.shared.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x float>, ptr addrspace(3) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store volatile <2 x float> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_4xfloat(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.shared.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x float>, ptr addrspace(3) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store volatile <4 x float> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| define void @shared_volatile_2xdouble(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_volatile_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.shared.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x double>, ptr addrspace(3) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store volatile <2 x double> %a.add, ptr addrspace(3) %a |
| ret void |
| } |
| |
| ; shared_unordered_sys |
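| ; Unordered atomic loads/stores to the shared statespace lower to |
| ; ld.volatile.shared/st.volatile.shared on sm_60 and to |
| ; ld.relaxed.sys.shared/st.relaxed.sys.shared on sm_70, as the SM60/SM70 |
| ; prefixes below check. |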
| |
| define void @shared_unordered_sys_i8(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @shared_unordered_sys_i16(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.shared.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(3) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(3) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @shared_unordered_sys_i32(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(3) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(3) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @shared_unordered_sys_i64(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.shared.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(3) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(3) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @shared_unordered_sys_float(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.shared.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(3) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(3) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @shared_unordered_sys_double(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_unordered_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.shared.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_unordered_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(3) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(3) %a unordered, align 8 |
| ret void |
| } |
| |
| ; shared_unordered_volatile_sys |
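| ; Unordered atomic volatile loads/stores to the shared statespace lower to |
| ; ld.volatile.shared/st.volatile.shared on both sm_60 and sm_70, so a single |
| ; CHECK prefix covers both targets. |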
| |
| define void @shared_unordered_volatile_sys_i8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @shared_unordered_volatile_sys_i16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(3) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(3) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @shared_unordered_volatile_sys_i32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(3) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(3) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @shared_unordered_volatile_sys_i64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(3) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(3) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @shared_unordered_volatile_sys_float(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(3) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(3) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @shared_unordered_volatile_sys_double(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_unordered_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(3) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(3) %a unordered, align 8 |
| ret void |
| } |
| |
| ; shared_monotonic_sys |
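| ; Monotonic atomic loads/stores to the shared statespace follow the same |
| ; pattern as unordered: volatile accesses on sm_60, relaxed.sys accesses on |
| ; sm_70. |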
| |
| define void @shared_monotonic_sys_i8(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_i8( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i8_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i8_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @shared_monotonic_sys_i16(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_i16( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b16 %rs<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i16_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u16 %rs1, [%rd1]; |
| ; SM60-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM60-NEXT: st.volatile.shared.u16 [%rd1], %rs2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b16 %rs<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i16_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs1, [%rd1]; |
| ; SM70-NEXT: add.s16 %rs2, %rs1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd1], %rs2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(3) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(3) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @shared_monotonic_sys_i32(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_i32( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b32 %r<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i32_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; SM60-NEXT: add.s32 %r2, %r1, 1; |
| ; SM60-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i32_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd1]; |
| ; SM70-NEXT: add.s32 %r2, %r1, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd1], %r2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(3) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(3) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @shared_monotonic_sys_i64(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_i64( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<4>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i64_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.u64 %rd2, [%rd1]; |
| ; SM60-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM60-NEXT: st.volatile.shared.u64 [%rd1], %rd3; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<4>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_i64_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd2, [%rd1]; |
| ; SM70-NEXT: add.s64 %rd3, %rd2, 1; |
| ; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd1], %rd3; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(3) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(3) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @shared_monotonic_sys_float(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_float( |
| ; SM60: { |
| ; SM60-NEXT: .reg .f32 %f<3>; |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_float_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd1]; |
| ; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM60-NEXT: st.volatile.shared.f32 [%rd1], %f2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_float( |
| ; SM70: { |
| ; SM70-NEXT: .reg .f32 %f<3>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_float_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd1]; |
| ; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd1], %f2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(3) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(3) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @shared_monotonic_sys_double(ptr addrspace(3) %a) { |
| ; SM60-LABEL: shared_monotonic_sys_double( |
| ; SM60: { |
| ; SM60-NEXT: .reg .b64 %rd<2>; |
| ; SM60-NEXT: .reg .f64 %fd<3>; |
| ; SM60-EMPTY: |
| ; SM60-NEXT: // %bb.0: |
| ; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_double_param_0]; |
| ; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd1]; |
| ; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM60-NEXT: st.volatile.shared.f64 [%rd1], %fd2; |
| ; SM60-NEXT: ret; |
| ; |
| ; SM70-LABEL: shared_monotonic_sys_double( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-NEXT: .reg .f64 %fd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_double_param_0]; |
| ; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd1]; |
| ; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd1], %fd2; |
| ; SM70-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(3) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(3) %a monotonic, align 8 |
| ret void |
| } |
| |
| ; shared_monotonic_volatile_sys |
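| ; Monotonic atomic volatile loads/stores to the shared statespace lower to |
| ; ld.volatile.shared/st.volatile.shared on both targets. |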
| |
| define void @shared_monotonic_volatile_sys_i8(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @shared_monotonic_volatile_sys_i16(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(3) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(3) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @shared_monotonic_volatile_sys_i32(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(3) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(3) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @shared_monotonic_volatile_sys_i64(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.volatile.shared.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(3) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(3) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @shared_monotonic_volatile_sys_float(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.volatile.shared.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(3) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(3) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @shared_monotonic_volatile_sys_double(ptr addrspace(3) %a) { |
| ; CHECK-LABEL: shared_monotonic_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.volatile.shared.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(3) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(3) %a monotonic, align 8 |
| ret void |
| } |
| |
| ;; local statespace |
| |
| ; local |
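| ; Plain loads/stores to the local statespace lower to ld.local/st.local. |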
| |
| define void @local_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i8, ptr addrspace(5) %a |
| %a.add = add i8 %a.load, 1 |
| store i8 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i16, ptr addrspace(5) %a |
| %a.add = add i16 %a.load, 1 |
| store i16 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load i32, ptr addrspace(5) %a |
| %a.add = add i32 %a.load, 1 |
| store i32 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load i64, ptr addrspace(5) %a |
| %a.add = add i64 %a.load, 1 |
| store i64 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load float, ptr addrspace(5) %a |
| %a.add = fadd float %a.load, 1. |
| store float %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load double, ptr addrspace(5) %a |
| %a.add = fadd double %a.load, 1. |
| store double %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xi8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi8_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.local.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i8>, ptr addrspace(5) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store <2 x i8> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_4xi8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi8_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i8>, ptr addrspace(5) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store <4 x i8> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xi16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi16_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i16>, ptr addrspace(5) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store <2 x i16> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_4xi16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi16_param_0]; |
| ; CHECK-NEXT: ld.local.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.local.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i16>, ptr addrspace(5) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store <4 x i16> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xi32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi32_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.local.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i32>, ptr addrspace(5) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store <2 x i32> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_4xi32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi32_param_0]; |
| ; CHECK-NEXT: ld.local.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.local.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x i32>, ptr addrspace(5) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store <4 x i32> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xi64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xi64_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.local.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x i64>, ptr addrspace(5) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store <2 x i64> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xfloat(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.local.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x float>, ptr addrspace(5) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store <2 x float> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_4xfloat(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.local.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <4 x float>, ptr addrspace(5) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store <4 x float> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_2xdouble(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.local.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load <2 x double>, ptr addrspace(5) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store <2 x double> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| ; local_volatile |
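| ; Volatile loads/stores to the local statespace currently lower to plain |
| ; ld.local/st.local (no .volatile qualifier), as checked below. |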
| |
| define void @local_volatile_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i8, ptr addrspace(5) %a |
| %a.add = add i8 %a.load, 1 |
| store volatile i8 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i16, ptr addrspace(5) %a |
| %a.add = add i16 %a.load, 1 |
| store volatile i16 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i32, ptr addrspace(5) %a |
| %a.add = add i32 %a.load, 1 |
| store volatile i32 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile i64, ptr addrspace(5) %a |
| %a.add = add i64 %a.load, 1 |
| store volatile i64 %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile float, ptr addrspace(5) %a |
| %a.add = fadd float %a.load, 1. |
| store volatile float %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile double, ptr addrspace(5) %a |
| %a.add = fadd double %a.load, 1. |
| store volatile double %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xi8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi8_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u8 {%rs1, %rs2}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: st.local.v2.u8 [%rd1], {%rs4, %rs3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i8>, ptr addrspace(5) %a |
| %a.add = add <2 x i8> %a.load, <i8 1, i8 1> |
| store volatile <2 x i8> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_4xi8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_4xi8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi8_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; |
| ; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; |
| ; CHECK-NEXT: add.s16 %rs4, %rs3, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; |
| ; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U; |
| ; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; |
| ; CHECK-NEXT: add.s16 %rs6, %rs5, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; |
| ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8; |
| ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; |
| ; CHECK-NEXT: add.s16 %rs8, %rs7, 1; |
| ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; |
| ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; |
| ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r12; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i8>, ptr addrspace(5) %a |
| %a.add = add <4 x i8> %a.load, <i8 1, i8 1, i8 1, i8 1> |
| store volatile <4 x i8> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xi16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi16_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; |
| ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; |
| ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3}; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i16>, ptr addrspace(5) %a |
| %a.add = add <2 x i16> %a.load, <i16 1, i16 1> |
| store volatile <2 x i16> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_4xi16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_4xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi16_param_0]; |
| ; CHECK-NEXT: ld.local.v4.u16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; |
| ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; |
| ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; |
| ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; |
| ; CHECK-NEXT: st.local.v4.u16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i16>, ptr addrspace(5) %a |
| %a.add = add <4 x i16> %a.load, <i16 1, i16 1, i16 1, i16 1> |
| store volatile <4 x i16> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xi32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi32_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u32 {%r1, %r2}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r3, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r4, %r1, 1; |
| ; CHECK-NEXT: st.local.v2.u32 [%rd1], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i32>, ptr addrspace(5) %a |
| %a.add = add <2 x i32> %a.load, <i32 1, i32 1> |
| store volatile <2 x i32> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_4xi32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_4xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi32_param_0]; |
| ; CHECK-NEXT: ld.local.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r5, %r4, 1; |
| ; CHECK-NEXT: add.s32 %r6, %r3, 1; |
| ; CHECK-NEXT: add.s32 %r7, %r2, 1; |
| ; CHECK-NEXT: add.s32 %r8, %r1, 1; |
| ; CHECK-NEXT: st.local.v4.u32 [%rd1], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x i32>, ptr addrspace(5) %a |
| %a.add = add <4 x i32> %a.load, <i32 1, i32 1, i32 1, i32 1> |
| store volatile <4 x i32> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xi64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xi64_param_0]; |
| ; CHECK-NEXT: ld.local.v2.u64 {%rd2, %rd3}, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, 1; |
| ; CHECK-NEXT: st.local.v2.u64 [%rd1], {%rd5, %rd4}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x i64>, ptr addrspace(5) %a |
| %a.add = add <2 x i64> %a.load, <i64 1, i64 1> |
| store volatile <2 x i64> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xfloat(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xfloat_param_0]; |
| ; CHECK-NEXT: ld.local.v2.f32 {%f1, %f2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f3, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f4, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.v2.f32 [%rd1], {%f4, %f3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x float>, ptr addrspace(5) %a |
| %a.add = fadd <2 x float> %a.load, <float 1., float 1.> |
| store volatile <2 x float> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_4xfloat(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_4xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xfloat_param_0]; |
| ; CHECK-NEXT: ld.local.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f7, %f2, 0f3F800000; |
| ; CHECK-NEXT: add.rn.f32 %f8, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.v4.f32 [%rd1], {%f8, %f7, %f6, %f5}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <4 x float>, ptr addrspace(5) %a |
| %a.add = fadd <4 x float> %a.load, <float 1., float 1., float 1., float 1.> |
| store volatile <4 x float> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| define void @local_volatile_2xdouble(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_volatile_2xdouble( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_2xdouble_param_0]; |
| ; CHECK-NEXT: ld.local.v2.f64 {%fd1, %fd2}, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FF0000000000000; |
| ; CHECK-NEXT: add.rn.f64 %fd4, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.v2.f64 [%rd1], {%fd4, %fd3}; |
| ; CHECK-NEXT: ret; |
| %a.load = load volatile <2 x double>, ptr addrspace(5) %a |
| %a.add = fadd <2 x double> %a.load, <double 1., double 1.> |
| store volatile <2 x double> %a.add, ptr addrspace(5) %a |
| ret void |
| } |
| |
| ; local_unordered_sys |
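| ; Unordered atomic loads/stores to the local statespace currently lower to |
| ; plain ld.local/st.local on both sm_60 and sm_70. |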
| |
| define void @local_unordered_sys_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @local_unordered_sys_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(5) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(5) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @local_unordered_sys_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(5) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(5) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @local_unordered_sys_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(5) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(5) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @local_unordered_sys_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(5) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(5) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @local_unordered_sys_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(5) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(5) %a unordered, align 8 |
| ret void |
| } |
| |
| ; local_unordered_volatile_sys |
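| ; Note: unordered volatile atomic accesses to the local statespace are expected |
| ; to lower to plain ld.local/st.local here, since local memory is private to a thread. |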
| |
| define void @local_unordered_volatile_sys_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1 |
| ret void |
| } |
| |
| define void @local_unordered_volatile_sys_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(5) %a unordered, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(5) %a unordered, align 2 |
| ret void |
| } |
| |
| define void @local_unordered_volatile_sys_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(5) %a unordered, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(5) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @local_unordered_volatile_sys_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(5) %a unordered, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(5) %a unordered, align 8 |
| ret void |
| } |
| |
| define void @local_unordered_volatile_sys_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(5) %a unordered, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(5) %a unordered, align 4 |
| ret void |
| } |
| |
| define void @local_unordered_volatile_sys_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_unordered_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(5) %a unordered, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(5) %a unordered, align 8 |
| ret void |
| } |
| |
| ; local_monotonic_sys |
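| ; Note: monotonic atomic accesses to the local statespace are likewise expected |
| ; to lower to plain ld.local/st.local. |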
| |
| define void @local_monotonic_sys_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @local_monotonic_sys_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i16, ptr addrspace(5) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic i16 %a.add, ptr addrspace(5) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @local_monotonic_sys_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i32, ptr addrspace(5) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic i32 %a.add, ptr addrspace(5) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @local_monotonic_sys_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic i64, ptr addrspace(5) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic i64 %a.add, ptr addrspace(5) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @local_monotonic_sys_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic float, ptr addrspace(5) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic float %a.add, ptr addrspace(5) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @local_monotonic_sys_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic double, ptr addrspace(5) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic double %a.add, ptr addrspace(5) %a monotonic, align 8 |
| ret void |
| } |
| |
| ; local_monotonic_volatile_sys |
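| ; Note: monotonic volatile atomic accesses to the local statespace are likewise |
| ; expected to lower to plain ld.local/st.local. |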
| |
| define void @local_monotonic_volatile_sys_i8(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_i8( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i8_param_0]; |
| ; CHECK-NEXT: ld.local.u8 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u8 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1 |
| %a.add = add i8 %a.load, 1 |
| store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1 |
| ret void |
| } |
| |
| define void @local_monotonic_volatile_sys_i16(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i16_param_0]; |
| ; CHECK-NEXT: ld.local.u16 %rs1, [%rd1]; |
| ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: st.local.u16 [%rd1], %rs2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i16, ptr addrspace(5) %a monotonic, align 2 |
| %a.add = add i16 %a.load, 1 |
| store atomic volatile i16 %a.add, ptr addrspace(5) %a monotonic, align 2 |
| ret void |
| } |
| |
| define void @local_monotonic_volatile_sys_i32(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i32_param_0]; |
| ; CHECK-NEXT: ld.local.u32 %r1, [%rd1]; |
| ; CHECK-NEXT: add.s32 %r2, %r1, 1; |
| ; CHECK-NEXT: st.local.u32 [%rd1], %r2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i32, ptr addrspace(5) %a monotonic, align 4 |
| %a.add = add i32 %a.load, 1 |
| store atomic volatile i32 %a.add, ptr addrspace(5) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @local_monotonic_volatile_sys_i64(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_i64_param_0]; |
| ; CHECK-NEXT: ld.local.u64 %rd2, [%rd1]; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: st.local.u64 [%rd1], %rd3; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile i64, ptr addrspace(5) %a monotonic, align 8 |
| %a.add = add i64 %a.load, 1 |
| store atomic volatile i64 %a.add, ptr addrspace(5) %a monotonic, align 8 |
| ret void |
| } |
| |
| define void @local_monotonic_volatile_sys_float(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_float( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .f32 %f<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_float_param_0]; |
| ; CHECK-NEXT: ld.local.f32 %f1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000; |
| ; CHECK-NEXT: st.local.f32 [%rd1], %f2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile float, ptr addrspace(5) %a monotonic, align 4 |
| %a.add = fadd float %a.load, 1. |
| store atomic volatile float %a.add, ptr addrspace(5) %a monotonic, align 4 |
| ret void |
| } |
| |
| define void @local_monotonic_volatile_sys_double(ptr addrspace(5) %a) { |
| ; CHECK-LABEL: local_monotonic_volatile_sys_double( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NEXT: .reg .f64 %fd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_sys_double_param_0]; |
| ; CHECK-NEXT: ld.local.f64 %fd1, [%rd1]; |
| ; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000; |
| ; CHECK-NEXT: st.local.f64 [%rd1], %fd2; |
| ; CHECK-NEXT: ret; |
| %a.load = load atomic volatile double, ptr addrspace(5) %a monotonic, align 8 |
| %a.add = fadd double %a.load, 1. |
| store atomic volatile double %a.add, ptr addrspace(5) %a monotonic, align 8 |
| ret void |
| } |