| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 |
| ; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %} |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} |
| |
| |
| ;; i8 |
| ; Loads one i8 through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: a plain ld.b8 with no state-space |
| ; qualifier, the value carried in a .b32 register and returned with |
| ; st.param.b32. The address register is .b32 on nvptx and .b64 on nvptx64. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define i8 @ld_global_i8(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_i8( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<3>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i8_param_0]; |
| ; PTX32-NEXT: ld.b8 %r2, [%r1]; |
| ; PTX32-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_i8( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b32 %r<2>; |
| ; PTX64-NEXT: .reg .b64 %rd<2>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i8_param_0]; |
| ; PTX64-NEXT: ld.b8 %r1, [%rd1]; |
| ; PTX64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; PTX64-NEXT: ret; |
| %a = load i8, ptr addrspace(0) %ptr |
| ret i8 %a |
| } |
| |
| ;; i16 |
| ; Loads one i16 through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: a plain ld.b16 with no state-space |
| ; qualifier, the value carried in a .b32 register and returned with |
| ; st.param.b32. The address register is .b32 on nvptx and .b64 on nvptx64. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define i16 @ld_global_i16(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_i16( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<3>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i16_param_0]; |
| ; PTX32-NEXT: ld.b16 %r2, [%r1]; |
| ; PTX32-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_i16( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b32 %r<2>; |
| ; PTX64-NEXT: .reg .b64 %rd<2>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i16_param_0]; |
| ; PTX64-NEXT: ld.b16 %r1, [%rd1]; |
| ; PTX64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; PTX64-NEXT: ret; |
| %a = load i16, ptr addrspace(0) %ptr |
| ret i16 %a |
| } |
| |
| ;; i32 |
| ; Loads one i32 through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: a plain ld.b32 with no state-space |
| ; qualifier into a .b32 register, returned with st.param.b32. The address |
| ; register is .b32 on nvptx and .b64 on nvptx64. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define i32 @ld_global_i32(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_i32( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<3>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i32_param_0]; |
| ; PTX32-NEXT: ld.b32 %r2, [%r1]; |
| ; PTX32-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_i32( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b32 %r<2>; |
| ; PTX64-NEXT: .reg .b64 %rd<2>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i32_param_0]; |
| ; PTX64-NEXT: ld.b32 %r1, [%rd1]; |
| ; PTX64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; PTX64-NEXT: ret; |
| %a = load i32, ptr addrspace(0) %ptr |
| ret i32 %a |
| } |
| |
| ;; i64 |
| ; Loads one i64 through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: a plain ld.b64 with no state-space |
| ; qualifier into a .b64 register, returned with st.param.b64. On nvptx the |
| ; address is held in a .b32 register; on nvptx64 both the address and the |
| ; loaded value use .b64 registers. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define i64 @ld_global_i64(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_i64( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<2>; |
| ; PTX32-NEXT: .reg .b64 %rd<2>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_i64_param_0]; |
| ; PTX32-NEXT: ld.b64 %rd1, [%r1]; |
| ; PTX32-NEXT: st.param.b64 [func_retval0], %rd1; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_i64( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b64 %rd<3>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_i64_param_0]; |
| ; PTX64-NEXT: ld.b64 %rd2, [%rd1]; |
| ; PTX64-NEXT: st.param.b64 [func_retval0], %rd2; |
| ; PTX64-NEXT: ret; |
| %a = load i64, ptr addrspace(0) %ptr |
| ret i64 %a |
| } |
| |
| ;; f32 |
| ; Loads one float through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: the float is moved as a 32-bit value |
| ; through .b32 registers (plain ld.b32 with no state-space qualifier, then |
| ; st.param.b32). The address register is .b32 on nvptx and .b64 on nvptx64. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define float @ld_global_f32(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_f32( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<3>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_f32_param_0]; |
| ; PTX32-NEXT: ld.b32 %r2, [%r1]; |
| ; PTX32-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_f32( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b32 %r<2>; |
| ; PTX64-NEXT: .reg .b64 %rd<2>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_f32_param_0]; |
| ; PTX64-NEXT: ld.b32 %r1, [%rd1]; |
| ; PTX64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; PTX64-NEXT: ret; |
| %a = load float, ptr addrspace(0) %ptr |
| ret float %a |
| } |
| |
| ;; f64 |
| ; Loads one double through an addrspace(0) pointer and returns it. |
| ; Expected output on both targets: the double is moved as a 64-bit value |
| ; through .b64 registers (plain ld.b64 with no state-space qualifier, then |
| ; st.param.b64). On nvptx the address is held in a .b32 register; on |
| ; nvptx64 both the address and the loaded value use .b64 registers. |
| ; NOTE(review): the function name says "global" but the pointer is |
| ; addrspace(0) and the expected load has no .global qualifier; presumably |
| ; this exercises generic-address loads -- confirm before renaming. |
| define double @ld_global_f64(ptr addrspace(0) %ptr) { |
| ; PTX32-LABEL: ld_global_f64( |
| ; PTX32: { |
| ; PTX32-NEXT: .reg .b32 %r<2>; |
| ; PTX32-NEXT: .reg .b64 %rd<2>; |
| ; PTX32-EMPTY: |
| ; PTX32-NEXT: // %bb.0: |
| ; PTX32-NEXT: ld.param.b32 %r1, [ld_global_f64_param_0]; |
| ; PTX32-NEXT: ld.b64 %rd1, [%r1]; |
| ; PTX32-NEXT: st.param.b64 [func_retval0], %rd1; |
| ; PTX32-NEXT: ret; |
| ; |
| ; PTX64-LABEL: ld_global_f64( |
| ; PTX64: { |
| ; PTX64-NEXT: .reg .b64 %rd<3>; |
| ; PTX64-EMPTY: |
| ; PTX64-NEXT: // %bb.0: |
| ; PTX64-NEXT: ld.param.b64 %rd1, [ld_global_f64_param_0]; |
| ; PTX64-NEXT: ld.b64 %rd2, [%rd1]; |
| ; PTX64-NEXT: st.param.b64 [func_retval0], %rd2; |
| ; PTX64-NEXT: ret; |
| %a = load double, ptr addrspace(0) %ptr |
| ret double %a |
| } |