| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; Check that various LLVM idioms get lowered to NVPTX as expected. |
| |
| ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s |
| ; RUN: %if ptxas-ptr32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %} |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} |
| |
| ; Two-field aggregates used as return types by the functions in this file that |
| ; split an integer into two halves: S16 holds two i16 fields, S32 two i32 fields. |
| %struct.S16 = type { i16, i16 } |
| %struct.S32 = type { i32, i32 } |
| |
| ; Signed 16-bit absolute value written out as sub/icmp sge/select: the select |
| ; yields %a when %a >= 0 and (0 - %a) otherwise. The assertions expect this to |
| ; be emitted as a single abs.s16, followed by a cvt.u32.u16 because the return |
| ; value is stored to func_retval0 as a 32-bit quantity. |
| define i16 @abs_i16(i16 %a) { |
| ; CHECK-LABEL: abs_i16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [abs_i16_param_0]; |
| ; CHECK-NEXT: abs.s16 %rs2, %rs1; |
| ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-NEXT: ret; |
| %neg = sub i16 0, %a |
| %abs.cond = icmp sge i16 %a, 0 |
| %abs = select i1 %abs.cond, i16 %a, i16 %neg |
| ret i16 %abs |
| } |
| |
| ; Signed 32-bit absolute value written out as sub/icmp sge/select: the select |
| ; yields %a when %a >= 0 and (0 - %a) otherwise. The assertions expect a single |
| ; abs.s32 between the parameter load and the store to func_retval0. |
| define i32 @abs_i32(i32 %a) { |
| ; CHECK-LABEL: abs_i32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [abs_i32_param_0]; |
| ; CHECK-NEXT: abs.s32 %r2, %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %neg = sub i32 0, %a |
| %abs.cond = icmp sge i32 %a, 0 |
| %abs = select i1 %abs.cond, i32 %a, i32 %neg |
| ret i32 %abs |
| } |
| |
| ; Signed 64-bit absolute value written out as sub/icmp sge/select: the select |
| ; yields %a when %a >= 0 and (0 - %a) otherwise. The assertions expect a single |
| ; abs.s64 between the parameter load and the store to func_retval0. |
| define i64 @abs_i64(i64 %a) { |
| ; CHECK-LABEL: abs_i64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [abs_i64_param_0]; |
| ; CHECK-NEXT: abs.s64 %rd2, %rd1; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| ; CHECK-NEXT: ret; |
| %neg = sub i64 0, %a |
| %abs.cond = icmp sge i64 %a, 0 |
| %abs = select i1 %abs.cond, i64 %a, i64 %neg |
| ret i64 %abs |
| } |
| |
| ; Splits an i32 into two i16 halves returned as %struct.S16: field 0 is the |
| ; truncated input (low half) and field 1 is the input logically shifted right |
| ; by 16 and then truncated (high half). The assertions expect one shr.u32 by 16 |
| ; and two 16-bit stores, at func_retval0 and func_retval0+2, taken directly |
| ; from the 32-bit registers. |
| define %struct.S16 @i32_to_2xi16(i32 noundef %in) { |
| ; CHECK-LABEL: i32_to_2xi16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_param_0]; |
| ; CHECK-NEXT: shr.u32 %r2, %r1, 16; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %r1; |
| ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; |
| ; CHECK-NEXT: ret; |
| %low = trunc i32 %in to i16 |
| %high32 = lshr i32 %in, 16 |
| %high = trunc i32 %high32 to i16 |
| %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
| %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
| ret %struct.S16 %s |
| } |
| |
| ; Same as i32_to_2xi16, but with rearranged order of low/high parts: the high |
| ; half (lshr by 16 + trunc) is computed before the low half (trunc). The struct |
| ; fields are still filled as low = field 0, high = field 1, and the assertions |
| ; expect the same shr.u32 by 16 followed by the two 16-bit stores. |
| define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) { |
| ; CHECK-LABEL: i32_to_2xi16_lh( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_lh_param_0]; |
| ; CHECK-NEXT: shr.u32 %r2, %r1, 16; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %r1; |
| ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; |
| ; CHECK-NEXT: ret; |
| %high32 = lshr i32 %in, 16 |
| %high = trunc i32 %high32 to i16 |
| %low = trunc i32 %in to i16 |
| %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
| %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
| ret %struct.S16 %s |
| } |
| |
| |
| ; Negative variant of i32_to_2xi16: the "high" part is produced by a shift of |
| ; 15 rather than 16, so it is not the upper half of the input. The assertions |
| ; expect a shr.u32 by 15 and two 16-bit stores. |
| ; NOTE(review): the assertions for i32_to_2xi16 (shift by 16) also expect a |
| ; shr.u32, so the "mov" mentioned in the body comment presumably refers to an |
| ; unpacking mov emitted by an earlier backend version -- confirm whether that |
| ; comment is still accurate. |
| define %struct.S16 @i32_to_2xi16_not(i32 noundef %in) { |
| ; CHECK-LABEL: i32_to_2xi16_not( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_not_param_0]; |
| ; CHECK-NEXT: shr.u32 %r2, %r1, 15; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %r1; |
| ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; |
| ; CHECK-NEXT: ret; |
| %low = trunc i32 %in to i16 |
| ; Shift by any value other than 16 blocks the conversion to mov. |
| %high32 = lshr i32 %in, 15 |
| %high = trunc i32 %high32 to i16 |
| %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
| %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
| ret %struct.S16 %s |
| } |
| |
| ; Splits an i64 into two i32 halves returned as %struct.S32: field 0 is the |
| ; truncated input (low half) and field 1 is the input logically shifted right |
| ; by 32 and then truncated (high half). The assertions expect one shr.u64 by 32 |
| ; and two 32-bit stores, at func_retval0 and func_retval0+4, taken directly |
| ; from the 64-bit registers. |
| define %struct.S32 @i64_to_2xi32(i64 noundef %in) { |
| ; CHECK-LABEL: i64_to_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_param_0]; |
| ; CHECK-NEXT: shr.u64 %rd2, %rd1, 32; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2; |
| ; CHECK-NEXT: ret; |
| %low = trunc i64 %in to i32 |
| %high64 = lshr i64 %in, 32 |
| %high = trunc i64 %high64 to i32 |
| %s1 = insertvalue %struct.S32 poison, i32 %low, 0 |
| %s = insertvalue %struct.S32 %s1, i32 %high, 1 |
| ret %struct.S32 %s |
| } |
| |
| ; Negative variant of i64_to_2xi32: the "high" part is produced by a shift of |
| ; 31 rather than 32, so it is not the upper half of the input. The assertions |
| ; expect a shr.u64 by 31 and two 32-bit stores. |
| ; NOTE(review): the assertions for i64_to_2xi32 (shift by 32) also expect a |
| ; shr.u64, so the "mov" mentioned in the body comment presumably refers to an |
| ; unpacking mov emitted by an earlier backend version -- confirm whether that |
| ; comment is still accurate. |
| define %struct.S32 @i64_to_2xi32_not(i64 noundef %in) { |
| ; CHECK-LABEL: i64_to_2xi32_not( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_not_param_0]; |
| ; CHECK-NEXT: shr.u64 %rd2, %rd1, 31; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2; |
| ; CHECK-NEXT: ret; |
| %low = trunc i64 %in to i32 |
| ; Shift by any value other than 32 blocks the conversion to mov. |
| %high64 = lshr i64 %in, 31 |
| %high = trunc i64 %high64 to i32 |
| %s1 = insertvalue %struct.S32 poison, i32 %low, 0 |
| %s = insertvalue %struct.S32 %s1, i32 %high, 1 |
| ret %struct.S32 %s |
| } |
| |
| ; Make sure we do not get confused when our input itself is [al]shr. |
| ; Here the value being split, %i1, is itself 'ashr i32 %i, 16'. Its low half is |
| ; a trunc of %i1 and its high half is a second 'ashr ..., 16' followed by a |
| ; trunc. The assertions expect two separate shifts after the call to |
| ; escape_int: shr.s32 by 16 on the loaded parameter, then shr.u32 by 16 on that |
| ; result, with each shift result stored as one 16-bit field of the return value. |
| define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){ |
| ; CHECK-LABEL: i32_to_2xi16_shr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_shr_param_0]; |
| ; CHECK-NEXT: { // callseq 0, 0 |
| ; CHECK-NEXT: .param .b32 param0; |
| ; CHECK-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK-NEXT: call.uni escape_int, (param0); |
| ; CHECK-NEXT: } // callseq 0 |
| ; CHECK-NEXT: shr.s32 %r2, %r1, 16; |
| ; CHECK-NEXT: shr.u32 %r3, %r2, 16; |
| ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| call void @escape_int(i32 %i); // Force %i to be loaded completely. |
| %i1 = ashr i32 %i, 16 |
| %l = trunc i32 %i1 to i16 |
| %h32 = ashr i32 %i1, 16 |
| %h = trunc i32 %h32 to i16 |
| %s0 = insertvalue %struct.S16 poison, i16 %l, 0 |
| %s1 = insertvalue %struct.S16 %s0, i16 %h, 1 |
| ret %struct.S16 %s1 |
| } |
| ; External function (declaration only) that i32_to_2xi16_shr calls with its |
| ; full i32 argument, so that the whole 32-bit value is used before it is split. |
| declare dso_local void @escape_int(i32 noundef) |
| |