| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %} |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
; bfe0: an arithmetic shift right by 4 followed by a mask of 15 (the low four
; bits) is expected to be emitted as a single bfe.u32 with operands 4, 4
; (bit offset 4, width 4) and no separate shift or mask instruction. The same
; expectation applies to both the default and the -O0 run.
| define i32 @bfe0(i32 %a) { |
| ; CHECK-LABEL: bfe0( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [bfe0_param_0]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 4, 4; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
; Shift the field down to bit 0, then keep its four bits.
| %val0 = ashr i32 %a, 4 |
| %val1 = and i32 %val0, 15 |
| ret i32 %val1 |
| } |
| |
; bfe1: same shape as a shift-then-mask field extract, with a shift of 3 and a
; mask of 7 (three low bits). The expected PTX is a single bfe.u32 with
; operands 3, 3 in both the default and the -O0 run.
| define i32 @bfe1(i32 %a) { |
| ; CHECK-LABEL: bfe1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [bfe1_param_0]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 3, 3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
; Shift amount and field width are both 3 here.
| %val0 = ashr i32 %a, 3 |
| %val1 = and i32 %val0, 7 |
| ret i32 %val1 |
| } |
| |
; bfe2: shift of 5 with a mask of 7, so the shift amount and the field width
; differ. The expected PTX is a single bfe.u32 with operands 5, 3, which shows
; that the first immediate follows the shift amount and the second follows the
; number of mask bits. Both runs share this expectation.
| define i32 @bfe2(i32 %a) { |
| ; CHECK-LABEL: bfe2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [bfe2_param_0]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
; Extract the three bits that start at bit 5.
| %val0 = ashr i32 %a, 5 |
| %val1 = and i32 %val0, 7 |
| ret i32 %val1 |
| } |
| |
; no_bfe_on_32bit_overflow: negative test. The mask asks for four bits after a
; shift of 31, so the requested field (offset 31, width 4) would extend past
; bit 31 of the 32-bit input. The expected PTX keeps the separate shr.s32 and
; and.b32 instructions and contains no bfe, in both runs.
; NOTE(review): presumably the fold is rejected because the bits above bit 31
; come from the sign fill of the arithmetic shift, which an unsigned extract
; would not reproduce -- confirm against the NVPTX selection code.
| define i32 @no_bfe_on_32bit_overflow(i32 %a) { |
| ; CHECK-LABEL: no_bfe_on_32bit_overflow( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_param_0]; |
| ; CHECK-NEXT: shr.s32 %r2, %r1, 31; |
| ; CHECK-NEXT: and.b32 %r3, %r2, 15; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
; offset 31 + width 4 = 35, which is larger than the 32-bit operand.
| %val0 = ashr i32 %a, 31 |
| %val1 = and i32 %val0, 15 |
| ret i32 %val1 |
| } |
| |
; no_bfe_on_32bit_overflow_shr_and_pair: expects the shift-by-31 / mask-15
; sequence to be emitted as an explicit shr.s32 + and.b32 pair rather than as
; a bit-field extract, in both runs.
; NOTE(review): the IR body and the expected PTX are identical to
; @no_bfe_on_32bit_overflow apart from the function name, so the two tests
; currently cover the same thing -- confirm whether both are still wanted.
| define i32 @no_bfe_on_32bit_overflow_shr_and_pair(i32 %a) { |
| ; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_shr_and_pair_param_0]; |
| ; CHECK-NEXT: shr.s32 %r2, %r1, 31; |
| ; CHECK-NEXT: and.b32 %r3, %r2, 15; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
; A four-bit field starting at bit 31 does not fit inside the i32 input.
| %val0 = ashr i32 %a, 31 |
| %val1 = and i32 %val0, 15 |
| ret i32 %val1 |
| } |
| |
; no_bfe_on_64bit_overflow: 64-bit negative test. The mask asks for three bits
; after a shift of 63, so the requested field (offset 63, width 3) would
; extend past bit 63 of the input. The expected PTX keeps the separate shr.s64
; and and.b64 instructions and contains no bfe, in both runs.
| define i64 @no_bfe_on_64bit_overflow(i64 %a) { |
| ; CHECK-LABEL: no_bfe_on_64bit_overflow( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; |
| ; CHECK-NEXT: and.b64 %rd3, %rd2, 7; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
; offset 63 + width 3 = 66, which is larger than the 64-bit operand.
| %val0 = ashr i64 %a, 63 |
| %val1 = and i64 %val0, 7 |
| ret i64 %val1 |
| } |
| |
; no_bfe_on_64bit_overflow_shr_and_pair: expects the shift-by-63 / mask-7
; sequence to be emitted as an explicit shr.s64 + and.b64 pair rather than as
; a bit-field extract, in both runs.
; NOTE(review): the IR body and the expected PTX are identical to
; @no_bfe_on_64bit_overflow apart from the function name, so the two tests
; currently cover the same thing -- confirm whether both are still wanted.
| define i64 @no_bfe_on_64bit_overflow_shr_and_pair(i64 %a) { |
| ; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_shr_and_pair_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; |
| ; CHECK-NEXT: and.b64 %rd3, %rd2, 7; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
; A three-bit field starting at bit 63 does not fit inside the i64 input.
| %val0 = ashr i64 %a, 63 |
| %val1 = and i64 %val0, 7 |
| ret i64 %val1 |
| } |
| |
; bfe_ashr_signed_32: mask-then-shift form. The input is masked with -65536
; (low 16 bits cleared, sign bit kept) and then arithmetically shifted right
; by 20, so the result is the sign-extended top 12 bits. A signed extract
; (bfe.s32) is expected, but the two runs differ in how they reach it:
;   * default run: only the upper half of the parameter is loaded
;     (ld.param.u16 at offset +2) and the field is taken at offset 4, width 12
;     of that half-word (20 - 16 = 4);
;   * -O0 run: the full 32-bit parameter is loaded and the field is taken at
;     offset 20, width 12.
| define i32 @bfe_ashr_signed_32(i32 %x) { |
| ; CHECK-O3-LABEL: bfe_ashr_signed_32( |
| ; CHECK-O3: { |
| ; CHECK-O3-NEXT: .reg .b32 %r<3>; |
| ; CHECK-O3-EMPTY: |
| ; CHECK-O3-NEXT: // %bb.0: |
| ; CHECK-O3-NEXT: ld.param.u16 %r1, [bfe_ashr_signed_32_param_0+2]; |
| ; CHECK-O3-NEXT: bfe.s32 %r2, %r1, 4, 12; |
| ; CHECK-O3-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-O3-NEXT: ret; |
| ; |
| ; CHECK-O0-LABEL: bfe_ashr_signed_32( |
| ; CHECK-O0: { |
| ; CHECK-O0-NEXT: .reg .b32 %r<3>; |
| ; CHECK-O0-EMPTY: |
| ; CHECK-O0-NEXT: // %bb.0: |
| ; CHECK-O0-NEXT: ld.param.u32 %r1, [bfe_ashr_signed_32_param_0]; |
| ; CHECK-O0-NEXT: bfe.s32 %r2, %r1, 20, 12; |
| ; CHECK-O0-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-O0-NEXT: ret; |
; The mask keeps bit 31, so the arithmetic shift sign-extends the field.
| %and = and i32 %x, -65536 |
| %shr = ashr exact i32 %and, 20 |
| ret i32 %shr |
| } |
| |
; bfe_ashr_unsigned_32: mask-then-shift form with a non-negative mask. The
; input is masked with 2047 (bits 0..10) and then arithmetically shifted right
; by 5, leaving bits 5..10. Because the mask clears the sign bit, an unsigned
; extract is expected: bfe.u32 with operands 5, 6 in both runs.
| define i32 @bfe_ashr_unsigned_32(i32 %x) { |
| ; CHECK-LABEL: bfe_ashr_unsigned_32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [bfe_ashr_unsigned_32_param_0]; |
| ; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
; 11 mask bits minus a shift of 5 leaves a 6-bit field.
| %and = and i32 %x, 2047 |
| %shr = ashr exact i32 %and, 5 |
| ret i32 %shr |
| } |
| |
; bfe_ashr_signed_64: 64-bit mask-then-shift form. The input is masked with
; -65536 (low 16 bits cleared, sign bit kept) and then arithmetically shifted
; right by 16, giving the sign-extended upper 48 bits. The expected PTX is a
; signed extract, bfe.s64 with operands 16, 48, in both runs.
| define i64 @bfe_ashr_signed_64(i64 %x) { |
| ; CHECK-LABEL: bfe_ashr_signed_64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_signed_64_param_0]; |
| ; CHECK-NEXT: bfe.s64 %rd2, %rd1, 16, 48; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| ; CHECK-NEXT: ret; |
; 64 bits minus the 16 cleared/shifted-out bits leaves a 48-bit field.
| %and = and i64 %x, -65536 |
| %shr = ashr exact i64 %and, 16 |
| ret i64 %shr |
| } |
| |
; bfe_ashr_unsigned_64: 64-bit counterpart of the non-negative-mask case. The
; input is masked with 2047 (bits 0..10) and then arithmetically shifted right
; by 5, leaving bits 5..10. The expected PTX is an unsigned extract, bfe.u64
; with operands 5, 6, in both runs.
| define i64 @bfe_ashr_unsigned_64(i64 %x) { |
| ; CHECK-LABEL: bfe_ashr_unsigned_64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_unsigned_64_param_0]; |
| ; CHECK-NEXT: bfe.u64 %rd2, %rd1, 5, 6; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| ; CHECK-NEXT: ret; |
; 11 mask bits minus a shift of 5 leaves a 6-bit field.
| %and = and i64 %x, 2047 |
| %shr = ashr exact i64 %and, 5 |
| ret i64 %shr |
| } |
| |
; bfe3: the mask-then-shift pattern applied to a value truncated from i128 to
; i32. The expected PTX loads the parameter as two 64-bit halves, converts the
; first loaded register to 32 bits, and then performs a signed extract:
; bfe.s32 with operands 15, 17 (mask -32768 clears the low 15 bits, leaving
; the 17 bits from bit 15 up to the sign bit). Both runs share the expectation.
| define i32 @bfe3(i128 %a) { |
| ; CHECK-LABEL: bfe3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe3_param_0]; |
| ; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; |
| ; CHECK-NEXT: bfe.s32 %r2, %r1, 15, 17; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
; Narrow first, then mask and shift on the 32-bit value.
| %trunc = trunc i128 %a to i32 |
| %and = and i32 %trunc, -32768 |
| %shr = ashr exact i32 %and, 15 |
| ret i32 %shr |
| } |
| |
; bfe4: the mask-then-shift pattern applied to a value truncated from i128 to
; i64. The expected PTX loads the parameter as two 64-bit halves and performs
; a signed extract directly on the first loaded register, with no conversion:
; bfe.s64 with operands 17, 47 (mask -131072 clears the low 17 bits, leaving
; the 47 bits from bit 17 up to the sign bit). Both runs share the expectation.
| define i64 @bfe4(i128 %a) { |
| ; CHECK-LABEL: bfe4( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe4_param_0]; |
| ; CHECK-NEXT: bfe.s64 %rd3, %rd1, 17, 47; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-NEXT: ret; |
; Narrow first, then mask and shift on the 64-bit value.
| %trunc = trunc i128 %a to i64 |
| %and = and i64 %trunc, -131072 |
| %shr = ashr exact i64 %and, 17 |
| ret i64 %shr |
| } |
| |