blob: 0392f7786731a247d66af347e85c79e0ccaf6369 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
define i32 @bfe0(i32 %a) {
; CHECK-LABEL: bfe0(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [bfe0_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 4, 4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%val0 = ashr i32 %a, 4
%val1 = and i32 %val0, 15
ret i32 %val1
}
define i32 @bfe1(i32 %a) {
; CHECK-LABEL: bfe1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [bfe1_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 3, 3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%val0 = ashr i32 %a, 3
%val1 = and i32 %val0, 7
ret i32 %val1
}
define i32 @bfe2(i32 %a) {
; CHECK-LABEL: bfe2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [bfe2_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%val0 = ashr i32 %a, 5
%val1 = and i32 %val0, 7
ret i32 %val1
}
define i32 @no_bfe_on_32bit_overflow(i32 %a) {
; CHECK-LABEL: no_bfe_on_32bit_overflow(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_param_0];
; CHECK-NEXT: shr.s32 %r2, %r1, 31;
; CHECK-NEXT: and.b32 %r3, %r2, 15;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%val0 = ashr i32 %a, 31
%val1 = and i32 %val0, 15
ret i32 %val1
}
define i32 @no_bfe_on_32bit_overflow_shr_and_pair(i32 %a) {
; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_shr_and_pair_param_0];
; CHECK-NEXT: shr.s32 %r2, %r1, 31;
; CHECK-NEXT: and.b32 %r3, %r2, 15;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%val0 = ashr i32 %a, 31
%val1 = and i32 %val0, 15
ret i32 %val1
}
define i64 @no_bfe_on_64bit_overflow(i64 %a) {
; CHECK-LABEL: no_bfe_on_64bit_overflow(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_param_0];
; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
; CHECK-NEXT: and.b64 %rd3, %rd2, 7;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
%val0 = ashr i64 %a, 63
%val1 = and i64 %val0, 7
ret i64 %val1
}
define i64 @no_bfe_on_64bit_overflow_shr_and_pair(i64 %a) {
; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_shr_and_pair_param_0];
; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
; CHECK-NEXT: and.b64 %rd3, %rd2, 7;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
%val0 = ashr i64 %a, 63
%val1 = and i64 %val0, 7
ret i64 %val1
}
define i32 @bfe_ashr_signed_32(i32 %x) {
; CHECK-O3-LABEL: bfe_ashr_signed_32(
; CHECK-O3: {
; CHECK-O3-NEXT: .reg .b32 %r<3>;
; CHECK-O3-EMPTY:
; CHECK-O3-NEXT: // %bb.0:
; CHECK-O3-NEXT: ld.param.u16 %r1, [bfe_ashr_signed_32_param_0+2];
; CHECK-O3-NEXT: bfe.s32 %r2, %r1, 4, 12;
; CHECK-O3-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-O3-NEXT: ret;
;
; CHECK-O0-LABEL: bfe_ashr_signed_32(
; CHECK-O0: {
; CHECK-O0-NEXT: .reg .b32 %r<3>;
; CHECK-O0-EMPTY:
; CHECK-O0-NEXT: // %bb.0:
; CHECK-O0-NEXT: ld.param.u32 %r1, [bfe_ashr_signed_32_param_0];
; CHECK-O0-NEXT: bfe.s32 %r2, %r1, 20, 12;
; CHECK-O0-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-O0-NEXT: ret;
%and = and i32 %x, -65536
%shr = ashr exact i32 %and, 20
ret i32 %shr
}
define i32 @bfe_ashr_unsigned_32(i32 %x) {
; CHECK-LABEL: bfe_ashr_unsigned_32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [bfe_ashr_unsigned_32_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 6;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%and = and i32 %x, 2047
%shr = ashr exact i32 %and, 5
ret i32 %shr
}
define i64 @bfe_ashr_signed_64(i64 %x) {
; CHECK-LABEL: bfe_ashr_signed_64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_signed_64_param_0];
; CHECK-NEXT: bfe.s64 %rd2, %rd1, 16, 48;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%and = and i64 %x, -65536
%shr = ashr exact i64 %and, 16
ret i64 %shr
}
define i64 @bfe_ashr_unsigned_64(i64 %x) {
; CHECK-LABEL: bfe_ashr_unsigned_64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_unsigned_64_param_0];
; CHECK-NEXT: bfe.u64 %rd2, %rd1, 5, 6;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%and = and i64 %x, 2047
%shr = ashr exact i64 %and, 5
ret i64 %shr
}
define i32 @bfe3(i128 %a) {
; CHECK-LABEL: bfe3(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe3_param_0];
; CHECK-NEXT: cvt.u32.u64 %r1, %rd1;
; CHECK-NEXT: bfe.s32 %r2, %r1, 15, 17;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%trunc = trunc i128 %a to i32
%and = and i32 %trunc, -32768
%shr = ashr exact i32 %and, 15
ret i32 %shr
}
define i64 @bfe4(i128 %a) {
; CHECK-LABEL: bfe4(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe4_param_0];
; CHECK-NEXT: bfe.s64 %rd3, %rd1, 17, 47;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
%trunc = trunc i128 %a to i64
%and = and i64 %trunc, -131072
%shr = ashr exact i64 %and, 17
ret i64 %shr
}