; blob: aada7eadce2aa55df8a25f2638dc25c93fe1f5db [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
; Constant rotate-left written with add: (x << 7) + (x >> 25) on i32.
; The left-shifted part has its low 7 bits clear and the right-shifted part
; only has its low 7 bits set, so the add behaves like an or. The assertions
; below expect a single shf.l.wrap of the input with itself by 7.
define i32 @test_simple_rotl(i32 %x) {
; CHECK-LABEL: test_simple_rotl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotl_param_0];
; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 7;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %upper = shl i32 %x, 7
  %lower = lshr i32 %x, 25
  %rot = add i32 %upper, %lower
  ret i32 %rot
}
; Constant rotate-right written with add: (x >> 7) + (x << 25) on i32.
; Rotating right by 7 is the same as rotating left by 25, and the assertions
; below expect the left form: one shf.l.wrap of the input with itself by 25.
define i32 @test_simple_rotr(i32 %x) {
; CHECK-LABEL: test_simple_rotr(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotr_param_0];
; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 25;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %lower = lshr i32 %x, 7
  %upper = shl i32 %x, 25
  %rot = add i32 %lower, %upper
  ret i32 %rot
}
; Variable rotate-left written with add: (x << y) + (x >> (32 - y)).
; The complementary shift amount is computed as a plain "32 - y" (no masking).
; The assertions below expect a single shf.l.wrap of %x with itself by %y.
define i32 @test_rotl_var(i32 %x, i32 %y) {
; CHECK-LABEL: test_rotl_var(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_var_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_rotl_var_param_1];
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %upper = shl i32 %x, %y
  %inv_amt = sub i32 32, %y
  %lower = lshr i32 %x, %inv_amt
  %rot = add i32 %upper, %lower
  ret i32 %rot
}
; Variable rotate-right written with add: (x >> y) + (x << (32 - y)).
; Mirror image of the variable rotate-left case; the assertions below expect
; a single shf.r.wrap of %x with itself by %y.
define i32 @test_rotr_var(i32 %x, i32 %y) {
; CHECK-LABEL: test_rotr_var(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotr_var_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_rotr_var_param_1];
; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %lower = lshr i32 %x, %y
  %inv_amt = sub i32 32, %y
  %upper = shl i32 %x, %inv_amt
  %rot = add i32 %lower, %upper
  ret i32 %rot
}
; Negative test: (x << y) + (x >> ((0 - y) & 31)).
; With the masked complementary amount, y == 0 makes both shifts a shift by
; zero, so the add produces x + x rather than x; the pattern is therefore not
; a rotate when combined with add. The assertions below expect the shifts,
; negate, mask and add to stay as separate instructions (no shf).
define i32 @test_invalid_rotl_var_and(i32 %x, i32 %y) {
; CHECK-LABEL: test_invalid_rotl_var_and(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotl_var_and_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotl_var_and_param_1];
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
; CHECK-NEXT: neg.s32 %r4, %r2;
; CHECK-NEXT: and.b32 %r5, %r4, 31;
; CHECK-NEXT: shr.u32 %r6, %r1, %r5;
; CHECK-NEXT: add.s32 %r7, %r6, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %left = shl i32 %x, %y
  %neg_amt = sub nsw i32 0, %y
  %masked_amt = and i32 %neg_amt, 31
  %right = lshr i32 %x, %masked_amt
  %sum = add i32 %right, %left
  ret i32 %sum
}
; Negative test: (x >> y) + (x << ((0 - y) & 31)).
; Right-rotate counterpart of the masked-amount case: for y == 0 both shifts
; are by zero and the add yields x + x, so this is not a rotate. The
; assertions below expect separate shr/neg/and/shl/add instructions (no shf).
define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) {
; CHECK-LABEL: test_invalid_rotr_var_and(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotr_var_and_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotr_var_and_param_1];
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
; CHECK-NEXT: neg.s32 %r4, %r2;
; CHECK-NEXT: and.b32 %r5, %r4, 31;
; CHECK-NEXT: shl.b32 %r6, %r1, %r5;
; CHECK-NEXT: add.s32 %r7, %r3, %r6;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %right = lshr i32 %x, %y
  %neg_amt = sub nsw i32 0, %y
  %masked_amt = and i32 %neg_amt, 31
  %left = shl i32 %x, %masked_amt
  %sum = add i32 %right, %left
  ret i32 %sum
}
; Funnel shift left of two different inputs, written with add:
;   (x0 << y) + ((x1 >> 1) >> (y ^ 31))
; The right-hand part is shifted in two steps (by 1, then by y ^ 31) instead
; of by 32 - y. The assertions below expect a single shf.l.wrap that takes
; %x1 as the first source, %x0 as the second and %y as the amount.
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
; CHECK-LABEL: test_fshl_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_special_case_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_fshl_special_case_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [test_fshl_special_case_param_2];
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %hi_part = shl i32 %x0, %y
  %lo_pre = lshr i32 %x1, 1
  %inv_amt = xor i32 %y, 31
  %lo_part = lshr i32 %lo_pre, %inv_amt
  %funnel = add i32 %hi_part, %lo_part
  ret i32 %funnel
}
; Funnel shift right of two different inputs, written with add:
;   (x1 >> y) + ((x0 << 1) << (y ^ 31))
; The left-hand part is shifted in two steps (by 1, then by y ^ 31) instead
; of by 32 - y. The assertions below expect a single shf.r.wrap that takes
; %x1 as the first source, %x0 as the second and %y as the amount.
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
; CHECK-LABEL: test_fshr_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshr_special_case_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_fshr_special_case_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [test_fshr_special_case_param_2];
; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %lo_part = lshr i32 %x1, %y
  %hi_pre = shl i32 %x0, 1
  %inv_amt = xor i32 %y, 31
  %hi_part = shl i32 %hi_pre, %inv_amt
  %funnel = add i32 %lo_part, %hi_part
  ret i32 %funnel
}
; Rotate hidden behind two unsigned divisions on i64:
;   ((i / 3) << 60) + (i / 48)
; Since 48 = 3 * 16, i / 48 equals (i / 3) >> 4, so the sum is a rotate of
; i / 3. The assertions below expect the division by 3 as mul.hi + shr, then
; the 64-bit rotate split into two 32-bit shf.l.wrap by 28 whose results are
; repacked with the halves swapped.
define i64 @test_rotl_udiv_special_case(i64 %i) {
; CHECK-LABEL: test_rotl_udiv_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_udiv_special_case_param_0];
; CHECK-NEXT: mul.hi.u64 %rd2, %rd1, -6148914691236517205;
; CHECK-NEXT: shr.u64 %rd3, %rd2, 1;
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd3;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r1, 28;
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, 28;
; CHECK-NEXT: mov.b64 %rd4, {%r4, %r3};
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %div3 = udiv i64 %i, 3
  %div48 = udiv i64 %i, 48
  %div3_hi = shl i64 %div3, 60
  %sum = add i64 %div3_hi, %div48
  ret i64 %sum
}
; Rotate hidden behind two multiplications on i32:
;   ((i * 9) >> 25) + (i * 1152)
; Since 1152 = 9 * 128, i * 1152 equals (i * 9) << 7 in 32-bit arithmetic, so
; the sum is a rotate-left of i * 9 by 7. The assertions below expect a single
; multiply by 9 followed by one shf.l.wrap of the product with itself by 7.
define i32 @test_rotl_mul_special_case(i32 %i) {
; CHECK-LABEL: test_rotl_mul_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_mul_special_case_param_0];
; CHECK-NEXT: mul.lo.s32 %r2, %r1, 9;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r2, 7;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %times9 = mul i32 %i, 9
  %times1152 = mul i32 %i, 1152
  %wrapped = lshr i32 %times9, 25
  %sum = add i32 %wrapped, %times1152
  ret i32 %sum
}
; Multiplication-based rotate on i64 with a mask on one side:
;   ((i * 1152) & 160) + ((i * 9) >> 57)
; 1152 = 9 * 128, so the masked operand is ((i * 9) << 7) & 160. The
; assertions below expect a multiply by 9, two 32-bit shf.l.wrap by 7 that
; are repacked into a 64-bit value, and one final and with 255.
define i64 @test_rotl_mul_with_mask_special_case(i64 %i) {
; CHECK-LABEL: test_rotl_mul_with_mask_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_mul_with_mask_special_case_param_0];
; CHECK-NEXT: mul.lo.s64 %rd2, %rd1, 9;
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1;
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
; CHECK-NEXT: shf.l.wrap.b32 %r5, %r4, %r1, 7;
; CHECK-NEXT: shf.l.wrap.b32 %r6, %r1, %r2, 7;
; CHECK-NEXT: mov.b64 %rd3, {%r5, %r6};
; CHECK-NEXT: and.b64 %rd4, %rd3, 255;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %times1152 = mul i64 %i, 1152
  %times9 = mul i64 %i, 9
  %masked = and i64 %times1152, 160
  %wrapped = lshr i64 %times9, 57
  %sum = add i64 %masked, %wrapped
  ret i64 %sum
}
; Funnel shift left with a mask on the wrapped-in bits:
;   ((x | 1) << 5) + ((x >> 27) & 1)
; The shifted-left operand is x | 1 rather than x, and only bit 0 of the
; right-shifted operand is kept. The assertions below expect the or, a single
; shf.l.wrap by 5 combining the two values, and one and with -31, i.e. the
; 32-bit mask 0xFFFFFFE1 (all bits from bit 5 upwards, plus bit 0).
define i32 @test_fshl_with_mask_special_case(i32 %x) {
; CHECK-LABEL: test_fshl_with_mask_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_with_mask_special_case_param_0];
; CHECK-NEXT: or.b32 %r2, %r1, 1;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r2, 5;
; CHECK-NEXT: and.b32 %r4, %r3, -31;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %x_odd = or i32 %x, 1
  %upper = shl i32 %x_odd, 5
  %top_bits = lshr i32 %x, 27
  %bit27 = and i32 %top_bits, 1
  %sum = add i32 %upper, %bit27
  ret i32 %sum
}