| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_50 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify %} |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; (x << 7) + (x >> 25): the shl result has its low 7 bits clear and the lshr |
| ; result only has its low 7 bits set, so the add cannot carry. The assertions |
| ; expect the pair to be selected as a single rotate-left by 7. |
| define i32 @test_simple_rotl(i32 %x) { |
| ; CHECK-LABEL: test_simple_rotl( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotl_param_0]; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 7; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %hi = shl i32 %x, 7 |
| %lo = lshr i32 %x, 25 |
| %rot = add i32 %hi, %lo |
| ret i32 %rot |
| } |
| |
| ; (x >> 7) + (x << 25): a rotate-right by 7 written with add. The assertions |
| ; expect it to be emitted as the equivalent rotate-left by 25 (32 - 7). |
| define i32 @test_simple_rotr(i32 %x) { |
| ; CHECK-LABEL: test_simple_rotr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotr_param_0]; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 25; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %lo = lshr i32 %x, 7 |
| %hi = shl i32 %x, 25 |
| %rot = add i32 %lo, %hi |
| ret i32 %rot |
| } |
| |
| ; (x << y) + (x >> (32 - y)) with a variable amount. The assertions expect a |
| ; single shf.l.wrap that takes the shift amount from a register. |
| define i32 @test_rotl_var(i32 %x, i32 %y) { |
| ; CHECK-LABEL: test_rotl_var( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_var_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_rotl_var_param_1]; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %hi = shl i32 %x, %y |
| %inv = sub i32 32, %y |
| %lo = lshr i32 %x, %inv |
| %rot = add i32 %hi, %lo |
| ret i32 %rot |
| } |
| |
| ; (x >> y) + (x << (32 - y)) with a variable amount. The assertions expect a |
| ; single shf.r.wrap that takes the shift amount from a register. |
| define i32 @test_rotr_var(i32 %x, i32 %y) { |
| ; CHECK-LABEL: test_rotr_var( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_rotr_var_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_rotr_var_param_1]; |
| ; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %lo = lshr i32 %x, %y |
| %inv = sub i32 32, %y |
| %hi = shl i32 %x, %inv |
| %rot = add i32 %lo, %hi |
| ret i32 %rot |
| } |
| |
| ; Negative test: (x << y) + (x >> ((0 - y) & 31)). For y == 0 both shifts are |
| ; by zero and the sum is x + x, which is not a rotate of x, so the assertions |
| ; expect the separate shl / neg / and / shr / add sequence to remain. |
| define i32 @test_invalid_rotl_var_and(i32 %x, i32 %y) { |
| ; CHECK-LABEL: test_invalid_rotl_var_and( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotl_var_and_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotl_var_and_param_1]; |
| ; CHECK-NEXT: shl.b32 %r3, %r1, %r2; |
| ; CHECK-NEXT: neg.s32 %r4, %r2; |
| ; CHECK-NEXT: and.b32 %r5, %r4, 31; |
| ; CHECK-NEXT: shr.u32 %r6, %r1, %r5; |
| ; CHECK-NEXT: add.s32 %r7, %r6, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-NEXT: ret; |
| %hi = shl i32 %x, %y |
| %neg = sub nsw i32 0, %y |
| %amt = and i32 %neg, 31 |
| %lo = lshr i32 %x, %amt |
| %sum = add i32 %lo, %hi |
| ret i32 %sum |
| } |
| |
| ; Negative test: (x >> y) + (x << ((0 - y) & 31)). For y == 0 both shifts are |
| ; by zero and the sum is x + x, which is not a rotate of x, so the assertions |
| ; expect the separate shr / neg / and / shl / add sequence to remain. |
| define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) { |
| ; CHECK-LABEL: test_invalid_rotr_var_and( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotr_var_and_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotr_var_and_param_1]; |
| ; CHECK-NEXT: shr.u32 %r3, %r1, %r2; |
| ; CHECK-NEXT: neg.s32 %r4, %r2; |
| ; CHECK-NEXT: and.b32 %r5, %r4, 31; |
| ; CHECK-NEXT: shl.b32 %r6, %r1, %r5; |
| ; CHECK-NEXT: add.s32 %r7, %r3, %r6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-NEXT: ret; |
| %lo = lshr i32 %x, %y |
| %neg = sub nsw i32 0, %y |
| %amt = and i32 %neg, 31 |
| %hi = shl i32 %x, %amt |
| %sum = add i32 %lo, %hi |
| ret i32 %sum |
| } |
| |
| ; (x0 << y) + ((x1 >> 1) >> (y ^ 31)). For y in 0..31, y ^ 31 equals 31 - y, |
| ; so the second term is x1 >> (32 - y) expressed without a shift by 32. The |
| ; assertions expect one shf.l.wrap combining x1 (low) and x0 (high). |
| define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { |
| ; CHECK-LABEL: test_fshl_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_special_case_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fshl_special_case_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fshl_special_case_param_2]; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %hi = shl i32 %x0, %y |
| %half = lshr i32 %x1, 1 |
| %inv = xor i32 %y, 31 |
| %lo = lshr i32 %half, %inv |
| %fsh = add i32 %hi, %lo |
| ret i32 %fsh |
| } |
| |
| ; (x1 >> y) + ((x0 << 1) << (y ^ 31)). For y in 0..31, y ^ 31 equals 31 - y, |
| ; so the second term is x0 << (32 - y) expressed without a shift by 32. The |
| ; assertions expect one shf.r.wrap combining x1 (low) and x0 (high). |
| define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) { |
| ; CHECK-LABEL: test_fshr_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fshr_special_case_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fshr_special_case_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fshr_special_case_param_2]; |
| ; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %lo = lshr i32 %x1, %y |
| %dbl = shl i32 %x0, 1 |
| %inv = xor i32 %y, 31 |
| %hi = shl i32 %dbl, %inv |
| %fsh = add i32 %lo, %hi |
| ret i32 %fsh |
| } |
| |
| ; ((i / 3) << 60) + (i / 48). Since i / 48 equals (i / 3) >> 4 for unsigned |
| ; division, the sum is i / 3 rotated left by 60. The assertions expect the |
| ; division as mul.hi + shr, then a 64-bit rotate assembled from two 32-bit |
| ; shf.l.wrap by 28 on the unpacked halves. |
| define i64 @test_rotl_udiv_special_case(i64 %i) { |
| ; CHECK-LABEL: test_rotl_udiv_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_udiv_special_case_param_0]; |
| ; CHECK-NEXT: mul.hi.u64 %rd2, %rd1, -6148914691236517205; |
| ; CHECK-NEXT: shr.u64 %rd3, %rd2, 1; |
| ; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd3; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r1, 28; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, 28; |
| ; CHECK-NEXT: mov.b64 %rd4, {%r4, %r3}; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-NEXT: ret; |
| %q3 = udiv i64 %i, 3 |
| %q48 = udiv i64 %i, 48 |
| %hi = shl i64 %q3, 60 |
| %rot = add i64 %hi, %q48 |
| ret i64 %rot |
| } |
| |
| ; ((i * 9) >> 25) + (i * 1152). Since 1152 == 9 << 7, the second term is |
| ; (i * 9) << 7 and the sum is i * 9 rotated left by 7. The assertions expect |
| ; one multiply by 9 followed by a single shf.l.wrap by 7. |
| define i32 @test_rotl_mul_special_case(i32 %i) { |
| ; CHECK-LABEL: test_rotl_mul_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_mul_special_case_param_0]; |
| ; CHECK-NEXT: mul.lo.s32 %r2, %r1, 9; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r2, 7; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %m9 = mul i32 %i, 9 |
| %m1152 = mul i32 %i, 1152 |
| %lo = lshr i32 %m9, 25 |
| %rot = add i32 %lo, %m1152 |
| ret i32 %rot |
| } |
| |
| ; ((i * 1152) & 160) + ((i * 9) >> 57). i * 1152 is (i * 9) << 7, so its low |
| ; seven bits are zero and the mask 160 can only keep bit 7; the shifted term |
| ; occupies bits 0..6. The terms are disjoint, and the assertions expect a |
| ; funnel shift by 7 followed by an and with 255. |
| ; NOTE(review): the second shf.l.wrap writes %r6, the high word of %rd3, which |
| ; the and with 255 clears; it also reads the halves of the un-multiplied |
| ; input. Regenerate with update_llc_test_checks.py to confirm the assertions |
| ; still match llc output. |
| define i64 @test_rotl_mul_with_mask_special_case(i64 %i) { |
| ; CHECK-LABEL: test_rotl_mul_with_mask_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_mul_with_mask_special_case_param_0]; |
| ; CHECK-NEXT: mul.lo.s64 %rd2, %rd1, 9; |
| ; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r5, %r4, %r1, 7; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r6, %r1, %r2, 7; |
| ; CHECK-NEXT: mov.b64 %rd3, {%r5, %r6}; |
| ; CHECK-NEXT: and.b64 %rd4, %rd3, 255; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-NEXT: ret; |
| %m1152 = mul i64 %i, 1152 |
| %m9 = mul i64 %i, 9 |
| %masked = and i64 %m1152, 160 |
| %lo = lshr i64 %m9, 57 |
| %sum = add i64 %masked, %lo |
| ret i64 %sum |
| } |
| |
| ; ((x | 1) << 5) + ((x >> 27) & 1). The first term has its low 5 bits clear |
| ; and the second is a single bit at position 0, so the add cannot carry. The |
| ; assertions expect a shf.l.wrap by 5 of (x | 1):x followed by an and with |
| ; -31 (0xFFFFFFE1), which keeps bits 5..31 and bit 0. |
| define i32 @test_fshl_with_mask_special_case(i32 %x) { |
| ; CHECK-LABEL: test_fshl_with_mask_special_case( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_with_mask_special_case_param_0]; |
| ; CHECK-NEXT: or.b32 %r2, %r1, 1; |
| ; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r2, 5; |
| ; CHECK-NEXT: and.b32 %r4, %r3, -31; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %x1 = or i32 %x, 1 |
| %hi = shl i32 %x1, 5 |
| %top = lshr i32 %x, 27 |
| %bit = and i32 %top, 1 |
| %res = add i32 %hi, %bit |
| ret i32 %res |
| } |