; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; Requires: c1 <= leadingzeros(zext(y))
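;
; Soundness sketch: a logical right shift distributes over bitwise ops, so
; srl (or (x, shl(zext(y),c1)), c1) = or (srl(x,c1), srl(shl(zext(y),c1), c1)),
; and when c1 <= leadingzeros(zext(y)) the shl drops no set bits, so
; srl(shl(zext(y),c1), c1) = zext(y). E.g. for i64 and c1 = 5 below, zext(y)
; has at least 32 leading zeros, so shifting left then right by 5 is a no-op.
; The same reasoning covers the xor, and, and vector variants that follow.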
define i64 @test_or(i64 %x, i32 %y) {
; CHECK-LABEL: test_or(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_or_param_0];
; CHECK-NEXT: ld.param.b32 %rd2, [test_or_param_1];
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
%or = or i64 %x, %shl
%srl = lshr i64 %or, 5
ret i64 %srl
}
; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
; Requires: c1 <= leadingzeros(zext(y))
define i64 @test_xor(i64 %x, i32 %y) {
; CHECK-LABEL: test_xor(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xor_param_0];
; CHECK-NEXT: ld.param.b32 %rd2, [test_xor_param_1];
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
; CHECK-NEXT: xor.b64 %rd4, %rd3, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
%xor = xor i64 %x, %shl
%srl = lshr i64 %xor, 5
ret i64 %srl
}
; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
; Requires: c1 <= leadingzeros(zext(y))
define i64 @test_and(i64 %x, i32 %y) {
; CHECK-LABEL: test_and(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_and_param_0];
; CHECK-NEXT: ld.param.b32 %rd2, [test_and_param_1];
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
; CHECK-NEXT: and.b64 %rd4, %rd3, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
%and = and i64 %x, %shl
%srl = lshr i64 %and, 5
ret i64 %srl
}
; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; Requires: c1 <= leadingzeros(zext(y))
; Same fold with vector operands x and y.
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
; CHECK-LABEL: test_vec(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<7>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_vec_param_0];
; CHECK-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [test_vec_param_1];
; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4};
; CHECK-NEXT: shr.u16 %rs5, %rs2, 5;
; CHECK-NEXT: shr.u16 %rs6, %rs1, 5;
; CHECK-NEXT: mov.b32 %r2, {%rs6, %rs5};
; CHECK-NEXT: or.b32 %r3, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%ext = zext <2 x i8> %y to <2 x i16>
%shl = shl <2 x i16> %ext, splat(i16 5)
%or = or <2 x i16> %x, %shl
%srl = lshr <2 x i16> %or, splat(i16 5)
ret <2 x i16> %srl
}
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; Reason: c1 > leadingzeros(zext(y)).
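; E.g. zext(y) is only guaranteed 32 leading zeros, so shl by 33 discards
; bit 31 of y, while the folded form or(srl(x,33), zext(y)) would keep it:
; the two disagree whenever bit 31 of y is set.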
define i64 @test_negative_c(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_c(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_c_param_0];
; CHECK-NEXT: ld.param.b32 %rd2, [test_negative_c_param_1];
; CHECK-NEXT: shl.b64 %rd3, %rd2, 33;
; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3;
; CHECK-NEXT: shr.u64 %rd5, %rd4, 33;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd5;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 33
%or = or i64 %x, %shl
%srl = lshr i64 %or, 33
ret i64 %srl
}
declare void @use(i64)
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; Reason: "or" has more than one use.
define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_use_lop(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_use_lop_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_negative_use_lop_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
%or = or i64 %x, %shl
%srl = lshr i64 %or, 5
call void @use(i64 %or)
ret i64 %srl
}
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; Reason: "shl" has more than one use.
define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_use_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_use_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_negative_use_shl_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
%or = or i64 %x, %shl
%srl = lshr i64 %or, 5
call void @use(i64 %shl)
ret i64 %srl
}
;; ============================================================================
;; Guarded shift patterns
;;
;; C/C++ code like `shift >= 32 ? 0 : x >> shift` generates a guarded shift
;; pattern to avoid undefined behavior. PTX shr/shl instructions clamp shift
;; amounts >= BitWidth to produce 0, making the guard redundant.
;;
;; Transformation 1 (ugt form):
;; (select (icmp ugt shift, BitWidth-1), 0, (srl x, shift))
;; i.e., shift > 31 ? 0 : x >> shift
;; --> (srl x, shift)
;;
;; Transformation 2 (ult form):
;; (select (icmp ult shift, BitWidth), (srl x, shift), 0)
;; i.e., shift < 32 ? x >> shift : 0
;; --> (srl x, shift)
;;
;; Same transformation applies to left shifts.
;; ============================================================================
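;; For illustration, a hypothetical C source that produces the ult form:
;;
;;   unsigned guarded_shr(unsigned x, unsigned s) {
;;     return s < 32 ? x >> s : 0; // guard avoids UB when s >= 32
;;   }
;;
;; i.e. (select (icmp ult s, 32), (lshr x, s), 0). Since PTX shr.u32 already
;; returns 0 for s >= 32, the srl alone implements the guarded expression.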
;; --- i8 shr tests (negative - guard must remain) ---
; Do NOT optimize: PTX uses 16-bit registers for i8, so clamping happens at 16, not 8.
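; E.g. for shift = 10 the guard selects 0, but shr.u16 does not clamp until 16
; and performs a real 16-bit shift; presumably the high bits of the 16-bit
; register holding the i8 value are not guaranteed to be zero, so dropping the
; guard could change the result.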
define i8 @test_guarded_i8_ugt(i8 %x, i8 %shift) {
; CHECK-LABEL: test_guarded_i8_ugt(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [test_guarded_i8_ugt_param_0];
; CHECK-NEXT: ld.param.b8 %r1, [test_guarded_i8_ugt_param_1];
; CHECK-NEXT: setp.gt.u32 %p1, %r1, 7;
; CHECK-NEXT: shr.u16 %rs2, %rs1, %r1;
; CHECK-NEXT: selp.b16 %rs3, 0, %rs2, %p1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i8 %shift, 7
%shr = lshr i8 %x, %shift
%sel = select i1 %cmp, i8 0, i8 %shr
ret i8 %sel
}
; Do NOT optimize: PTX uses 16-bit registers for i8, so clamping happens at 16, not 8.
define i8 @test_guarded_i8_ult(i8 %x, i8 %shift) {
; CHECK-LABEL: test_guarded_i8_ult(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [test_guarded_i8_ult_param_0];
; CHECK-NEXT: ld.param.b8 %r1, [test_guarded_i8_ult_param_1];
; CHECK-NEXT: setp.lt.u32 %p1, %r1, 8;
; CHECK-NEXT: shr.u16 %rs2, %rs1, %r1;
; CHECK-NEXT: selp.b16 %rs3, %rs2, 0, %p1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ult i8 %shift, 8
%shr = lshr i8 %x, %shift
%sel = select i1 %cmp, i8 %shr, i8 0
ret i8 %sel
}
;; --- i16 shr tests ---
; (select (ugt shift, 15), 0, (srl x, shift)) --> (srl x, shift)
define i16 @test_guarded_i16_ugt(i16 %x, i16 %shift) {
; CHECK-LABEL: test_guarded_i16_ugt(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_guarded_i16_ugt_param_0];
; CHECK-NEXT: ld.param.b16 %r1, [test_guarded_i16_ugt_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i16 %shift, 15
%shr = lshr i16 %x, %shift
%sel = select i1 %cmp, i16 0, i16 %shr
ret i16 %sel
}
; (select (ult shift, 16), (srl x, shift), 0) --> (srl x, shift)
define i16 @test_guarded_i16_ult(i16 %x, i16 %shift) {
; CHECK-LABEL: test_guarded_i16_ult(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_guarded_i16_ult_param_0];
; CHECK-NEXT: ld.param.b16 %r1, [test_guarded_i16_ult_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ult i16 %shift, 16
%shr = lshr i16 %x, %shift
%sel = select i1 %cmp, i16 %shr, i16 0
ret i16 %sel
}
;; --- i32 shr tests ---
; (select (ugt shift, 31), 0, (srl x, shift)) --> (srl x, shift)
define i32 @test_guarded_i32_ugt(i32 %x, i32 %shift) {
; CHECK-LABEL: test_guarded_i32_ugt(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i32_ugt_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_guarded_i32_ugt_param_1];
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%cmp = icmp ugt i32 %shift, 31
%shr = lshr i32 %x, %shift
%sel = select i1 %cmp, i32 0, i32 %shr
ret i32 %sel
}
; (select (ult shift, 32), (srl x, shift), 0) --> (srl x, shift)
define i32 @test_guarded_i32_ult(i32 %x, i32 %shift) {
; CHECK-LABEL: test_guarded_i32_ult(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i32_ult_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_guarded_i32_ult_param_1];
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%cmp = icmp ult i32 %shift, 32
%shr = lshr i32 %x, %shift
%sel = select i1 %cmp, i32 %shr, i32 0
ret i32 %sel
}
;; --- i64 shr tests ---
; (select (ugt shift, 63), 0, (srl x, shift)) --> (srl x, shift)
define i64 @test_guarded_i64_ugt(i64 %x, i64 %shift) {
; CHECK-LABEL: test_guarded_i64_ugt(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_guarded_i64_ugt_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i64_ugt_param_1];
; CHECK-NEXT: shr.u64 %rd2, %rd1, %r1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i64 %shift, 63
%shr = lshr i64 %x, %shift
%sel = select i1 %cmp, i64 0, i64 %shr
ret i64 %sel
}
; (select (ult shift, 64), (srl x, shift), 0) --> (srl x, shift)
define i64 @test_guarded_i64_ult(i64 %x, i64 %shift) {
; CHECK-LABEL: test_guarded_i64_ult(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_guarded_i64_ult_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i64_ult_param_1];
; CHECK-NEXT: shr.u64 %rd2, %rd1, %r1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%cmp = icmp ult i64 %shift, 64
%shr = lshr i64 %x, %shift
%sel = select i1 %cmp, i64 %shr, i64 0
ret i64 %sel
}
;; --- i8 shl tests (negative - guard must remain) ---
; Do NOT optimize: PTX uses 16-bit registers for i8, so clamping happens at 16, not 8.
define i8 @test_guarded_i8_ugt_shl(i8 %x, i8 %shift) {
; CHECK-LABEL: test_guarded_i8_ugt_shl(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [test_guarded_i8_ugt_shl_param_0];
; CHECK-NEXT: ld.param.b8 %r1, [test_guarded_i8_ugt_shl_param_1];
; CHECK-NEXT: setp.gt.u32 %p1, %r1, 7;
; CHECK-NEXT: shl.b16 %rs2, %rs1, %r1;
; CHECK-NEXT: selp.b16 %rs3, 0, %rs2, %p1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i8 %shift, 7
%shl = shl i8 %x, %shift
%sel = select i1 %cmp, i8 0, i8 %shl
ret i8 %sel
}
; Do NOT optimize: PTX uses 16-bit registers for i8, so clamping happens at 16, not 8.
define i8 @test_guarded_i8_ult_shl(i8 %x, i8 %shift) {
; CHECK-LABEL: test_guarded_i8_ult_shl(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [test_guarded_i8_ult_shl_param_0];
; CHECK-NEXT: ld.param.b8 %r1, [test_guarded_i8_ult_shl_param_1];
; CHECK-NEXT: setp.lt.u32 %p1, %r1, 8;
; CHECK-NEXT: shl.b16 %rs2, %rs1, %r1;
; CHECK-NEXT: selp.b16 %rs3, %rs2, 0, %p1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ult i8 %shift, 8
%shl = shl i8 %x, %shift
%sel = select i1 %cmp, i8 %shl, i8 0
ret i8 %sel
}
;; --- i16 shl tests ---
; (select (ugt shift, 15), 0, (shl x, shift)) --> (shl x, shift)
define i16 @test_guarded_i16_ugt_shl(i16 %x, i16 %shift) {
; CHECK-LABEL: test_guarded_i16_ugt_shl(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_guarded_i16_ugt_shl_param_0];
; CHECK-NEXT: ld.param.b16 %r1, [test_guarded_i16_ugt_shl_param_1];
; CHECK-NEXT: shl.b16 %rs2, %rs1, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i16 %shift, 15
%shl = shl i16 %x, %shift
%sel = select i1 %cmp, i16 0, i16 %shl
ret i16 %sel
}
; (select (ult shift, 16), (shl x, shift), 0) --> (shl x, shift)
define i16 @test_guarded_i16_ult_shl(i16 %x, i16 %shift) {
; CHECK-LABEL: test_guarded_i16_ult_shl(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_guarded_i16_ult_shl_param_0];
; CHECK-NEXT: ld.param.b16 %r1, [test_guarded_i16_ult_shl_param_1];
; CHECK-NEXT: shl.b16 %rs2, %rs1, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%cmp = icmp ult i16 %shift, 16
%shl = shl i16 %x, %shift
%sel = select i1 %cmp, i16 %shl, i16 0
ret i16 %sel
}
;; --- i32 shl tests ---
; (select (ugt shift, 31), 0, (shl x, shift)) --> (shl x, shift)
define i32 @test_guarded_i32_ugt_shl(i32 %x, i32 %shift) {
; CHECK-LABEL: test_guarded_i32_ugt_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i32_ugt_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_guarded_i32_ugt_shl_param_1];
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%cmp = icmp ugt i32 %shift, 31
%shl = shl i32 %x, %shift
%sel = select i1 %cmp, i32 0, i32 %shl
ret i32 %sel
}
; (select (ult shift, 32), (shl x, shift), 0) --> (shl x, shift)
define i32 @test_guarded_i32_ult_shl(i32 %x, i32 %shift) {
; CHECK-LABEL: test_guarded_i32_ult_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i32_ult_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_guarded_i32_ult_shl_param_1];
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%cmp = icmp ult i32 %shift, 32
%shl = shl i32 %x, %shift
%sel = select i1 %cmp, i32 %shl, i32 0
ret i32 %sel
}
;; --- i64 shl tests ---
; (select (ugt shift, 63), 0, (shl x, shift)) --> (shl x, shift)
define i64 @test_guarded_i64_ugt_shl(i64 %x, i64 %shift) {
; CHECK-LABEL: test_guarded_i64_ugt_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_guarded_i64_ugt_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i64_ugt_shl_param_1];
; CHECK-NEXT: shl.b64 %rd2, %rd1, %r1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%cmp = icmp ugt i64 %shift, 63
%shl = shl i64 %x, %shift
%sel = select i1 %cmp, i64 0, i64 %shl
ret i64 %sel
}
; (select (ult shift, 64), (shl x, shift), 0) --> (shl x, shift)
define i64 @test_guarded_i64_ult_shl(i64 %x, i64 %shift) {
; CHECK-LABEL: test_guarded_i64_ult_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_guarded_i64_ult_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i64_ult_shl_param_1];
; CHECK-NEXT: shl.b64 %rd2, %rd1, %r1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%cmp = icmp ult i64 %shift, 64
%shl = shl i64 %x, %shift
%sel = select i1 %cmp, i64 %shl, i64 0
ret i64 %sel
}
; Do not fold: the shift amount in the guard differs from the shift amount used by the shift operation.
define i64 @test_guarded_i64_ult_shl_different_shift(i64 %x, i64 %shift1, i64 %shift2) {
; CHECK-LABEL: test_guarded_i64_ult_shl_different_shift(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_guarded_i64_ult_shl_different_shift_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [test_guarded_i64_ult_shl_different_shift_param_1];
; CHECK-NEXT: setp.lt.u64 %p1, %rd2, 64;
; CHECK-NEXT: ld.param.b32 %r1, [test_guarded_i64_ult_shl_different_shift_param_2];
; CHECK-NEXT: shl.b64 %rd3, %rd1, %r1;
; CHECK-NEXT: selp.b64 %rd4, %rd3, 0, %p1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%cmp = icmp ult i64 %shift1, 64
%shl = shl i64 %x, %shift2
%sel = select i1 %cmp, i64 %shl, i64 0
ret i64 %sel
}