; Source blob: d42fd3605264fd745cd031f8836f8f2776c1314c
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; Test folding of redundant early-exit guard around ctpop:
; select (icmp ule X, 1), X, ctpop(X) --> ctpop(X)
;
; This fold is valid because ctpop(0)==0 and ctpop(1)==1: whenever the guard
; is true (X is 0 or 1) the select's true arm X already equals ctpop(X), so
; the guard is always redundant. The guard only existed to skip slow software
; emulation.
; RUN: opt < %s -S -passes=instcombine | FileCheck %s
;------------------------------------------------------------------------------
; Positive tests: select (icmp ule X, 1), X, ctpop(X) --> ctpop(X)
;------------------------------------------------------------------------------
; Positive test, scalar i64: the select yields %x when %x is 0 or 1 and
; ctpop(%x) otherwise. Both arms agree whenever the guard is true, so the
; expected output below keeps only the ctpop call and returns it directly.
define i64 @fold_ule1_i64(i64 %x) {
; CHECK-LABEL: @fold_ule1_i64(
; CHECK-NEXT: [[POP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[X:%.*]])
; CHECK-NEXT: ret i64 [[POP]]
;
; Guard: true exactly when %x is 0 or 1 (unsigned compare).
%cmp = icmp ule i64 %x, 1
%pop = call i64 @llvm.ctpop.i64(i64 %x)
; True arm is %x itself, false arm is the population count of %x.
%res = select i1 %cmp, i64 %x, i64 %pop
ret i64 %res
}
; Positive test, scalar i32: same guarded-ctpop pattern as the i64 case at a
; different bit width. The expected output below is the bare ctpop call, with
; the icmp and select removed.
define i32 @fold_ule1_i32(i32 %x) {
; CHECK-LABEL: @fold_ule1_i32(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: ret i32 [[POP]]
;
; Guard: true exactly when %x is 0 or 1 (unsigned compare).
%cmp = icmp ule i32 %x, 1
%pop = call i32 @llvm.ctpop.i32(i32 %x)
; True arm is %x itself, false arm is the population count of %x.
%res = select i1 %cmp, i32 %x, i32 %pop
ret i32 %res
}
; Positive test, <4 x i32> vector: every lane is compared against a splat of 1
; and the select picks per lane via the <4 x i1> mask. The expected output
; below is the bare vector ctpop call, with the icmp and select removed.
define <4 x i32> @fold_vector_ule1(<4 x i32> %x) {
; CHECK-LABEL: @fold_vector_ule1(
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret <4 x i32> [[POP]]
;
; Per-lane guard: a lane is true exactly when that lane of %x is 0 or 1.
%cmp = icmp ule <4 x i32> %x, splat(i32 1)
%pop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
; Per lane: true arm is the lane of %x, false arm is its population count.
%res = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> %pop
ret <4 x i32> %res
}
;------------------------------------------------------------------------------
; Negative tests: should NOT fold.
;------------------------------------------------------------------------------
; Wrong predicate (ugt instead of ult/ule): the guard is true when %x > 1, so
; the arms are effectively swapped relative to the pattern under test. The
; expected output below keeps the icmp, the ctpop call and the select.
; NOTE(review): this select is still equivalent to plain %x, because the ctpop
; arm is only chosen for %x in {0, 1}, where ctpop(%x) == %x. That would be a
; different fold; if one is ever added, these assertions will need regenerating.
define i32 @no_fold_wrong_pred(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_pred(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
; Guard: true when %x is 2 or larger (unsigned compare).
%cmp = icmp ugt i32 %x, 1
%pop = call i32 @llvm.ctpop.i32(i32 %x)
%res = select i1 %cmp, i32 %x, i32 %pop
ret i32 %res
}
; Wrong constant (ult 3 instead of ult 2): the guard is also true for
; %x == 2, where the true arm yields 2 but ctpop(2) == 1. Replacing the select
; with the ctpop call would change the result, so the expected output below
; keeps the icmp, the ctpop call and the select.
define i32 @no_fold_wrong_const(i32 %x) {
; CHECK-LABEL: @no_fold_wrong_const(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
; Guard: true when %x is 0, 1 or 2 (unsigned compare).
%cmp = icmp ult i32 %x, 3
%pop = call i32 @llvm.ctpop.i32(i32 %x)
%res = select i1 %cmp, i32 %x, i32 %pop
ret i32 %res
}
; Mismatched variables (condition uses Y, ctpop uses X): the guard and the
; true arm both use %y, while the population count is taken of %x. The true
; arm therefore says nothing about ctpop(%x) (for example %y == 0, %x == 1
; gives 0, not 1), and the expected output below keeps all three instructions.
define i32 @no_fold_mismatch_var(i32 %x, i32 %y) {
; CHECK-LABEL: @no_fold_mismatch_var(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[Y:%.*]], 2
; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[POP]]
; CHECK-NEXT: ret i32 [[RES]]
;
; Guard tests %y (true when %y is 0 or 1), not the ctpop operand %x.
%cmp = icmp ult i32 %y, 2
%pop = call i32 @llvm.ctpop.i32(i32 %x)
%res = select i1 %cmp, i32 %y, i32 %pop
ret i32 %res
}