| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; |
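| ; Test folding of a redundant early-exit guard around ctpop: |
| ; select (icmp ule X, 1), X, ctpop(X) --> ctpop(X) |
| ; The guard may equivalently be written as icmp ult X, 2. |
| ; |
| ; This fold is valid because ctpop(0) == 0 and ctpop(1) == 1: whenever the |
| ; guard is true, X already equals ctpop(X), so the select is always redundant. |
| ; Such a guard is typically written only to skip slow software emulation. |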
| |
| |
| ; RUN: opt < %s -S -passes=instcombine | FileCheck %s |
| |
| |
| ;------------------------------------------------------------------------------ |
| ; Positive tests: select (icmp ule X, 1), X, ctpop(X) --> ctpop(X) |
| ;------------------------------------------------------------------------------ |
| |
| ; Scalar i64 case. The select returns %x when %x is 0 or 1 and ctpop(%x) |
| ; otherwise; both arms agree on the guarded values, so the expected output |
| ; is the bare ctpop call with the compare and the select removed. |
| define i64 @fold_ule1_i64(i64 %x) { |
| ; CHECK-LABEL: @fold_ule1_i64( |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[X:%.*]]) |
| ; CHECK-NEXT: ret i64 [[POP]] |
| ; |
| %cmp = icmp ule i64 %x, 1 |
| %pop = call i64 @llvm.ctpop.i64(i64 %x) |
| %res = select i1 %cmp, i64 %x, i64 %pop |
| ret i64 %res |
| } |
| |
| ; Same guard pattern at i32 width: the compare and the select are expected |
| ; to disappear, leaving only the ctpop call and the return. |
| define i32 @fold_ule1_i32(i32 %x) { |
| ; CHECK-LABEL: @fold_ule1_i32( |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]]) |
| ; CHECK-NEXT: ret i32 [[POP]] |
| ; |
| %cmp = icmp ule i32 %x, 1 |
| %pop = call i32 @llvm.ctpop.i32(i32 %x) |
| %res = select i1 %cmp, i32 %x, i32 %pop |
| ret i32 %res |
| } |
| |
| ; Vector case: every lane is compared against a splat of 1, so the guard is |
| ; redundant per lane and the select is expected to fold to the vector ctpop. |
| define <4 x i32> @fold_vector_ule1(<4 x i32> %x) { |
| ; CHECK-LABEL: @fold_vector_ule1( |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[X:%.*]]) |
| ; CHECK-NEXT: ret <4 x i32> [[POP]] |
| ; |
| %cmp = icmp ule <4 x i32> %x, splat(i32 1) |
| %pop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x) |
| %res = select <4 x i1> %cmp, <4 x i32> %x, <4 x i32> %pop |
| ret <4 x i32> %res |
| } |
| |
| ;------------------------------------------------------------------------------ |
| ; Negative tests: should NOT fold. |
| ;------------------------------------------------------------------------------ |
| |
| ; Wrong predicate (ugt instead of ult/ule): the arms are inverted relative to |
| ; the fold pattern, so %x is selected for values above 1, where it generally |
| ; differs from ctpop(%x). The select must not be replaced by the ctpop call. |
| ; NOTE(review): this select is value-equivalent to plain %x, because |
| ; ctpop(%x) == %x on the false path; that would be a different fold and the |
| ; expectations below only pin that the IR is left unchanged. |
| define i32 @no_fold_wrong_pred(i32 %x) { |
| ; CHECK-LABEL: @no_fold_wrong_pred( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1 |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]]) |
| ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %cmp = icmp ugt i32 %x, 1 |
| %pop = call i32 @llvm.ctpop.i32(i32 %x) |
| %res = select i1 %cmp, i32 %x, i32 %pop |
| ret i32 %res |
| } |
| |
| ; Wrong constant (ult 3 instead of ult 2): the guard also admits %x == 2, |
| ; where ctpop(2) == 1 differs from %x, so the two arms are not interchangeable |
| ; and the select must be kept. |
| define i32 @no_fold_wrong_const(i32 %x) { |
| ; CHECK-LABEL: @no_fold_wrong_const( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3 |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]]) |
| ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[POP]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %cmp = icmp ult i32 %x, 3 |
| %pop = call i32 @llvm.ctpop.i32(i32 %x) |
| %res = select i1 %cmp, i32 %x, i32 %pop |
| ret i32 %res |
| } |
| |
| ; Mismatched variables: the guard and the true arm both use %y while ctpop |
| ; reads %x, so the guard says nothing about the ctpop operand and the select |
| ; must be kept. |
| define i32 @no_fold_mismatch_var(i32 %x, i32 %y) { |
| ; CHECK-LABEL: @no_fold_mismatch_var( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[Y:%.*]], 2 |
| ; CHECK-NEXT: [[POP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]]) |
| ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[POP]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %cmp = icmp ult i32 %y, 2 |
| %pop = call i32 @llvm.ctpop.i32(i32 %x) |
| %res = select i1 %cmp, i32 %y, i32 %pop |
| ret i32 %res |
| } |