blob: ed53b88aed6157b23b39c016aac304cb1cc30776 [file] [log] [blame]
//===- InstCombineCompares.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitICmp and visitFCmp functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
// How many times is a select replaced by one of its operands?
STATISTIC(NumSel, "Number of select opts");
/// Compute Result = In1+In2, returning true if the result overflowed for this
/// type.
static bool addWithOverflow(APInt &Result, const APInt &In1,
const APInt &In2, bool IsSigned = false) {
bool Overflow;
if (IsSigned)
Result = In1.sadd_ov(In2, Overflow);
else
Result = In1.uadd_ov(In2, Overflow);
return Overflow;
}
/// Compute Result = In1-In2, returning true if the result overflowed for this
/// type.
static bool subWithOverflow(APInt &Result, const APInt &In1,
const APInt &In2, bool IsSigned = false) {
bool Overflow;
if (IsSigned)
Result = In1.ssub_ov(In2, Overflow);
else
Result = In1.usub_ov(In2, Overflow);
return Overflow;
}
/// Given an icmp instruction, return true if any use of this comparison is a
/// branch on sign bit comparison.
static bool hasBranchUse(ICmpInst &I) {
for (auto *U : I.users())
if (isa<BranchInst>(U))
return true;
return false;
}
/// Returns true if the exploded icmp can be expressed as a signed comparison
/// to zero and updates the predicate accordingly.
/// The signedness of the comparison is preserved.
/// TODO: Refactor with decomposeBitTestICmp()?
static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
if (!ICmpInst::isSigned(Pred))
return false;
if (C.isZero())
return ICmpInst::isRelational(Pred);
if (C.isOne()) {
if (Pred == ICmpInst::ICMP_SLT) {
Pred = ICmpInst::ICMP_SLE;
return true;
}
} else if (C.isAllOnes()) {
if (Pred == ICmpInst::ICMP_SGT) {
Pred = ICmpInst::ICMP_SGE;
return true;
}
}
return false;
}
/// This is called when we see this pattern:
/// cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
/// this into some simple computation that does not need the load. For example
/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
Instruction *
InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
// Don't blow up on huge arrays.
if (ArrayElementCount > MaxArraySizeForCombine)
return nullptr;
// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array.
//
// Require: GEP GV, 0, i {{, constant indices}}
if (GEP->getNumOperands() < 3 ||
!isa<ConstantInt>(GEP->getOperand(1)) ||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
isa<Constant>(GEP->getOperand(2)))
return nullptr;
// Check that indices after the variable are constants and in-range for the
// type they index. Collect the indices. This is typically for arrays of
// structs.
SmallVector<unsigned, 4> LaterIndices;
Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Idx) return nullptr; // Variable index.
uint64_t IdxVal = Idx->getZExtValue();
if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
if (IdxVal >= ATy->getNumElements()) return nullptr;
EltTy = ATy->getElementType();
} else {
return nullptr; // Unknown type.
}
LaterIndices.push_back(IdxVal);
}
enum { Overdefined = -3, Undefined = -2 };
// Variables for our state machines.
// FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
// "i == 47 | i == 87", where 47 is the first index the condition is true for,
// and 87 is the second (and last) index. FirstTrueElement is -2 when
// undefined, otherwise set to the first true element. SecondTrueElement is
// -2 when undefined, -3 when overdefined and >= 0 when that index is true.
int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
// FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
// form "i != 47 & i != 87". Same state transitions as for true elements.
int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
/// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
/// define a state machine that triggers for ranges of values that the index
/// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
/// This is -2 when undefined, -3 when overdefined, and otherwise the last
/// index in the range (inclusive). We use -2 for undefined here because we
/// use relative comparisons and don't want 0-1 to match -1.
int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
// MagicBitvector - This is a magic bitvector where we set a bit if the
// comparison is true for element 'i'. If there are 64 elements or less in
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
if (!Elt) return nullptr;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
// If the element is masked, handle it.
if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
CompareRHS, DL, &TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
// undef in the middle of the range.
if (TrueRangeEnd == (int)i-1)
TrueRangeEnd = i;
if (FalseRangeEnd == (int)i-1)
FalseRangeEnd = i;
continue;
}
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
if (!isa<ConstantInt>(C)) return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
// State machine for single/double/range index comparison.
if (IsTrueForElt) {
// Update the TrueElement state machine.
if (FirstTrueElement == Undefined)
FirstTrueElement = TrueRangeEnd = i; // First true element.
else {
// Update double-compare state machine.
if (SecondTrueElement == Undefined)
SecondTrueElement = i;
else
SecondTrueElement = Overdefined;
// Update range state machine.
if (TrueRangeEnd == (int)i-1)
TrueRangeEnd = i;
else
TrueRangeEnd = Overdefined;
}
} else {
// Update the FalseElement state machine.
if (FirstFalseElement == Undefined)
FirstFalseElement = FalseRangeEnd = i; // First false element.
else {
// Update double-compare state machine.
if (SecondFalseElement == Undefined)
SecondFalseElement = i;
else
SecondFalseElement = Overdefined;
// Update range state machine.
if (FalseRangeEnd == (int)i-1)
FalseRangeEnd = i;
else
FalseRangeEnd = Overdefined;
}
}
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
// If all of our states become overdefined, bail out early. Since the
// predicate is expensive, only check it every 8 elements. This is only
// really useful for really huge arrays.
if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
FalseRangeEnd == Overdefined)
return nullptr;
}
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2);
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize)
Idx = Builder.CreateTrunc(Idx, IntPtrTy);
}
// If inbounds keyword is not present, Idx * ElementSize can overflow.
// Let's assume that ElementSize is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
unsigned ElementSize =
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value* Idx){
if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) {
Value *Mask = ConstantInt::get(Idx->getType(), -1);
Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
};
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
Idx = MaskIdx(Idx);
// None true -> false.
if (FirstTrueElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getFalse());
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
// True for one element -> 'i == 47'.
if (SecondTrueElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
// True for two elements -> 'i == 47 | i == 72'.
Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
return BinaryOperator::CreateOr(C1, C2);
}
// If the comparison is only false for one or two elements, emit direct
// comparisons.
if (SecondFalseElement != Overdefined) {
Idx = MaskIdx(Idx);
// None false -> true.
if (FirstFalseElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getTrue());
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
// False for one element -> 'i != 47'.
if (SecondFalseElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
// If the comparison can be replaced with a range comparison for the elements
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
Idx = MaskIdx(Idx);
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
Idx = Builder.CreateAdd(Idx, Offs);
}
Value *End = ConstantInt::get(Idx->getType(),
TrueRangeEnd-FirstTrueElement+1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
Idx = MaskIdx(Idx);
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
Idx = Builder.CreateAdd(Idx, Offs);
}
Value *End = ConstantInt::get(Idx->getType(),
FalseRangeEnd-FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
// If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
{
Type *Ty = nullptr;
// Look for an appropriate type:
// - The type of Idx if the magic fits
// - The smallest fitting legal type
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
else
Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Idx = MaskIdx(Idx);
Value *V = Builder.CreateIntCast(Idx, Ty, false);
V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
}
}
return nullptr;
}
/// Return a value that can be used to compare the *offset* implied by a GEP to
/// zero. For example, if we have &A[i], we want to return 'i' for
/// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
/// are involved. The above expression would also be legal to codegen as
/// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
/// This latter form is less amenable to optimization though, and we are allowed
/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC,
const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
// any constant indices are a multiple of its scale, then we can compute this
// in terms of the scale of the variable index. For example, if the GEP
// implies an offset of "12 + i*4", then we can codegen this as "3 + i",
// because the expression will cross zero at the same point.
unsigned i, e = GEP->getNumOperands();
int64_t Offset = 0;
for (i = 1; i != e; ++i, ++GTI) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*CI->getSExtValue();
}
} else {
// Found our variable index.
break;
}
}
// If there are no variable indices, we must have a constant offset, just
// evaluate it the general way.
if (i == e) return nullptr;
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
// 4 if the variable index is into an array of i32.
uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!CI) return nullptr;
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*CI->getSExtValue();
}
}
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits().getFixedSize() >
IntPtrWidth) {
VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
}
// Otherwise, there is an index. The computation we will do will be modulo
// the pointer size.
Offset = SignExtend64(Offset, IntPtrWidth);
VariableScale = SignExtend64(VariableScale, IntPtrWidth);
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
// but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
// multiple of the variable scale.
int64_t NewOffs = Offset / (int64_t)VariableScale;
if (Offset != NewOffs*(int64_t)VariableScale)
return nullptr;
// Okay, we can do this evaluation. Start by converting the index to intptr.
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
}
/// Returns true if we can rewrite Start as a GEP with pointer Base
/// and some integer offset. The nodes that need to be re-written
/// for this transformation will be added to Explored.
static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored) {
SmallVector<Value *, 16> WorkList(1, Start);
Explored.insert(Base);
// The following traversal gives us an order which can be used
// when doing the final transformation. Since in the final
// transformation we create the PHI replacement instructions first,
// we don't have to get them in any particular order.
//
// However, for other instructions we will have to traverse the
// operands of an instruction first, which means that we have to
// do a post-order traversal.
while (!WorkList.empty()) {
SetVector<PHINode *> PHIs;
while (!WorkList.empty()) {
if (Explored.size() >= 100)
return false;
Value *V = WorkList.back();
if (Explored.contains(V)) {
WorkList.pop_back();
continue;
}
if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
// We've found some value that we can't explore which is different from
// the base. Therefore we can't do this transformation.
return false;
if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
auto *CI = cast<CastInst>(V);
if (!CI->isNoopCast(DL))
return false;
if (!Explored.contains(CI->getOperand(0)))
WorkList.push_back(CI->getOperand(0));
}
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
// We're limiting the GEP to having one index. This will preserve
// the original pointer type. We could handle more cases in the
// future.
if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
GEP->getType() != Start->getType())
return false;
if (!Explored.contains(GEP->getOperand(0)))
WorkList.push_back(GEP->getOperand(0));
}
if (WorkList.back() == V) {
WorkList.pop_back();
// We've finished visiting this node, mark it as such.
Explored.insert(V);
}
if (auto *PN = dyn_cast<PHINode>(V)) {
// We cannot transform PHIs on unsplittable basic blocks.
if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
return false;
Explored.insert(PN);
PHIs.insert(PN);
}
}
// Explore the PHI nodes further.
for (auto *PN : PHIs)
for (Value *Op : PN->incoming_values())
if (!Explored.contains(Op))
WorkList.push_back(Op);
}
// Make sure that we can do this. Since we can't insert GEPs in a basic
// block before a PHI node, we can't easily do this transformation if
// we have PHI node users of transformed instructions.
for (Value *Val : Explored) {
for (Value *Use : Val->uses()) {
auto *PHI = dyn_cast<PHINode>(Use);
auto *Inst = dyn_cast<Instruction>(Val);
if (Inst == Base || Inst == PHI || !Inst || !PHI ||
!Explored.contains(PHI))
continue;
if (PHI->getParent() == Inst->getParent())
return false;
}
}
return true;
}
// Sets the appropriate insert point on Builder where we can add
// a replacement Instruction for V (if that is possible).
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
bool Before = true) {
if (auto *PHI = dyn_cast<PHINode>(V)) {
Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
return;
}
if (auto *I = dyn_cast<Instruction>(V)) {
if (!Before)
I = &*std::next(I->getIterator());
Builder.SetInsertPoint(I);
return;
}
if (auto *A = dyn_cast<Argument>(V)) {
// Set the insertion point in the entry block.
BasicBlock &Entry = A->getParent()->getEntryBlock();
Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
return;
}
// Otherwise, this is a constant and we don't need to set a new
// insertion point.
assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
}
/// Returns a re-written value of Start as an indexed GEP using Base as a
/// pointer.
static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored) {
// Perform all the substitutions. This is a bit tricky because we can
// have cycles in our use-def chains.
// 1. Create the PHI nodes without any incoming values.
// 2. Create all the other values.
// 3. Add the edges for the PHI nodes.
// 4. Emit GEPs to get the original pointers.
// 5. Remove the original instructions.
Type *IndexType = IntegerType::get(
Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
DenseMap<Value *, Value *> NewInsts;
NewInsts[Base] = ConstantInt::getNullValue(IndexType);
// Create the new PHI nodes, without adding any incoming values.
for (Value *Val : Explored) {
if (Val == Base)
continue;
// Create empty phi nodes. This avoids cyclic dependencies when creating
// the remaining instructions.
if (auto *PHI = dyn_cast<PHINode>(Val))
NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
PHI->getName() + ".idx", PHI);
}
IRBuilder<> Builder(Base->getContext());
// Create all the other instructions.
for (Value *Val : Explored) {
if (NewInsts.find(Val) != NewInsts.end())
continue;
if (auto *CI = dyn_cast<CastInst>(Val)) {
// Don't get rid of the intermediate variable here; the store can grow
// the map which will invalidate the reference to the input value.
Value *V = NewInsts[CI->getOperand(0)];
NewInsts[CI] = V;
continue;
}
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
: GEP->getOperand(1);
setInsertionPoint(Builder, GEP);
// Indices might need to be sign extended. GEPs will magically do
// this, but we need to do it ourselves here.
if (Index->getType()->getScalarSizeInBits() !=
NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
Index = Builder.CreateSExtOrTrunc(
Index, NewInsts[GEP->getOperand(0)]->getType(),
GEP->getOperand(0)->getName() + ".sext");
}
auto *Op = NewInsts[GEP->getOperand(0)];
if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
NewInsts[GEP] = Index;
else
NewInsts[GEP] = Builder.CreateNSWAdd(
Op, Index, GEP->getOperand(0)->getName() + ".add");
continue;
}
if (isa<PHINode>(Val))
continue;
llvm_unreachable("Unexpected instruction type");
}
// Add the incoming values to the PHI nodes.
for (Value *Val : Explored) {
if (Val == Base)
continue;
// All the instructions have been created, we can now add edges to the
// phi nodes.
if (auto *PHI = dyn_cast<PHINode>(Val)) {
PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
Value *NewIncoming = PHI->getIncomingValue(I);
if (NewInsts.find(NewIncoming) != NewInsts.end())
NewIncoming = NewInsts[NewIncoming];
NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
}
}
}
for (Value *Val : Explored) {
if (Val == Base)
continue;
// Depending on the type, for external users we have to emit
// a GEP or a GEP + ptrtoint.
setInsertionPoint(Builder, Val, false);
// If required, create an inttoptr instruction for Base.
Value *NewBase = Base;
if (!Base->getType()->isPointerTy())
NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
Start->getName() + "to.ptr");
Value *GEP = Builder.CreateInBoundsGEP(
Start->getType()->getPointerElementType(), NewBase,
makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");
if (!Val->getType()->isPointerTy()) {
Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
Val->getName() + ".conv");
GEP = Cast;
}
Val->replaceAllUsesWith(GEP);
}
return NewInsts[Start];
}
/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
/// the input Value as a constant indexed GEP. Returns a pair containing
/// the GEPs Pointer and Index.
static std::pair<Value *, Value *>
getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
Type *IndexType = IntegerType::get(V->getContext(),
DL.getIndexTypeSizeInBits(V->getType()));
Constant *Index = ConstantInt::getNullValue(IndexType);
while (true) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// We accept only inbouds GEPs here to exclude the possibility of
// overflow.
if (!GEP->isInBounds())
break;
if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
GEP->getType() == V->getType()) {
V = GEP->getOperand(0);
Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
Index = ConstantExpr::getAdd(
Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
continue;
}
break;
}
if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
if (!CI->isNoopCast(DL))
break;
V = CI->getOperand(0);
continue;
}
if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
if (!CI->isNoopCast(DL))
break;
V = CI->getOperand(0);
continue;
}
break;
}
return {V, Index};
}
/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
/// We can look through PHIs, GEPs and casts in order to determine a common base
/// between GEPLHS and RHS.
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
const DataLayout &DL) {
// FIXME: Support vector of pointers.
if (GEPLHS->getType()->isVectorTy())
return nullptr;
if (!GEPLHS->hasAllConstantIndices())
return nullptr;
// Make sure the pointers have the same type.
if (GEPLHS->getType() != RHS->getType())
return nullptr;
Value *PtrBase, *Index;
std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
// The set of nodes that will take part in this transformation.
SetVector<Value *> Nodes;
if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
return nullptr;
// We know we can re-write this as
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
// Since we've only looked through inbouds GEPs we know that we
// can't have overflow on either side. We can therefore re-write
// this as:
// OFFSET1 cmp OFFSET2
Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
// RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
// GEP having PtrBase as the pointer base, and has returned in NewRHS the
// offset. Since Index is the offset of LHS to the base pointer, we will now
// compare the offsets instead of comparing the pointers.
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
}
/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
Instruction &I) {
// Don't transform signed compares of GEPs into index compares. Even if the
// GEP is inbounds, the final add of the base pointer can have signed overflow
// and would change the result of the icmp.
// e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
// the maximum signed value for the pointer type.
if (ICmpInst::isSigned(Cond))
return nullptr;
// Look through bitcasts and addrspacecasts. We do not however want to remove
// 0 GEPs.
if (!isa<GetElementPtrInst>(RHS))
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
// FIXME: Support vector pointer GEPs.
if (PtrBase == RHS && GEPLHS->isInBounds() &&
!GEPLHS->getType()->isVectorTy()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
Offset = EmitGEPOffset(GEPLHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
}
if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) &&
isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() &&
!NullPointerIsDefined(I.getFunction(),
RHS->getType()->getPointerAddressSpace())) {
// For most address spaces, an allocation can't be placed at null, but null
// itself is treated as a 0 size allocation in the in bounds rules. Thus,
// the only valid inbounds address derived from null, is null itself.
// Thus, we have four cases to consider:
// 1) Base == nullptr, Offset == 0 -> inbounds, null
// 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds
// 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations)
// 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison)
//
// (Note if we're indexing a type of size 0, that simply collapses into one
// of the buckets above.)
//
// In general, we're allowed to make values less poison (i.e. remove
// sources of full UB), so in this case, we just select between the two
// non-poison cases (1 and 4 above).
//
// For vectors, we apply the same reasoning on a per-lane basis.
auto *Base = GEPLHS->getPointerOperand();
if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount();
Base = Builder.CreateVectorSplat(EC, Base);
}
return new ICmpInst(Cond, Base,
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
cast<Constant>(RHS), Base->getType()));
} else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
// If the base pointers are different, but the indices are the same, just
// compare the base pointer.
if (PtrBase != GEPRHS->getOperand(0)) {
bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
GEPRHS->getOperand(0)->getType();
if (IndicesTheSame)
for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
IndicesTheSame = false;
break;
}
// If all indices are the same, just compare the base pointers.
Type *BaseType = GEPLHS->getOperand(0)->getType();
if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
// FIXME: Support vector of pointers.
if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
GEPRHS->getOperand(0)->stripPointerCasts() &&
!GEPLHS->getType()->isVectorTy()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
// If we looked through an addrspacecast between different sized address
// spaces, the LHS and RHS pointers are different sized
// integers. Truncate to the smaller one.
Type *LHSIndexTy = LOffset->getType();
Type *RHSIndexTy = ROffset->getType();
if (LHSIndexTy != RHSIndexTy) {
if (LHSIndexTy->getPrimitiveSizeInBits().getFixedSize() <
RHSIndexTy->getPrimitiveSizeInBits().getFixedSize()) {
ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
} else
LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
}
Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
LOffset, ROffset);
return replaceInstUsesWith(I, Cmp);
}
// Otherwise, the base pointers are different and the indices are
// different. Try convert this to an indexed compare by looking through
// PHIs/casts.
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
// If one of the GEPs has all zero indices, recurse.
// FIXME: Handle vector of pointers.
if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices())
return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
ICmpInst::getSwappedPredicate(Cond), I);
// If the other GEP has all zero indices, recurse.
// FIXME: Handle vector of pointers.
if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices())
return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
// If the GEPs only differ by one index, compare it.
unsigned NumDifferences = 0; // Keep track of # differences.
unsigned DiffOperand = 0; // The operand that differs.
for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
Type *LHSType = GEPLHS->getOperand(i)->getType();
Type *RHSType = GEPRHS->getOperand(i)->getType();
// FIXME: Better support for vector of pointers.
if (LHSType->getPrimitiveSizeInBits() !=
RHSType->getPrimitiveSizeInBits() ||
(GEPLHS->getType()->isVectorTy() &&
(!LHSType->isVectorTy() || !RHSType->isVectorTy()))) {
// Irreconcilable differences.
NumDifferences = 2;
break;
}
if (NumDifferences++) break;
DiffOperand = i;
}
if (NumDifferences == 0) // SAME GEP?
return replaceInstUsesWith(I, // No comparison is needed here.
ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
Value *RHSV = GEPRHS->getOperand(DiffOperand);
// Make sure we do a signed comparison here.
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
}
}
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
Value *R = EmitGEPOffset(GEPRHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
}
}
// Try convert this to an indexed compare by looking through PHIs/casts as a
// last resort.
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
const AllocaInst *Alloca,
const Value *Other) {
assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
// It would be tempting to fold away comparisons between allocas and any
// pointer not based on that alloca (e.g. an argument). However, even
// though such pointers cannot alias, they can still compare equal.
//
// But LLVM doesn't specify where allocas get their memory, so if the alloca
// doesn't escape we can argue that it's impossible to guess its value, and we
// can therefore act as if any such guesses are wrong.
//
// The code below checks that the alloca doesn't escape, and that it's only
// used in a comparison once (the current instruction). The
// single-comparison-use condition ensures that we're trivially folding all
// comparisons against the alloca consistently, and avoids the risk of
// erroneously folding a comparison of the pointer with itself.
unsigned MaxIter = 32; // Break cycles and bound to constant-time.
SmallVector<const Use *, 32> Worklist;
for (const Use &U : Alloca->uses()) {
if (Worklist.size() >= MaxIter)
return nullptr;
Worklist.push_back(&U);
}
unsigned NumCmps = 0;
while (!Worklist.empty()) {
assert(Worklist.size() <= MaxIter);
const Use *U = Worklist.pop_back_val();
const Value *V = U->getUser();
--MaxIter;
if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
isa<SelectInst>(V)) {
// Track the uses.
} else if (isa<LoadInst>(V)) {
// Loading from the pointer doesn't escape it.
continue;
} else if (const auto *SI = dyn_cast<StoreInst>(V)) {
// Storing *to* the pointer is fine, but storing the pointer escapes it.
if (SI->getValueOperand() == U->get())
return nullptr;
continue;
} else if (isa<ICmpInst>(V)) {
if (NumCmps++)
return nullptr; // Found more than one cmp.
continue;
} else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
switch (Intrin->getIntrinsicID()) {
// These intrinsics don't escape or compare the pointer. Memset is safe
// because we don't allow ptrtoint. Memcpy and memmove are safe because
// we don't allow stores, so src cannot point to V.
case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
continue;
default:
return nullptr;
}
} else {
return nullptr;
}
for (const Use &U : V->uses()) {
if (Worklist.size() >= MaxIter)
return nullptr;
Worklist.push_back(&U);
}
}
Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
return replaceInstUsesWith(
ICI,
ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}
/// Fold "icmp pred (X+C), X".
Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
assert(!!C && "C should not be zero!");
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
Constant *R = ConstantInt::get(X->getType(),
APInt::getMaxValue(C.getBitWidth()) - C);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
// (X+1) >u X --> X <u (0-1) --> X != 255
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_ULT, X,
ConstantInt::get(X->getType(), -C));
APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
// (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
// (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
// (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
// (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_SGT, X,
ConstantInt::get(X->getType(), SMax - C));
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
// (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
// (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
// (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
return new ICmpInst(ICmpInst::ICMP_SLT, X,
ConstantInt::get(X->getType(), SMax - (C - 1)));
}
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
const APInt &AP1,
const APInt &AP2) {
assert(I.isEquality() && "Cannot fold icmp gt/lt");
auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
if (I.getPredicate() == I.ICMP_NE)
Pred = CmpInst::getInversePredicate(Pred);
return new ICmpInst(Pred, LHS, RHS);
};
// Don't bother doing any work for cases which InstSimplify handles.
if (AP2.isZero())
return nullptr;
bool IsAShr = isa<AShrOperator>(I.getOperand(0));
if (IsAShr) {
if (AP2.isAllOnes())
return nullptr;
if (AP2.isNegative() != AP1.isNegative())
return nullptr;
if (AP2.sgt(AP1))
return nullptr;
}
if (!AP1)
// 'A' must be large enough to shift out the highest set bit.
return getICmp(I.ICMP_UGT, A,
ConstantInt::get(A->getType(), AP2.logBase2()));
if (AP1 == AP2)
return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
int Shift;
if (IsAShr && AP1.isNegative())
Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
else
Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
if (Shift > 0) {
if (IsAShr && AP1 == AP2.ashr(Shift)) {
// There are multiple solutions if we are comparing against -1 and the LHS
// of the ashr is not a power of two.
if (AP1.isAllOnes() && !AP2.isPowerOf2())
return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
} else if (AP1 == AP2.lshr(Shift)) {
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
}
}
// Shifting const2 will never be equal to const1.
// FIXME: This should always be handled by InstSimplify?
auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
return replaceInstUsesWith(I, TorF);
}
/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
const APInt &AP1,
const APInt &AP2) {
assert(I.isEquality() && "Cannot fold icmp gt/lt");
auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
if (I.getPredicate() == I.ICMP_NE)
Pred = CmpInst::getInversePredicate(Pred);
return new ICmpInst(Pred, LHS, RHS);
};
// Don't bother doing any work for cases which InstSimplify handles.
if (AP2.isZero())
return nullptr;
unsigned AP2TrailingZeros = AP2.countTrailingZeros();
if (!AP1 && AP2TrailingZeros != 0)
return getICmp(
I.ICMP_UGE, A,
ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
if (AP1 == AP2)
return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
// Get the distance between the lowest bits that are set.
int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
if (Shift > 0 && AP2.shl(Shift) == AP1)
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
// Shifting const2 will never be equal to const1.
// FIXME: This should always be handled by InstSimplify?
auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
return replaceInstUsesWith(I, TorF);
}
/// The caller has matched a pattern of the form:
/// I = icmp ugt (add (add A, B), CI2), CI1
/// If this is of the form:
/// sum = a + b
/// if (sum+128 >u 255)
/// Then replace it with llvm.sadd.with.overflow.i8.
///
static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
ConstantInt *CI2, ConstantInt *CI1,
InstCombinerImpl &IC) {
// The transformation we're trying to do here is to transform this into an
// llvm.sadd.with.overflow. To do this, we have to replace the original add
// with a narrower add, and discard the add-with-constant that is part of the
// range check (if we can't eliminate it, this isn't profitable).
// In order to eliminate the add-with-constant, the compare can be its only
// use.
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
if (!AddWithCst->hasOneUse())
return nullptr;
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
if (!CI2->getValue().isPowerOf2())
return nullptr;
unsigned NewWidth = CI2->getValue().countTrailingZeros();
if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
return nullptr;
// The width of the new add formed is 1 more than the bias.
++NewWidth;
// Check to see that CI1 is an all-ones value with NewWidth bits.
if (CI1->getBitWidth() == NewWidth ||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return nullptr;
// This is only really a signed overflow check if the inputs have been
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
// the operands of the add are 64 bits wide, we need at least 33 sign bits.
if (IC.ComputeMinSignedBits(A, 0, &I) > NewWidth ||
IC.ComputeMinSignedBits(B, 0, &I) > NewWidth)
return nullptr;
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
// the case.
Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
for (User *U : OrigAdd->users()) {
if (U == AddWithCst)
continue;
// Only accept truncates for now. We would really like a nice recursive
// predicate like SimplifyDemandedBits, but which goes downwards the use-def
// chain to see which bits of a value are actually demanded. If the
// original add had another add which was then immediately truncated, we
// could still do the transformation.
TruncInst *TI = dyn_cast<TruncInst>(U);
if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
return nullptr;
}
// If the pattern matches, truncate the inputs to the narrower type and
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
Function *F = Intrinsic::getDeclaration(
I.getModule(), Intrinsic::sadd_with_overflow, NewType);
InstCombiner::BuilderTy &Builder = IC.Builder;
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
Builder.SetInsertPoint(OrigAdd);
Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
// The inner add was the result of the narrow add, zero extended to the
// wider type. Replace it with the result computed by the intrinsic.
IC.replaceInstUsesWith(*OrigAdd, ZExt);
IC.eraseInstFromFunction(*OrigAdd);
// The original icmp gets replaced with the overflow value.
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
/// If we have:
/// icmp eq/ne (urem/srem %x, %y), 0
/// iff %y is a power-of-two, we can replace this with a bit test:
/// icmp eq/ne (and %x, (add %y, -1)), 0
Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
// This fold is only valid for equality predicates.
if (!I.isEquality())
return nullptr;
ICmpInst::Predicate Pred;
Value *X, *Y, *Zero;
if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
m_CombineAnd(m_Zero(), m_Value(Zero)))))
return nullptr;
if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
return nullptr;
// This may increase instruction count, we don't enforce that Y is a constant.
Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
Value *Masked = Builder.CreateAnd(X, Mask);
return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
}
/// Fold equality-comparison between zero and any (maybe truncated) right-shift
/// by one-less-than-bitwidth into a sign test on the original value.
Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
Instruction *Val;
ICmpInst::Predicate Pred;
if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
return nullptr;
Value *X;
Type *XTy;
Constant *C;
if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) {
XTy = X->getType();
unsigned XBitWidth = XTy->getScalarSizeInBits();
if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
APInt(XBitWidth, XBitWidth - 1))))
return nullptr;
} else if (isa<BinaryOperator>(Val) &&
(X = reassociateShiftAmtsOfTwoSameDirectionShifts(
cast<BinaryOperator>(Val), SQ.getWithInstruction(Val),
/*AnalyzeForSignBitExtraction=*/true))) {
XTy = X->getType();
} else
return nullptr;
return ICmpInst::Create(Instruction::ICmp,
Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_SLT,
X, ConstantInt::getNullValue(XTy));
}
// Handle icmp pred X, 0
Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
if (!match(Cmp.getOperand(1), m_Zero()))
return nullptr;
// (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
if (SPR.Flavor == SPF_SMIN) {
if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
return New;
// Given:
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
Value *X, *Y;
if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
ICmpInst::isEquality(Pred)) {
KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
return new ICmpInst(Pred, X, Cmp.getOperand(1));
}
return nullptr;
}
/// Fold icmp Pred X, C.
/// TODO: This code structure does not make sense. The saturating add fold
/// should be moved to some other helper and extended as noted below (it is also
/// possible that code has been made unnecessary - do we canonicalize IR to
/// overflow/saturating intrinsics or not?).
Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an addition
// in wider type and explicitly checks for overflow using comparisons against
// INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
//
// TODO: This could probably be generalized to handle other overflow-safe
// operations if we worked out the formulas to compute the appropriate magic
// constants.
//
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
CmpInst::Predicate Pred = Cmp.getPredicate();
Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
Value *A, *B;
ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
return Res;
// icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
Constant *C = dyn_cast<Constant>(Op1);
if (!C || C->canTrap())
return nullptr;
if (auto *Phi = dyn_cast<PHINode>(Op0))
if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) {
Type *Ty = Cmp.getType();
Builder.SetInsertPoint(Phi);
PHINode *NewPhi =
Builder.CreatePHI(Ty, Phi->getNumOperands());
for (BasicBlock *Predecessor : predecessors(Phi->getParent())) {
auto *Input =
cast<Constant>(Phi->getIncomingValueForBlock(Predecessor));
auto *BoolInput = ConstantExpr::getCompare(Pred, Input, C);
NewPhi->addIncoming(BoolInput, Predecessor);
}
NewPhi->takeName(&Cmp);
return replaceInstUsesWith(Cmp, NewPhi);
}
return nullptr;
}
/// Canonicalize icmp instructions based on dominating conditions.
Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
// This is a cheap/incomplete check for dominance - just match a single
// predecessor with a conditional branch.
BasicBlock *CmpBB = Cmp.getParent();
BasicBlock *DomBB = CmpBB->getSinglePredecessor();
if (!DomBB)
return nullptr;
Value *DomCond;
BasicBlock *TrueBB, *FalseBB;
if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
return nullptr;
assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
"Predecessor block does not point to successor?");
// The branch should get simplified. Don't bother simplifying this condition.
if (TrueBB == FalseBB)
return nullptr;
// Try to simplify this compare to T/F based on the dominating condition.
Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
if (Imp)
return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
CmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
ICmpInst::Predicate DomPred;
const APInt *C, *DomC;
if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
match(Y, m_APInt(C))) {
// We have 2 compares of a variable with constants. Calculate the constant
// ranges of those compares to see if we can transform the 2nd compare:
// DomBB:
// DomCond = icmp DomPred X, DomC
// br DomCond, CmpBB, FalseBB
// CmpBB:
// Cmp = icmp Pred X, C
ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
ConstantRange DominatingCR =
(CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
: ConstantRange::makeExactICmpRegion(
CmpInst::getInversePredicate(DomPred), *DomC);
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
return replaceInstUsesWith(Cmp, Builder.getFalse());
if (Difference.isEmptySet())
return replaceInstUsesWith(Cmp, Builder.getTrue());
// Canonicalizing a sign bit comparison that gets used in a branch,
// pessimizes codegen by generating branch on zero instruction instead
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
bool UnusedBit;
bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
return nullptr;
// Avoid an infinite loop with min/max canonicalization.
// TODO: This will be unnecessary if we canonicalize to min/max intrinsics.
if (Cmp.hasOneUse() &&
match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
return nullptr;
if (const APInt *EqC = Intersection.getSingleElement())
return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
if (const APInt *NeC = Difference.getSingleElement())
return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
}
return nullptr;
}
/// Fold icmp (trunc X, Y), C.
Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
TruncInst *Trunc,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Trunc->getOperand(0);
if (C.isOne() && C.getBitWidth() > 1) {
// icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
return new ICmpInst(ICmpInst::ICMP_SLT, V,
ConstantInt::get(V->getType(), 1));
}
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
SrcBits = X->getType()->getScalarSizeInBits();
if (Cmp.isEquality() && Trunc->hasOneUse()) {
// Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
// of the high bits truncated out of x are known.
KnownBits Known = computeKnownBits(X, 0, &Cmp);
// If all the high bits are known, we can do this xform.
if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
// Pull in the high bits from known-ones set.
APInt NewRHS = C.zext(SrcBits);
NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
}
}
// Look through truncated right-shift of the sign-bit for a sign-bit check:
// trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] < 0 --> ShOp < 0
// trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] > -1 --> ShOp > -1
Value *ShOp;
const APInt *ShAmtC;
bool TrueIfSigned;
if (isSignBitCheck(Pred, C, TrueIfSigned) &&
match(X, m_Shr(m_Value(ShOp), m_APInt(ShAmtC))) &&
DstBits == SrcBits - ShAmtC->getZExtValue()) {
return TrueIfSigned
? new ICmpInst(ICmpInst::ICMP_SLT, ShOp,
ConstantInt::getNullValue(X->getType()))
: new ICmpInst(ICmpInst::ICMP_SGT, ShOp,
ConstantInt::getAllOnesValue(X->getType()));
}
return nullptr;
}
/// Fold icmp (xor X, Y), C.
Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
BinaryOperator *Xor,
const APInt &C) {
Value *X = Xor->getOperand(0);
Value *Y = Xor->getOperand(1);
const APInt *XorC;
if (!match(Y, m_APInt(XorC)))
return nullptr;
// If this is a comparison that tests the signbit (X < 0) or (x > -1),
// fold the xor.
ICmpInst::Predicate Pred = Cmp.getPredicate();
bool TrueIfSigned = false;
if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
// If the sign bit of the XorCst is not set, there is no change to
// the operation, just stop using the Xor.
if (!XorC->isNegative())
return replaceOperand(Cmp, 0, X);
// Emit the opposite comparison.
if (TrueIfSigned)
return new ICmpInst(ICmpInst::ICMP_SGT, X,
ConstantInt::getAllOnesValue(X->getType()));
else
return new ICmpInst(ICmpInst::ICMP_SLT, X,
ConstantInt::getNullValue(X->getType()));
}
if (Xor->hasOneUse()) {
// (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
if (!Cmp.isEquality() && XorC->isSignMask()) {
Pred = Cmp.getFlippedSignednessPredicate();
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
}
// (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
Pred = Cmp.getFlippedSignednessPredicate();
Pred = Cmp.getSwappedPredicate(Pred);
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
}
}
// Mask constant magic can eliminate an 'xor' with unsigned compares.
if (Pred == ICmpInst::ICMP_UGT) {
// (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
if (*XorC == ~C && (C + 1).isPowerOf2())
return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
// (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
if (*XorC == C && (C + 1).isPowerOf2())
return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
}
if (Pred == ICmpInst::ICMP_ULT) {
// (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
if (*XorC == -C && C.isPowerOf2())
return new ICmpInst(ICmpInst::ICMP_UGT, X,
ConstantInt::get(X->getType(), ~C));
// (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
if (*XorC == C && (-C).isPowerOf2())
return new ICmpInst(ICmpInst::ICMP_UGT, X,
ConstantInt::get(X->getType(), ~C));
}
return nullptr;
}
/// Fold icmp (and (sh X, Y), C2), C1.
Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1,
const APInt &C2) {
BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
if (!Shift || !Shift->isShift())
return nullptr;
// If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
// exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
// code produced by the clang front-end, for bitfield access.
// This seemingly simple opportunity to fold away a shift turns out to be
// rather complicated. See PR17827 for details.
unsigned ShiftOpcode = Shift->getOpcode();
bool IsShl = ShiftOpcode == Instruction::Shl;
const APInt *C3;
if (match(Shift->getOperand(1), m_APInt(C3))) {
APInt NewAndCst, NewCmpCst;
bool AnyCmpCstBitsShiftedOut;
if (ShiftOpcode == Instruction::Shl) {
// For a left shift, we can fold if the comparison is not signed. We can
// also fold a signed comparison if the mask value and comparison value
// are not negative. These constraints may not be obvious, but we can
// prove that they are correct using an SMT solver.
if (Cmp.isSigned() && (C2.isNegative() || C1.isNegative()))
return nullptr;
NewCmpCst = C1.lshr(*C3);
NewAndCst = C2.lshr(*C3);
AnyCmpCstBitsShiftedOut = NewCmpCst.shl(*C3) != C1;
} else if (ShiftOpcode == Instruction::LShr) {
// For a logical right shift, we can fold if the comparison is not signed.
// We can also fold a signed comparison if the shifted mask value and the
// shifted comparison value are not negative. These constraints may not be
// obvious, but we can prove that they are correct using an SMT solver.
NewCmpCst = C1.shl(*C3);
NewAndCst = C2.shl(*C3);
AnyCmpCstBitsShiftedOut = NewCmpCst.lshr(*C3) != C1;
if (Cmp.isSigned() && (NewAndCst.isNegative() || NewCmpCst.isNegative()))
return nullptr;
} else {
// For an arithmetic shift, check that both constants don't use (in a
// signed sense) the top bits being shifted out.
assert(ShiftOpcode == Instruction::AShr && "Unknown shift opcode");
NewCmpCst = C1.shl(*C3);
NewAndCst = C2.shl(*C3);
AnyCmpCstBitsShiftedOut = NewCmpCst.ashr(*C3) != C1;
if (NewAndCst.ashr(*C3) != C2)
return nullptr;
}
if (AnyCmpCstBitsShiftedOut) {
// If we shifted bits out, the fold is not going to work out. As a
// special case, check to see if this means that the result is always
// true or false now.
if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
} else {
Value *NewAnd = Builder.CreateAnd(
Shift->getOperand(0), ConstantInt::get(And->getType(), NewAndCst));
return new ICmpInst(Cmp.getPredicate(),
NewAnd, ConstantInt::get(And->getType(), NewCmpCst));
}
}
// Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
// preferable because it allows the C2 << Y expression to be hoisted out of a
// loop if Y is invariant and X is not.
if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() &&
!Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
// Compute C2 << Y.
Value *NewShift =
IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
: Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
// Compute X & (C2 << Y).
Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
return replaceOperand(Cmp, 0, NewAnd);
}
return nullptr;
}
/// Fold icmp (and X, C2), C1.
Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
// TODO: We canonicalize to the longer form for scalars because we have
// better analysis/folds for icmp, and codegen may be better with icmp.
if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() &&
match(And->getOperand(1), m_One()))
return new TruncInst(And->getOperand(0), Cmp.getType());
const APInt *C2;
Value *X;
if (!match(And, m_And(m_Value(X), m_APInt(C2))))
return nullptr;
// Don't perform the following transforms if the AND has multiple uses
if (!And->hasOneUse())
return nullptr;
if (Cmp.isEquality() && C1.isZero()) {
// Restrict this fold to single-use 'and' (PR10267).
// Replace (and X, (1 << size(X)-1) != 0) with X s< 0
if (C2->isSignMask()) {
Constant *Zero = Constant::getNullValue(X->getType());
auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(NewPred, X, Zero);
}
// Restrict this fold only for single-use 'and' (PR10267).
// ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two.
if ((~(*C2) + 1).isPowerOf2()) {
Constant *NegBOC =
ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(NewPred, X, NegBOC);
}
}
// If the LHS is an 'and' of a truncate and we can widen the and/compare to
// the input width without changing the value produced, eliminate the cast:
//
// icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
//
// We can do this transformation if the constants do not have their sign bits
// set or if it is an equality comparison. Extending a relational comparison
// when we're checking the sign bit would not work.
Value *W;
if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
(Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
// TODO: Is this a good transform for vectors? Wider types may reduce
// throughput. Should this transform be limited (even for scalars) by using
// shouldChangeType()?
if (!Cmp.getType()->isVectorTy()) {
Type *WideType = W->getType();
unsigned WideScalarBits = WideType->getScalarSizeInBits();
Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
}
}
if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
return I;
// (icmp pred (and (or (lshr A, B), A), 1), 0) -->
// (icmp pred (and A, (or (shl 1, B), 1), 0))
//
// iff pred isn't signed
if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() &&
match(And->getOperand(1), m_One())) {
Constant *One = cast<Constant>(And->getOperand(1));
Value *Or = And->getOperand(0);
Value *A, *B, *LShr;
if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
unsigned UsesRemoved = 0;
if (And->hasOneUse())
++UsesRemoved;
if (Or->hasOneUse())
++UsesRemoved;
if (LShr->hasOneUse())
++UsesRemoved;
// Compute A & ((1 << B) | 1)
Value *NewOr = nullptr;
if (auto *C = dyn_cast<Constant>(B)) {
if (UsesRemoved >= 1)
NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
} else {
if (UsesRemoved >= 3)
NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
/*HasNUW=*/true),
One, Or->getName());
}
if (NewOr) {
Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
return replaceOperand(Cmp, 0, NewAnd);
}
}
}
return nullptr;
}
/// Fold icmp (and X, Y), C.
Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C) {
if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
return I;
const ICmpInst::Predicate Pred = Cmp.getPredicate();
bool TrueIfNeg;
if (isSignBitCheck(Pred, C, TrueIfNeg)) {
// ((X - 1) & ~X) < 0 --> X == 0
// ((X - 1) & ~X) >= 0 --> X != 0
Value *X;
if (match(And->getOperand(0), m_Add(m_Value(X), m_AllOnes())) &&
match(And->getOperand(1), m_Not(m_Specific(X)))) {
auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
return new ICmpInst(NewPred, X, ConstantInt::getNullValue(X->getType()));
}
}
// TODO: These all require that Y is constant too, so refactor with the above.
// Try to optimize things like "A[i] & 42 == 0" to index computations.
Value *X = And->getOperand(0);
Value *Y = And->getOperand(1);
if (auto *LI = dyn_cast<LoadInst>(X))
if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
!LI->isVolatile() && isa<ConstantInt>(Y)) {
ConstantInt *C2 = cast<ConstantInt>(Y);
if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
return Res;
}
if (!Cmp.isEquality())
return nullptr;
// X & -C == -C -> X > u ~C
// X & -C != -C -> X <= u ~C
// iff C is a power of 2
if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) {
auto NewPred =
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
}
return nullptr;
}
/// Fold icmp (or X, Y), C.
Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
BinaryOperator *Or,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
if (C.isOne()) {
// icmp slt signum(V) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
return new ICmpInst(ICmpInst::ICMP_SLT, V,
ConstantInt::get(V->getType(), 1));
}
Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
const APInt *MaskC;
if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) {
if (*MaskC == C && (C + 1).isPowerOf2()) {
// X | C == C --> X <=u C
// X | C != C --> X >u C
// iff C+1 is a power of 2 (C is a bitmask of the low bits)
Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
return new ICmpInst(Pred, OrOp0, OrOp1);
}
// More general: canonicalize 'equality with set bits mask' to
// 'equality with clear bits mask'.
// (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
// (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC
if (Or->hasOneUse()) {
Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC));
Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC));
return new ICmpInst(Pred, And, NewC);
}
}
// (X | (X-1)) s< 0 --> X s< 1
// (X | (X-1)) s> -1 --> X s> 0
Value *X;
bool TrueIfSigned;
if (isSignBitCheck(Pred, C, TrueIfSigned) &&
match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) {
auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0);
return new ICmpInst(NewPred, X, NewC);
}
if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
return nullptr;
Value *P, *Q;
if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
// Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
// -> and (icmp eq P, null), (icmp eq Q, null).
Value *CmpP =
Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
Value *CmpQ =
Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return BinaryOperator::Create(BOpc, CmpP, CmpQ);
}
// Are we using xors to bitwise check for a pair of (in)equalities? Convert to
// a shorter form that has more potential to be folded even further.
Value *X1, *X2, *X3, *X4;
if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
// ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
// ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
}
return nullptr;
}
/// Fold icmp (mul X, Y), C.
Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
BinaryOperator *Mul,
const APInt &C) {
const APInt *MulC;
if (!match(Mul->getOperand(1), m_APInt(MulC)))
return nullptr;
// If this is a test of the sign bit and the multiply is sign-preserving with
// a constant operand, use the multiply LHS operand instead.
ICmpInst::Predicate Pred = Cmp.getPredicate();
if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
if (MulC->isNegative())
Pred = ICmpInst::getSwappedPredicate(Pred);
return new ICmpInst(Pred, Mul->getOperand(0),
Constant::getNullValue(Mul->getType()));
}
// If the multiply does not wrap, try to divide the compare constant by the
// multiplication factor.
if (Cmp.isEquality() && !MulC->isZero()) {
// (mul nsw X, MulC) == C --> X == C /s MulC
if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
// (mul nuw X, MulC) == C --> X == C /u MulC
if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
}
return nullptr;
}
/// Fold icmp (shl 1, Y), C.
static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
const APInt &C) {
Value *Y;
if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
return nullptr;
Type *ShiftType = Shl->getType();
unsigned TypeBits = C.getBitWidth();
bool CIsPowerOf2 = C.isPowerOf2();
ICmpInst::Predicate Pred = Cmp.getPredicate();
if (Cmp.isUnsigned()) {
// (1 << Y) pred C -> Y pred Log2(C)
if (!CIsPowerOf2) {
// (1 << Y) < 30 -> Y <= 4
// (1 << Y) <= 30 -> Y <= 4
// (1 << Y) >= 30 -> Y > 4
// (1 << Y) > 30 -> Y > 4
if (Pred == ICmpInst::ICMP_ULT)
Pred = ICmpInst::ICMP_ULE;
else if (Pred == ICmpInst::ICMP_UGE)
Pred = ICmpInst::ICMP_UGT;
}
// (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
// (1 << Y) < 2147483648 -> Y < 31 -> Y != 31
unsigned CLog2 = C.logBase2();
if (CLog2 == TypeBits - 1) {
if (Pred == ICmpInst::ICMP_UGE)
Pred = ICmpInst::ICMP_EQ;
else if (Pred == ICmpInst::ICMP_ULT)
Pred = ICmpInst::ICMP_NE;
}
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
} else if (Cmp.isSigned()) {
Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
if (C.isAllOnes()) {
// (1 << Y) <= -1 -> Y == 31
if (Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
// (1 << Y) > -1 -> Y != 31
if (Pred == ICmpInst::ICMP_SGT)
return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
} else if (!C) {
// (1 << Y) < 0 -> Y == 31
// (1 << Y) <= 0 -> Y == 31
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
// (1 << Y) >= 0 -> Y != 31
// (1 << Y) > 0 -> Y != 31
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
}
} else if (Cmp.isEquality() && CIsPowerOf2) {
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
}
return nullptr;
}
/// Fold icmp (shl X, Y), C.
Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
BinaryOperator *Shl,
const APInt &C) {
const APInt *ShiftVal;
if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
const APInt *ShiftAmt;
if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
return foldICmpShlOne(Cmp, Shl, C);
// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited, it will be simplified.
unsigned TypeBits = C.getBitWidth();
if (ShiftAmt->uge(TypeBits))
return nullptr;
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Shl->getOperand(0);
Type *ShType = Shl->getType();
// NSW guarantees that we are only shifting out sign bits from the high bits,
// so we can ASHR the compare constant without needing a mask and eliminate
// the shift.
if (Shl->hasNoSignedWrap()) {
if (Pred == ICmpInst::ICMP_SGT) {
// icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
APInt ShiftedC = C.ashr(*ShiftAmt);
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
APInt ShiftedC = C.ashr(*ShiftAmt);
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
if (Pred == ICmpInst::ICMP_SLT) {
// SLE is the same as above, but SLE is canonicalized to SLT, so convert:
// (X << S) <=s C is equiv to X <=s (C >> S) for all C
// (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
// (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
assert(!C.isMinSignedValue() && "Unexpected icmp slt");
APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
// If this is a signed comparison to 0 and the shift is sign preserving,
// use the shift LHS operand instead; isSignTest may change 'Pred', so only
// do that if we're sure to not continue on in this function.
if (isSignTest(Pred, C))
return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
}
// NUW guarantees that we are only shifting out zero bits from the high bits,
// so we can LSHR the compare constant without needing a mask and eliminate
// the shift.
if (Shl->hasNoUnsignedWrap()) {
if (Pred == ICmpInst::ICMP_UGT) {
// icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
APInt ShiftedC = C.lshr(*ShiftAmt);
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
APInt ShiftedC = C.lshr(*ShiftAmt);
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
if (Pred == ICmpInst::ICMP_ULT) {
// ULE is the same as above, but ULE is canonicalized to ULT, so convert:
// (X << S) <=u C is equiv to X <=u (C >> S) for all C
// (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
// (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
assert(C.ugt(0) && "ult 0 should have been eliminated");
APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
}
if (Cmp.isEquality() && Shl->hasOneUse()) {
// Strength-reduce the shift into an 'and'.
Constant *Mask = ConstantInt::get(
ShType,
APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
return new ICmpInst(Pred, And, LShrC);
}
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
// (X << 31) <s 0 --> (X & 1) != 0
Constant *Mask = ConstantInt::get(
ShType,
APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
And, Constant::getNullValue(ShType));
}
// Simplify 'shl' inequality test into 'and' equality test.
if (Cmp.isUnsigned() && Shl->hasOneUse()) {
// (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
if ((C + 1).isPowerOf2() &&
(Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_NE,
And, Constant::getNullValue(ShType));
}
// (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
if (C.isPowerOf2() &&
(Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
Value *And =
Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_NE,
And, Constant::getNullValue(ShType));
}
}
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
// This enables us to get rid of the shift in favor of a trunc that may be
// free on the target. It has the additional benefit of comparing to a
// smaller constant that may be more target-friendly.
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {
Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
if (auto *ShVTy = dyn_cast<VectorType>(ShType))
TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount());
Constant *NewC =
ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
}
return nullptr;
}
/// Fold icmp ({al}shr X, Y), C.
Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
BinaryOperator *Shr,
const APInt &C) {
// An exact shr only shifts out zero bits, so:
// icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
Value *X = Shr->getOperand(0);
CmpInst::Predicate Pred = Cmp.getPredicate();
if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && C.isZero())
return new ICmpInst(Pred, X, Cmp.getOperand(1));
const APInt *ShiftVal;
if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
const APInt *ShiftAmt;
if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
return nullptr;
// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited it will be simplified.
unsigned TypeBits = C.getBitWidth();
unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
return nullptr;
bool IsAShr = Shr->getOpcode() == Instruction::AShr;
bool IsExact = Shr->isExact();
Type *ShrTy = Shr->getType();
// TODO: If we could guarantee that InstSimplify would handle all of the
// constant-value-based preconditions in the folds below, then we could assert
// those conditions rather than checking them. This is difficult because of
// undef/poison (PR34838).
if (IsAShr) {
if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
// icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
// icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
APInt ShiftedC = C.shl(ShAmtVal);
if (ShiftedC.ashr(ShAmtVal) == C)
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
if (Pred == CmpInst::ICMP_SGT) {
// icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
(ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
// If the compare constant has significant bits above the lowest sign-bit,
// then convert an unsigned cmp to a test of the sign-bit:
// (ashr X, ShiftC) u> C --> X s< 0
// (ashr X, ShiftC) u< C --> X s> -1
if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
if (Pred == CmpInst::ICMP_UGT) {
return new ICmpInst(CmpInst::ICMP_SLT, X,
ConstantInt::getNullValue(ShrTy));
}
if (Pred == CmpInst::ICMP_ULT) {
return new ICmpInst(CmpInst::ICMP_SGT, X,
ConstantInt::getAllOnesValue(ShrTy));
}
}
} else {
if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
// icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
// icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
APInt ShiftedC = C.shl(ShAmtVal);
if (ShiftedC.lshr(ShAmtVal) == C)
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
if (Pred == CmpInst::ICMP_UGT) {
// icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
}
if (!Cmp.isEquality())
return nullptr;
// Handle equality comparisons of shift-by-constant.
// If the comparison constant changes with the shift, the comparison cannot
// succeed (bits of the comparison constant cannot match the shifted value).
// This should be known by InstSimplify and already be folded to true/false.
assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
(!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
"Expected icmp+shr simplify did not occur.");
// If the bits shifted out are known zero, compare the unshifted value:
// (X & 4) >> 1 == 2 --> (X & 4) == 4.
if (Shr->isExact())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
if (C.isZero()) {
// == 0 is u< 1.
if (Pred == CmpInst::ICMP_EQ)
return new ICmpInst(CmpInst::ICMP_ULT, X,
ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
else
return new ICmpInst(CmpInst::ICMP_UGT, X,
ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
}
if (Shr->hasOneUse()) {
// Canonicalize the shift into an 'and':
// icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
Constant *Mask = ConstantInt::get(ShrTy, Val);
Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
}
return nullptr;
}
Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
BinaryOperator *SRem,
const APInt &C) {
// Match an 'is positive' or 'is negative' comparison of remainder by a
// constant power-of-2 value:
// (X % pow2C) sgt/slt 0
const ICmpInst::Predicate Pred = Cmp.getPredicate();
if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT)
return nullptr;
// TODO: The one-use check is standard because we do not typically want to
// create longer instruction sequences, but this might be a special-case
// because srem is not good for analysis or codegen.
if (!SRem->hasOneUse())
return nullptr;
const APInt *DivisorC;
if (!C.isZero() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
return nullptr;
// Mask off the sign bit and the modulo bits (low-bits).
Type *Ty = SRem->getType();
APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
// For 'is positive?' check that the sign-bit is clear and at least 1 masked
// bit is set. Example:
// (i8 X % 32) s> 0 --> (X & 159) s> 0
if (Pred == ICmpInst::ICMP_SGT)
return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty));
// For 'is negative?' check that the sign-bit is set and at least 1 masked
// bit is set. Example:
// (i16 X % 4) s< 0 --> (X & 32771) u> 32768
return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask));
}
/// Fold icmp (udiv X, Y), C.
Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
BinaryOperator *UDiv,
const APInt &C) {
const APInt *C2;
if (!match(UDiv->getOperand(0), m_APInt(C2)))
return nullptr;
assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
// (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
Value *Y = UDiv->getOperand(1);
if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
assert(!C.isMaxValue() &&
"icmp ugt X, UINT_MAX should have been simplified already.");
return new ICmpInst(ICmpInst::ICMP_ULE, Y,
ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
}
// (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
return new ICmpInst(ICmpInst::ICMP_UGT, Y,
ConstantInt::get(Y->getType(), C2->udiv(C)));
}
return nullptr;
}
/// Fold icmp ({su}div X, Y), C.
Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
BinaryOperator *Div,
const APInt &C) {
// Fold: icmp pred ([us]div X, C2), C -> range test
// Fold this div into the comparison, producing a range check.
// Determine, based on the divide type, what the range is being
// checked. If there is an overflow on the low or high side, remember
// it, otherwise compute the range [low, hi) bounding the new value.
// See: InsertRangeTest above for the kinds of replacements possible.
const APInt *C2;
if (!match(Div->getOperand(1), m_APInt(C2)))
return nullptr;
// FIXME: If the operand types don't match the type of the divide
// then don't attempt this transform. The code below doesn't have the
// logic to deal with a signed divide and an unsigned compare (and
// vice versa). This is because (x /s C2) <s C produces different
// results than (x /s C2) <u C or (x /u C2) <s C or even
// (x /u C2) <u C. Simply casting the operands and result won't
// work. :( The if statement below tests that condition and bails
// if it finds it.
bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
return nullptr;
// The ProdOV computation fails on divide by 0 and divide by -1. Cases with
// INT_MIN will also fail if the divisor is 1. Although folds of all these
// division-by-constant cases should be present, we can not assert that they
// have happened before we reach this icmp instruction.
if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes()))
return nullptr;
// Compute Prod = C * C2. We are essentially solving an equation of
// form X / C2 = C. We solve for X by multiplying C2 and C.
// By solving for X, we can turn this into a range check instead of computing
// a divide.
APInt Prod = C * *C2;
// Determine if the product overflows by seeing if the product is not equal to
// the divide. Make sure we do the same kind of divide as in the LHS
// instruction that we're folding.
bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
ICmpInst::Predicate Pred = Cmp.getPredicate();
// If the division is known to be exact, then there is no remainder from the
// divide, so the covered range size is unit, otherwise it is the divisor.
APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
// Figure out the interval that is being checked. For example, a comparison
// like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
// Compute this interval based on the constants involved and the signedness of
// the compare/divide. This computes a half-open interval, keeping track of
// whether either value in the interval overflows. After analysis each
// overflow variable is set to 0 if it's corresponding bound variable is valid
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
APInt LoBound, HiBound;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
LoBound = Prod;
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow) {
// If this is not an exact divide, then many values in the range collapse
// to the same result value.
HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
}
} else if (C2->isStrictlyPositive()) { // Divisor is > 0.
if (C.isZero()) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = -(RangeSize - 1);
HiBound = RangeSize;
} else if (C.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
} else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = Prod + 1;
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) {
APInt DivNeg = -RangeSize;
LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
}
}
} else if (C2->isNegative()) { // Divisor is < 0.
if (Div->isExact())
RangeSize.negate();
if (C.isZero()) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = RangeSize + 1;
HiBound = -RangeSize;
if (HiBound == *C2) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
}
} else if (C.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiBound = Prod + 1;
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
} else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
}
// Dividing by a negative swaps the condition. LT <-> GT
Pred = ICmpInst::getSwappedPredicate(Pred);
}