| //===- ValueTracking.cpp - Walk computations to compute properties --------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file contains routines that help analyze properties that chains of |
| // computations have. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/iterator_range.h" |
| #include "llvm/Analysis/AliasAnalysis.h" |
| #include "llvm/Analysis/AssumeBundleQueries.h" |
| #include "llvm/Analysis/AssumptionCache.h" |
| #include "llvm/Analysis/EHPersonalities.h" |
| #include "llvm/Analysis/GuardUtils.h" |
| #include "llvm/Analysis/InstructionSimplify.h" |
| #include "llvm/Analysis/Loads.h" |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/Analysis/TargetLibraryInfo.h" |
| #include "llvm/IR/Argument.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/ConstantRange.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GetElementPtrTypeIterator.h" |
| #include "llvm/IR/GlobalAlias.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsAArch64.h" |
| #include "llvm/IR/IntrinsicsRISCV.h" |
| #include "llvm/IR/IntrinsicsX86.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/IR/Metadata.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Operator.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MathExtras.h" |
| #include <algorithm> |
| #include <array> |
| #include <cassert> |
| #include <cstdint> |
| #include <iterator> |
| #include <utility> |
| |
| using namespace llvm; |
| using namespace llvm::PatternMatch; |
| |
| // Controls the number of uses of the value searched for possible |
| // dominating comparisons. |
| static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", |
| cl::Hidden, cl::init(20)); |
| |
| // According to the LangRef, branching on a poison condition is immediate, |
| // full UB. However, historically we have not implemented that consistently, |
| // because we have an important transformation (non-trivial unswitch) which |
| // introduces instances of branch on poison/undef into otherwise well-defined |
| // programs. This flag exists to let us test the optimization benefit of |
| // exploiting the specified behavior (in combination with enabling the |
| // unswitch fix). |
| static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub", |
| cl::Hidden, cl::init(false)); |
| |
| |
| /// Returns the bitwidth of the given scalar or pointer type. For vector types, |
| /// returns the element type's bitwidth. |
| static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { |
| if (unsigned BitWidth = Ty->getScalarSizeInBits()) |
| return BitWidth; |
| |
| return DL.getPointerTypeSizeInBits(Ty); |
| } |
| |
| namespace { |
| |
| // Simplifying using an assume can only be done in a particular control-flow |
| // context (the context instruction provides that context). If an assume and |
| // the context instruction are not in the same block then the DT helps in |
| // figuring out if we can use it. |
| struct Query { |
| const DataLayout &DL; |
| AssumptionCache *AC; |
| const Instruction *CxtI; |
| const DominatorTree *DT; |
| |
| // Unlike the other analyses, this may be a nullptr because not all clients |
| // provide it currently. |
| OptimizationRemarkEmitter *ORE; |
| |
| /// If true, it is safe to use metadata during simplification. |
| InstrInfoQuery IIQ; |
| |
| Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo, |
| OptimizationRemarkEmitter *ORE = nullptr) |
| : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {} |
| }; |
| |
| } // end anonymous namespace |
| |
| // Given the provided Value and, potentially, a context instruction, return |
| // the preferred context instruction (if any). |
| static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { |
| // If we've been provided with a context instruction, then use that (provided |
| // it has been inserted). |
| if (CxtI && CxtI->getParent()) |
| return CxtI; |
| |
| // If the value is really an already-inserted instruction, then use that. |
| CxtI = dyn_cast<Instruction>(V); |
| if (CxtI && CxtI->getParent()) |
| return CxtI; |
| |
| return nullptr; |
| } |
| |
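| // As above, but consider two values when choosing the preferred context |
| // instruction: try V1's defining instruction first, then V2's. |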
| static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) { |
| // If we've been provided with a context instruction, then use that (provided |
| // it has been inserted). |
| if (CxtI && CxtI->getParent()) |
| return CxtI; |
| |
| // If the value is really an already-inserted instruction, then use that. |
| CxtI = dyn_cast<Instruction>(V1); |
| if (CxtI && CxtI->getParent()) |
| return CxtI; |
| |
| CxtI = dyn_cast<Instruction>(V2); |
| if (CxtI && CxtI->getParent()) |
| return CxtI; |
| |
| return nullptr; |
| } |
| |
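| // Given the demanded elements of a shufflevector result, compute which |
| // elements of the two source operands are demanded. Returns false if this |
| // cannot be determined (scalable vectors, or a demanded lane whose mask |
| // element is undef). |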
| static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf, |
| const APInt &DemandedElts, |
| APInt &DemandedLHS, APInt &DemandedRHS) { |
| // The length of scalable vectors is unknown at compile time, thus we |
| // cannot check their values |
| if (isa<ScalableVectorType>(Shuf->getType())) |
| return false; |
| |
| int NumElts = |
| cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements(); |
| int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements(); |
| DemandedLHS = DemandedRHS = APInt::getZero(NumElts); |
| if (DemandedElts.isZero()) |
| return true; |
| // Simple case of a shuffle with zeroinitializer. |
| if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) { |
| DemandedLHS.setBit(0); |
| return true; |
| } |
| for (int i = 0; i != NumMaskElts; ++i) { |
| if (!DemandedElts[i]) |
| continue; |
| int M = Shuf->getMaskValue(i); |
| assert(M < (NumElts * 2) && "Invalid shuffle mask constant"); |
| |
| // For undef elements, we don't know anything about the common state of |
| // the shuffle result. |
| if (M == -1) |
| return false; |
| if (M < NumElts) |
| DemandedLHS.setBit(M % NumElts); |
| else |
| DemandedRHS.setBit(M % NumElts); |
| } |
| |
| return true; |
| } |
| |
| static void computeKnownBits(const Value *V, const APInt &DemandedElts, |
| KnownBits &Known, unsigned Depth, const Query &Q); |
| |
| static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, |
| const Query &Q) { |
| // FIXME: We currently have no way to represent the DemandedElts of a scalable |
| // vector |
| if (isa<ScalableVectorType>(V->getType())) { |
| Known.resetAll(); |
| return; |
| } |
| |
| auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); |
| APInt DemandedElts = |
| FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); |
| computeKnownBits(V, DemandedElts, Known, Depth, Q); |
| } |
| |
| void llvm::computeKnownBits(const Value *V, KnownBits &Known, |
| const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, |
| OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { |
| ::computeKnownBits(V, Known, Depth, |
| Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); |
| } |
| |
| void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, |
| KnownBits &Known, const DataLayout &DL, |
| unsigned Depth, AssumptionCache *AC, |
| const Instruction *CxtI, const DominatorTree *DT, |
| OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { |
| ::computeKnownBits(V, DemandedElts, Known, Depth, |
| Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); |
| } |
| |
| static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, |
| unsigned Depth, const Query &Q); |
| |
| static KnownBits computeKnownBits(const Value *V, unsigned Depth, |
| const Query &Q); |
| |
| KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, |
| unsigned Depth, AssumptionCache *AC, |
| const Instruction *CxtI, |
| const DominatorTree *DT, |
| OptimizationRemarkEmitter *ORE, |
| bool UseInstrInfo) { |
| return ::computeKnownBits( |
| V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); |
| } |
| |
| KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, |
| const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, |
| OptimizationRemarkEmitter *ORE, |
| bool UseInstrInfo) { |
| return ::computeKnownBits( |
| V, DemandedElts, Depth, |
| Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE)); |
| } |
| |
| bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, |
| const DataLayout &DL, AssumptionCache *AC, |
| const Instruction *CxtI, const DominatorTree *DT, |
| bool UseInstrInfo) { |
| assert(LHS->getType() == RHS->getType() && |
| "LHS and RHS should have the same type"); |
| assert(LHS->getType()->isIntOrIntVectorTy() && |
| "LHS and RHS should be integers"); |
| // Look for an inverted mask: (X & ~M) op (Y & M). |
| Value *M; |
| if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && |
| match(RHS, m_c_And(m_Specific(M), m_Value()))) |
| return true; |
| if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && |
| match(LHS, m_c_And(m_Specific(M), m_Value()))) |
| return true; |
| IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType()); |
| KnownBits LHSKnown(IT->getBitWidth()); |
| KnownBits RHSKnown(IT->getBitWidth()); |
| computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); |
| computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo); |
| return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); |
| } |
| |
| bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { |
| return !I->user_empty() && all_of(I->users(), [](const User *U) { |
| ICmpInst::Predicate P; |
| return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P); |
| }); |
| } |
| |
| static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, |
| const Query &Q); |
| |
| bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, |
| bool OrZero, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| return ::isKnownToBeAPowerOfTwo( |
| V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); |
| } |
| |
| static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, |
| unsigned Depth, const Query &Q); |
| |
| static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); |
| |
| bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| return ::isKnownNonZero(V, Depth, |
| Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); |
| } |
| |
| bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, |
| unsigned Depth, AssumptionCache *AC, |
| const Instruction *CxtI, const DominatorTree *DT, |
| bool UseInstrInfo) { |
| KnownBits Known = |
| computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo); |
| return Known.isNonNegative(); |
| } |
| |
| bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| if (auto *CI = dyn_cast<ConstantInt>(V)) |
| return CI->getValue().isStrictlyPositive(); |
| |
| // TODO: We're doing two recursive queries here. We should factor this such |
| // that only a single query is needed. |
| return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) && |
| isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); |
| } |
| |
| bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| KnownBits Known = |
| computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo); |
| return Known.isNegative(); |
| } |
| |
| static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, |
| const Query &Q); |
| |
| bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, |
| const DataLayout &DL, AssumptionCache *AC, |
| const Instruction *CxtI, const DominatorTree *DT, |
| bool UseInstrInfo) { |
| return ::isKnownNonEqual(V1, V2, 0, |
| Query(DL, AC, safeCxtI(V2, V1, CxtI), DT, |
| UseInstrInfo, /*ORE=*/nullptr)); |
| } |
| |
| static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, |
| const Query &Q); |
| |
| bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, |
| const DataLayout &DL, unsigned Depth, |
| AssumptionCache *AC, const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| return ::MaskedValueIsZero( |
| V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); |
| } |
| |
| static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, |
| unsigned Depth, const Query &Q); |
| |
| static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, |
| const Query &Q) { |
| // FIXME: We currently have no way to represent the DemandedElts of a scalable |
| // vector |
| if (isa<ScalableVectorType>(V->getType())) |
| return 1; |
| |
| auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); |
| APInt DemandedElts = |
| FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); |
| return ComputeNumSignBits(V, DemandedElts, Depth, Q); |
| } |
| |
| unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, |
| unsigned Depth, AssumptionCache *AC, |
| const Instruction *CxtI, |
| const DominatorTree *DT, bool UseInstrInfo) { |
| return ::ComputeNumSignBits( |
| V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); |
| } |
| |
| unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL, |
| unsigned Depth, AssumptionCache *AC, |
| const Instruction *CxtI, |
| const DominatorTree *DT) { |
| unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT); |
| return V->getType()->getScalarSizeInBits() - SignBits + 1; |
| } |
| |
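| // Compute known bits for an add or subtract of Op0 and Op1, honoring the |
| // nsw flag. KnownOut receives the result; Known2 is scratch space for the |
| // known bits of Op0. |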
| static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, |
| bool NSW, const APInt &DemandedElts, |
| KnownBits &KnownOut, KnownBits &Known2, |
| unsigned Depth, const Query &Q) { |
| computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q); |
| |
| // If one operand is unknown and we have no nowrap information, |
| // the result will be unknown independently of the second operand. |
| if (KnownOut.isUnknown() && !NSW) |
| return; |
| |
| computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); |
| KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut); |
| } |
| |
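| // Compute known bits for a multiply of Op0 and Op1. If the multiply is |
| // known not to overflow (nsw), the sign of the result may also be deduced. |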
| static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, |
| const APInt &DemandedElts, KnownBits &Known, |
| KnownBits &Known2, unsigned Depth, |
| const Query &Q) { |
| computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q); |
| computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); |
| |
| bool isKnownNegative = false; |
| bool isKnownNonNegative = false; |
| // If the multiplication is known not to overflow, compute the sign bit. |
| if (NSW) { |
| if (Op0 == Op1) { |
| // The product of a number with itself is non-negative. |
| isKnownNonNegative = true; |
| } else { |
| bool isKnownNonNegativeOp1 = Known.isNonNegative(); |
| bool isKnownNonNegativeOp0 = Known2.isNonNegative(); |
| bool isKnownNegativeOp1 = Known.isNegative(); |
| bool isKnownNegativeOp0 = Known2.isNegative(); |
| // The product of two numbers with the same sign is non-negative. |
| isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || |
| (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); |
| // The product of a negative number and a non-negative number is either |
| // negative or zero. |
| if (!isKnownNonNegative) |
| isKnownNegative = |
| (isKnownNegativeOp1 && isKnownNonNegativeOp0 && |
| Known2.isNonZero()) || |
| (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero()); |
| } |
| } |
| |
| Known = KnownBits::mul(Known, Known2); |
| |
| // Only make use of no-wrap flags if we failed to compute the sign bit |
| // directly. This matters if the multiplication always overflows, in |
| // which case we prefer to follow the result of the direct computation, |
| // though as the program is invoking undefined behaviour we can choose |
| // whatever we like here. |
| if (isKnownNonNegative && !Known.isNegative()) |
| Known.makeNonNegative(); |
| else if (isKnownNegative && !Known.isNonNegative()) |
| Known.makeNegative(); |
| } |
| |
| void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, |
| KnownBits &Known) { |
| unsigned BitWidth = Known.getBitWidth(); |
| unsigned NumRanges = Ranges.getNumOperands() / 2; |
| assert(NumRanges >= 1); |
| |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| |
| for (unsigned i = 0; i < NumRanges; ++i) { |
| ConstantInt *Lower = |
| mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); |
| ConstantInt *Upper = |
| mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); |
| ConstantRange Range(Lower->getValue(), Upper->getValue()); |
| |
| // The first CommonPrefixBits of all values in Range are equal. |
| unsigned CommonPrefixBits = |
| (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); |
| APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); |
| APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth); |
| Known.One &= UnsignedMax & Mask; |
| Known.Zero &= ~UnsignedMax & Mask; |
| } |
| } |
| |
| static bool isEphemeralValueOf(const Instruction *I, const Value *E) { |
| SmallVector<const Value *, 16> WorkSet(1, I); |
| SmallPtrSet<const Value *, 32> Visited; |
| SmallPtrSet<const Value *, 16> EphValues; |
| |
| // The instruction defining an assumption's condition itself is always |
| // considered ephemeral to that assumption (even if it has other |
| // non-ephemeral users). See r246696's test case for an example. |
| if (is_contained(I->operands(), E)) |
| return true; |
| |
| while (!WorkSet.empty()) { |
| const Value *V = WorkSet.pop_back_val(); |
| if (!Visited.insert(V).second) |
| continue; |
| |
| // If all uses of this value are ephemeral, then so is this value. |
| if (llvm::all_of(V->users(), [&](const User *U) { |
| return EphValues.count(U); |
| })) { |
| if (V == E) |
| return true; |
| |
| if (V == I || (isa<Instruction>(V) && |
| !cast<Instruction>(V)->mayHaveSideEffects() && |
| !cast<Instruction>(V)->isTerminator())) { |
| EphValues.insert(V); |
| if (const User *U = dyn_cast<User>(V)) |
| append_range(WorkSet, U->operands()); |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| // Is this an intrinsic that cannot be speculated but also cannot trap? |
| bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { |
| if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I)) |
| return CI->isAssumeLikeIntrinsic(); |
| |
| return false; |
| } |
| |
| bool llvm::isValidAssumeForContext(const Instruction *Inv, |
| const Instruction *CxtI, |
| const DominatorTree *DT) { |
| // There are two restrictions on the use of an assume: |
| // 1. The assume must dominate the context (or the control flow must |
| // reach the assume whenever it reaches the context). |
| // 2. The context must not be in the assume's set of ephemeral values |
| // (otherwise we will use the assume to prove that the condition |
| // feeding the assume is trivially true, thus causing the removal of |
| // the assume). |
| |
| if (Inv->getParent() == CxtI->getParent()) { |
| // If Inv and CxtI are in the same block, check whether the assume (Inv) |
| // comes before the context instruction. |
| if (Inv->comesBefore(CxtI)) |
| return true; |
| |
| // Don't let an assume affect itself - this would cause the problems |
| // `isEphemeralValueOf` is trying to prevent, and it would also make |
| // the loop below go out of bounds. |
| if (Inv == CxtI) |
| return false; |
| |
| // The context comes first, but they're both in the same block. |
| // Make sure there is nothing in between that might interrupt |
| // the control flow, not even CxtI itself. |
| // We limit the scan distance between the assume and its context instruction |
| // to avoid a compile-time explosion. This limit is chosen arbitrarily, so |
| // it can be adjusted if needed (could be turned into a cl::opt). |
| auto Range = make_range(CxtI->getIterator(), Inv->getIterator()); |
| if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15)) |
| return false; |
| |
| return !isEphemeralValueOf(Inv, CxtI); |
| } |
| |
| // Inv and CxtI are in different blocks. |
| if (DT) { |
| if (DT->dominates(Inv, CxtI)) |
| return true; |
| } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { |
| // We don't have a DT, but this trivially dominates. |
| return true; |
| } |
| |
| return false; |
| } |
| |
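| // Return true if the truth of the comparison "LHS Pred RHS" implies that |
| // the LHS cannot be zero. |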
| static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { |
| // v u> y implies v != 0. |
| if (Pred == ICmpInst::ICMP_UGT) |
| return true; |
| |
| // Special-case v != 0 to also handle v != null. |
| if (Pred == ICmpInst::ICMP_NE) |
| return match(RHS, m_Zero()); |
| |
| // All other predicates - rely on generic ConstantRange handling. |
| const APInt *C; |
| if (!match(RHS, m_APInt(C))) |
| return false; |
| |
| ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); |
| return !TrueValues.contains(APInt::getZero(C->getBitWidth())); |
| } |
| |
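| // Return true if V is known to be non-zero based on assumptions (llvm.assume |
| // calls and assume operand bundles) that are valid at the context |
| // instruction Q.CxtI. |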
| static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { |
| // Use of assumptions is context-sensitive. If we don't have a context, we |
| // cannot use them! |
| if (!Q.AC || !Q.CxtI) |
| return false; |
| |
| if (Q.CxtI && V->getType()->isPointerTy()) { |
| SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull}; |
| if (!NullPointerIsDefined(Q.CxtI->getFunction(), |
| V->getType()->getPointerAddressSpace())) |
| AttrKinds.push_back(Attribute::Dereferenceable); |
| |
| if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC)) |
| return true; |
| } |
| |
| for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { |
| if (!AssumeVH) |
| continue; |
| CallInst *I = cast<CallInst>(AssumeVH); |
| assert(I->getFunction() == Q.CxtI->getFunction() && |
| "Got assumption for the wrong function!"); |
| |
| // Warning: This loop can end up being somewhat performance sensitive. |
| // We're running this loop once for each value queried, resulting in a |
| // runtime of ~O(#assumes * #values). |
| |
| assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && |
| "must be an assume intrinsic"); |
| |
| Value *RHS; |
| CmpInst::Predicate Pred; |
| auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); |
| if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS)))) |
| return false; |
| |
| if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
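| // Refine Known for V using llvm.assume calls (and alignment assume operand |
| // bundles) that are valid at the context instruction Q.CxtI. |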
| static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, |
| unsigned Depth, const Query &Q) { |
| // Use of assumptions is context-sensitive. If we don't have a context, we |
| // cannot use them! |
| if (!Q.AC || !Q.CxtI) |
| return; |
| |
| unsigned BitWidth = Known.getBitWidth(); |
| |
| // Refine Known set if the pointer alignment is set by assume bundles. |
| if (V->getType()->isPointerTy()) { |
| if (RetainedKnowledge RK = getKnowledgeValidInContext( |
| V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) { |
| Known.Zero.setLowBits(Log2_64(RK.ArgValue)); |
| } |
| } |
| |
| // Note that the patterns below need to be kept in sync with the code |
| // in AssumptionCache::updateAffectedValues. |
| |
| for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { |
| if (!AssumeVH) |
| continue; |
| CallInst *I = cast<CallInst>(AssumeVH); |
| assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && |
| "Got assumption for the wrong function!"); |
| |
| // Warning: This loop can end up being somewhat performance sensitive. |
| // We're running this loop once for each value queried, resulting in a |
| // runtime of ~O(#assumes * #values). |
| |
| assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && |
| "must be an assume intrinsic"); |
| |
| Value *Arg = I->getArgOperand(0); |
| |
| if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| assert(BitWidth == 1 && "assume operand is not i1?"); |
| Known.setAllOnes(); |
| return; |
| } |
| if (match(Arg, m_Not(m_Specific(V))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| assert(BitWidth == 1 && "assume operand is not i1?"); |
| Known.setAllZero(); |
| return; |
| } |
| |
| // The remaining tests are all recursive, so bail out if we hit the limit. |
| if (Depth == MaxAnalysisRecursionDepth) |
| continue; |
| |
| ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); |
| if (!Cmp) |
| continue; |
| |
| // We are attempting to compute known bits for the operands of an assume. |
| // Do not try to use other assumptions for those recursive calls because |
| // that can lead to mutual recursion and a compile-time explosion. |
| // An example of the mutual recursion: computeKnownBits can call |
| // isKnownNonZero which calls computeKnownBitsFromAssume (this function) |
| // and so on. |
| Query QueryNoAC = Q; |
| QueryNoAC.AC = nullptr; |
| |
| // Note that ptrtoint may change the bitwidth. |
| Value *A, *B; |
| auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); |
| |
| CmpInst::Predicate Pred; |
| uint64_t C; |
| switch (Cmp->getPredicate()) { |
| default: |
| break; |
| case ICmpInst::ICMP_EQ: |
| // assume(v = a) |
| if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| Known.Zero |= RHSKnown.Zero; |
| Known.One |= RHSKnown.One; |
| // assume(v & b = a) |
| } else if (match(Cmp, |
| m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits MaskKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in the mask that are known to be one, we can propagate |
| // known bits from the RHS to V. |
| Known.Zero |= RHSKnown.Zero & MaskKnown.One; |
| Known.One |= RHSKnown.One & MaskKnown.One; |
| // assume(~(v & b) = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits MaskKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in the mask that are known to be one, we can propagate |
| // inverted known bits from the RHS to V. |
| Known.Zero |= RHSKnown.One & MaskKnown.One; |
| Known.One |= RHSKnown.Zero & MaskKnown.One; |
| // assume(v | b = a) |
| } else if (match(Cmp, |
| m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits BKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in B that are known to be zero, we can propagate known |
| // bits from the RHS to V. |
| Known.Zero |= RHSKnown.Zero & BKnown.Zero; |
| Known.One |= RHSKnown.One & BKnown.Zero; |
| // assume(~(v | b) = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits BKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in B that are known to be zero, we can propagate |
| // inverted known bits from the RHS to V. |
| Known.Zero |= RHSKnown.One & BKnown.Zero; |
| Known.One |= RHSKnown.Zero & BKnown.Zero; |
| // assume(v ^ b = a) |
| } else if (match(Cmp, |
| m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits BKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in B that are known to be zero, we can propagate known |
| // bits from the RHS to V. For those bits in B that are known to be one, |
| // we can propagate inverted known bits from the RHS to V. |
| Known.Zero |= RHSKnown.Zero & BKnown.Zero; |
| Known.One |= RHSKnown.One & BKnown.Zero; |
| Known.Zero |= RHSKnown.One & BKnown.One; |
| Known.One |= RHSKnown.Zero & BKnown.One; |
| // assume(~(v ^ b) = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| KnownBits BKnown = |
| computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in B that are known to be zero, we can propagate |
| // inverted known bits from the RHS to V. For those bits in B that are |
| // known to be one, we can propagate known bits from the RHS to V. |
| Known.Zero |= RHSKnown.One & BKnown.Zero; |
| Known.One |= RHSKnown.Zero & BKnown.Zero; |
| Known.Zero |= RHSKnown.Zero & BKnown.One; |
| Known.One |= RHSKnown.One & BKnown.One; |
| // assume(v << c = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // For those bits in RHS that are known, we can propagate them to known |
| // bits in V shifted to the right by C. |
| RHSKnown.Zero.lshrInPlace(C); |
| Known.Zero |= RHSKnown.Zero; |
| RHSKnown.One.lshrInPlace(C); |
| Known.One |= RHSKnown.One; |
| // assume(~(v << c) = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| // For those bits in RHS that are known, we can propagate them inverted |
| // to known bits in V shifted to the right by C. |
| RHSKnown.One.lshrInPlace(C); |
| Known.Zero |= RHSKnown.One; |
| RHSKnown.Zero.lshrInPlace(C); |
| Known.One |= RHSKnown.Zero; |
| // assume(v >> c = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| // For those bits in RHS that are known, we can propagate them to known |
| // bits in V shifted to the right by C. |
| Known.Zero |= RHSKnown.Zero << C; |
| Known.One |= RHSKnown.One << C; |
| // assume(~(v >> c) = a) |
| } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), |
| m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| // For those bits in RHS that are known, we can propagate them inverted |
| // to known bits in V shifted to the right by C. |
| Known.Zero |= RHSKnown.One << C; |
| Known.One |= RHSKnown.Zero << C; |
| } |
| break; |
| case ICmpInst::ICMP_SGE: |
| // assume(v >=_s c) where c is non-negative |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| if (RHSKnown.isNonNegative()) { |
| // We know that the sign bit is zero. |
| Known.makeNonNegative(); |
| } |
| } |
| break; |
| case ICmpInst::ICMP_SGT: |
| // assume(v >_s c) where c is at least -1. |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { |
| // We know that the sign bit is zero. |
| Known.makeNonNegative(); |
| } |
| } |
| break; |
| case ICmpInst::ICMP_SLE: |
| // assume(v <=_s c) where c is negative |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| if (RHSKnown.isNegative()) { |
| // We know that the sign bit is one. |
| Known.makeNegative(); |
| } |
| } |
| break; |
| case ICmpInst::ICMP_SLT: |
| // assume(v <_s c) where c is non-positive |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| if (RHSKnown.isZero() || RHSKnown.isNegative()) { |
| // We know that the sign bit is one. |
| Known.makeNegative(); |
| } |
| } |
| break; |
| case ICmpInst::ICMP_ULE: |
| // assume(v <=_u c) |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // Whatever high bits in c are zero are known to be zero. |
| Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); |
| } |
| break; |
| case ICmpInst::ICMP_ULT: |
| // assume(v <_u c) |
| if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && |
| isValidAssumeForContext(I, Q.CxtI, Q.DT)) { |
| KnownBits RHSKnown = |
| computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth); |
| |
| // If the RHS is known zero, then this assumption must be wrong (nothing |
| // is unsigned less than zero). Signal a conflict and get out of here. |
| if (RHSKnown.isZero()) { |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| break; |
| } |
| |
| // Whatever high bits in c are zero are known to be zero (if c is a power |
| // of 2, then one more). |
| if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, QueryNoAC)) |
| Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); |
| else |
| Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); |
| } |
| break; |
| } |
| } |
| |
| // If assumptions conflict with each other or previous known bits, then we |
| // have a logical fallacy. It's possible that the assumption is not reachable, |
| // so this isn't a real bug. On the other hand, the program may have undefined |
| // behavior, or we might have a bug in the compiler. We can't assert/crash, so |
| // clear out the known bits, try to warn the user, and hope for the best. |
| if (Known.Zero.intersects(Known.One)) { |
| Known.resetAll(); |
| |
| if (Q.ORE) |
| Q.ORE->emit([&]() { |
| auto *CxtI = const_cast<Instruction *>(Q.CxtI); |
| return OptimizationRemarkAnalysis("value-tracking", "BadAssumption", |
| CxtI) |
| << "Detected conflicting code assumptions. Program may " |
| "have undefined behavior, or compiler may have " |
| "internal error."; |
| }); |
| } |
| } |
| |
| /// Compute known bits from a shift operator, including those with a |
| /// non-constant shift amount. Known is the output of this function. Known2 is |
| /// a pre-allocated temporary with the same bit width as Known and, on return, |
| /// contains the known bits of the shift value source. KF is an |
| /// operator-specific function that, given the known bits of the value and of |
| /// the shift amount, computes the implied known bits of the shift operator's |
| /// result for that shift amount. The results from calling KF are |
| /// conservatively combined over all permitted shift amounts. |
| static void computeKnownBitsFromShiftOperator( |
| const Operator *I, const APInt &DemandedElts, KnownBits &Known, |
| KnownBits &Known2, unsigned Depth, const Query &Q, |
| function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) { |
| unsigned BitWidth = Known.getBitWidth(); |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); |
| |
| // Note: We cannot use Known.Zero.getLimitedValue() here, because if |
| // BitWidth > 64 and any upper bits are known, we'll end up returning the |
| // limit value (which implies all bits are known). |
| uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); |
| uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); |
| bool ShiftAmtIsConstant = Known.isConstant(); |
| bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth); |
| |
| if (ShiftAmtIsConstant) { |
| Known = KF(Known2, Known); |
| |
| // If the known bits conflict, this must be an overflowing left shift, so |
| // the shift result is poison. We can return anything we want. Choose 0 for |
| // the best folding opportunity. |
| if (Known.hasConflict()) |
| Known.setAllZero(); |
| |
| return; |
| } |
| |
| // If the shift amount could be greater than or equal to the bit-width of the |
| // LHS, the value could be poison, but bail out because the check below is |
| // expensive. |
| // TODO: Should we just carry on? |
| if (MaxShiftAmtIsOutOfRange) { |
| Known.resetAll(); |
| return; |
| } |
| |
| // It would be more clearly correct to use the two temporaries for this |
| // calculation. Reusing the APInts here to prevent unnecessary allocations. |
| Known.resetAll(); |
| |
| // If we know the shifter operand is nonzero, we can sometimes infer more |
| // known bits. However this is expensive to compute, so be lazy about it and |
| // only compute it when absolutely necessary. |
| Optional<bool> ShifterOperandIsNonZero; |
| |
| // Early exit if we can't constrain any well-defined shift amount. |
| if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && |
| !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { |
| ShifterOperandIsNonZero = |
| isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q); |
| if (!*ShifterOperandIsNonZero) |
| return; |
| } |
| |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { |
| // Combine the shifted known input bits only for those shift amounts |
| // compatible with its known constraints. |
| if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt) |
| continue; |
| if ((ShiftAmt | ShiftAmtKO) != ShiftAmt) |
| continue; |
| // If we know the shifter is nonzero, we may be able to infer more known |
| // bits. This check is sunk down as far as possible to avoid the expensive |
| // call to isKnownNonZero if the cheaper checks above fail. |
| if (ShiftAmt == 0) { |
| if (!ShifterOperandIsNonZero.hasValue()) |
| ShifterOperandIsNonZero = |
| isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q); |
| if (*ShifterOperandIsNonZero) |
| continue; |
| } |
| |
| Known = KnownBits::commonBits( |
| Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt)))); |
| } |
| |
| // If the known bits conflict, the result is poison. Return a 0 and hope the |
| // caller can further optimize that. |
| if (Known.hasConflict()) |
| Known.setAllZero(); |
| } |
| |
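| // Compute the known bits of operator I by dispatching on its opcode and |
| // recursing into its operands as needed. |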
| static void computeKnownBitsFromOperator(const Operator *I, |
| const APInt &DemandedElts, |
| KnownBits &Known, unsigned Depth, |
| const Query &Q) { |
| unsigned BitWidth = Known.getBitWidth(); |
| |
| KnownBits Known2(BitWidth); |
| switch (I->getOpcode()) { |
| default: break; |
| case Instruction::Load: |
| if (MDNode *MD = |
| Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range)) |
| computeKnownBitsFromRangeMetadata(*MD, Known); |
| break; |
| case Instruction::And: { |
| // If either the LHS or the RHS are Zero, the result is zero. |
| computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| |
| Known &= Known2; |
| |
| // and(x, add (x, -1)) is a common idiom that always clears the low bit; |
| // here we handle the more general case of adding any odd number by |
| // matching the form and(x, add(x, y)) where y is odd. |
| // TODO: This could be generalized to clearing any bit set in y where the |
| // following bit is known to be unset in y. |
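| // For example, in and(x, add(x, 1)): if x is odd then x + 1 is even, and if |
| // x is even then bit 0 of x is already zero, so bit 0 of the result is |
| // always zero. |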
| Value *X = nullptr, *Y = nullptr; |
| if (!Known.Zero[0] && !Known.One[0] && |
| match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { |
| Known2.resetAll(); |
| computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q); |
| if (Known2.countMinTrailingOnes() > 0) |
| Known.Zero.setBit(0); |
| } |
| break; |
| } |
| case Instruction::Or: |
| computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| |
| Known |= Known2; |
| break; |
| case Instruction::Xor: |
| computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| |
| Known ^= Known2; |
| break; |
| case Instruction::Mul: { |
| bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); |
| computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, |
| Known, Known2, Depth, Q); |
| break; |
| } |
| case Instruction::UDiv: { |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::udiv(Known, Known2); |
| break; |
| } |
| case Instruction::Select: { |
| const Value *LHS = nullptr, *RHS = nullptr; |
| SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; |
| if (SelectPatternResult::isMinOrMax(SPF)) { |
| computeKnownBits(RHS, Known, Depth + 1, Q); |
| computeKnownBits(LHS, Known2, Depth + 1, Q); |
| switch (SPF) { |
| default: |
| llvm_unreachable("Unhandled select pattern flavor!"); |
| case SPF_SMAX: |
| Known = KnownBits::smax(Known, Known2); |
| break; |
| case SPF_SMIN: |
| Known = KnownBits::smin(Known, Known2); |
| break; |
| case SPF_UMAX: |
| Known = KnownBits::umax(Known, Known2); |
| break; |
| case SPF_UMIN: |
| Known = KnownBits::umin(Known, Known2); |
| break; |
| } |
| break; |
| } |
| |
| computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| |
| // Only known if known in both the LHS and RHS. |
| Known = KnownBits::commonBits(Known, Known2); |
| |
| if (SPF == SPF_ABS) { |
| // RHS from matchSelectPattern returns the negation part of abs pattern. |
| // If the negate has an NSW flag we can assume the sign bit of the result |
| // will be 0 because that makes abs(INT_MIN) undefined. |
| if (match(RHS, m_Neg(m_Specific(LHS))) && |
| Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) |
| Known.Zero.setSignBit(); |
| } |
| |
| break; |
| } |
| case Instruction::FPTrunc: |
| case Instruction::FPExt: |
| case Instruction::FPToUI: |
| case Instruction::FPToSI: |
| case Instruction::SIToFP: |
| case Instruction::UIToFP: |
| break; // Can't work with floating point. |
| case Instruction::PtrToInt: |
| case Instruction::IntToPtr: |
| // Fall through and handle them the same as zext/trunc. |
| LLVM_FALLTHROUGH; |
| case Instruction::ZExt: |
| case Instruction::Trunc: { |
| Type *SrcTy = I->getOperand(0)->getType(); |
| |
| unsigned SrcBitWidth; |
| // Note that we handle pointer operands here because of inttoptr/ptrtoint |
| // which fall through here. |
| Type *ScalarTy = SrcTy->getScalarType(); |
| SrcBitWidth = ScalarTy->isPointerTy() ? |
| Q.DL.getPointerTypeSizeInBits(ScalarTy) : |
| Q.DL.getTypeSizeInBits(ScalarTy); |
| |
| assert(SrcBitWidth && "SrcBitWidth can't be zero"); |
| Known = Known.anyextOrTrunc(SrcBitWidth); |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| Known = Known.zextOrTrunc(BitWidth); |
| break; |
| } |
| case Instruction::BitCast: { |
| Type *SrcTy = I->getOperand(0)->getType(); |
| if (SrcTy->isIntOrPtrTy() && |
| // TODO: For now, not handling conversions like: |
| // (bitcast i64 %x to <2 x i32>) |
| !I->getType()->isVectorTy()) { |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| break; |
| } |
| |
| // Handle cast from vector integer type to scalar or vector integer. |
| auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy); |
| if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() || |
| !I->getType()->isIntOrIntVectorTy()) |
| break; |
| |
| // Look through a cast from narrow vector elements to a wider type. |
| // Examples: v4i32 -> v2i64, v3i8 -> i24 |
| unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits(); |
| if (BitWidth % SubBitWidth == 0) { |
| // Known bits are automatically intersected across demanded elements of a |
| // vector. So for example, if a bit is computed as known zero, it must be |
| // zero across all demanded elements of the vector. |
| // |
| // For this bitcast, each demanded element of the output is sub-divided |
| // across a set of smaller vector elements in the source vector. To get |
| // the known bits for an entire element of the output, compute the known |
| // bits for each sub-element sequentially. This is done by shifting the |
| // one-set-bit demanded elements parameter across the sub-elements for |
| // consecutive calls to computeKnownBits. We are using the demanded |
| // elements parameter as a mask operator. |
| // |
| // The known bits of each sub-element are then inserted into place |
| // (dependent on endian) to form the full result of known bits. |
| unsigned NumElts = DemandedElts.getBitWidth(); |
| unsigned SubScale = BitWidth / SubBitWidth; |
| APInt SubDemandedElts = APInt::getZero(NumElts * SubScale); |
| for (unsigned i = 0; i != NumElts; ++i) { |
| if (DemandedElts[i]) |
| SubDemandedElts.setBit(i * SubScale); |
| } |
| |
| KnownBits KnownSrc(SubBitWidth); |
| for (unsigned i = 0; i != SubScale; ++i) { |
| computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, |
| Depth + 1, Q); |
| unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i; |
| Known.insertBits(KnownSrc, ShiftElt * SubBitWidth); |
| } |
| } |
| break; |
| } |
| case Instruction::SExt: { |
| // Compute the bits in the result that are not present in the input. |
| unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); |
| |
| Known = Known.trunc(SrcBitWidth); |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| // If the sign bit of the input is known set or clear, then we know the |
| // top bits of the result. |
| Known = Known.sext(BitWidth); |
| break; |
| } |
| case Instruction::Shl: { |
| bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); |
| auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) { |
| KnownBits Result = KnownBits::shl(KnownVal, KnownAmt); |
| // If this shift has "nsw" keyword, then the result is either a poison |
| // value or has the same sign bit as the first operand. |
| if (NSW) { |
| if (KnownVal.Zero.isSignBitSet()) |
| Result.Zero.setSignBit(); |
| if (KnownVal.One.isSignBitSet()) |
| Result.One.setSignBit(); |
| } |
| return Result; |
| }; |
| computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, |
| KF); |
| // Trailing zeros of a left-shifted constant never decrease. |
| const APInt *C; |
| if (match(I->getOperand(0), m_APInt(C))) |
| Known.Zero.setLowBits(C->countTrailingZeros()); |
| break; |
| } |
| case Instruction::LShr: { |
| auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { |
| return KnownBits::lshr(KnownVal, KnownAmt); |
| }; |
| computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, |
| KF); |
| // Leading zeros of a right-shifted constant never decrease. |
| const APInt *C; |
| if (match(I->getOperand(0), m_APInt(C))) |
| Known.Zero.setHighBits(C->countLeadingZeros()); |
| break; |
| } |
| case Instruction::AShr: { |
| auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) { |
| return KnownBits::ashr(KnownVal, KnownAmt); |
| }; |
| computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q, |
| KF); |
| break; |
| } |
| case Instruction::Sub: { |
| bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); |
| computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, |
| DemandedElts, Known, Known2, Depth, Q); |
| break; |
| } |
| case Instruction::Add: { |
| bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); |
| computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, |
| DemandedElts, Known, Known2, Depth, Q); |
| break; |
| } |
| case Instruction::SRem: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::srem(Known, Known2); |
| break; |
| |
| case Instruction::URem: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::urem(Known, Known2); |
| break; |
| case Instruction::Alloca: |
| Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign())); |
| break; |
| case Instruction::GetElementPtr: { |
| // Analyze all of the subscripts of this getelementptr instruction |
| // to determine if we can prove known low zero bits. |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| // Accumulate the constant indices in a separate variable |
| // to minimize the number of calls to computeForAddSub. |
| APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); |
| |
| gep_type_iterator GTI = gep_type_begin(I); |
| for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { |
| // TrailZ can only become smaller; short-circuit if we hit zero. |
| if (Known.isUnknown()) |
| break; |
| |
| Value *Index = I->getOperand(i); |
| |
| // Handle case when index is zero. |
| Constant *CIndex = dyn_cast<Constant>(Index); |
| if (CIndex && CIndex->isZeroValue()) |
| continue; |
| |
| if (StructType *STy = GTI.getStructTypeOrNull()) { |
| // Handle struct member offset arithmetic. |
| |
| assert(CIndex && |
| "Access to structure field must be known at compile time"); |
| |
| if (CIndex->getType()->isVectorTy()) |
| Index = CIndex->getSplatValue(); |
| |
| unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); |
| const StructLayout *SL = Q.DL.getStructLayout(STy); |
| uint64_t Offset = SL->getElementOffset(Idx); |
| AccConstIndices += Offset; |
| continue; |
| } |
| |
| // Handle array index arithmetic. |
| Type *IndexedTy = GTI.getIndexedType(); |
| if (!IndexedTy->isSized()) { |
| Known.resetAll(); |
| break; |
| } |
| |
| unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); |
| KnownBits IndexBits(IndexBitWidth); |
| computeKnownBits(Index, IndexBits, Depth + 1, Q); |
| TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy); |
| uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize(); |
| KnownBits ScalingFactor(IndexBitWidth); |
| // Multiply by current sizeof type. |
| // &A[i] == A + i * sizeof(*A[i]). |
| if (IndexTypeSize.isScalable()) { |
| // For scalable types the only thing we know about sizeof is |
| // that this is a multiple of the minimum size. |
| ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes)); |
| } else if (IndexBits.isConstant()) { |
| APInt IndexConst = IndexBits.getConstant(); |
| APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes); |
| IndexConst *= ScalingFactor; |
| AccConstIndices += IndexConst.sextOrTrunc(BitWidth); |
| continue; |
| } else { |
| ScalingFactor = |
| KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes)); |
| } |
| IndexBits = KnownBits::mul(IndexBits, ScalingFactor); |
| |
| // If the offsets have a different width from the pointer, according |
| // to the language reference we need to sign-extend or truncate them |
| // to the width of the pointer. |
| IndexBits = IndexBits.sextOrTrunc(BitWidth); |
| |
| // Note that inbounds does *not* guarantee nsw for the addition, as only |
| // the offset is signed, while the base address is unsigned. |
| Known = KnownBits::computeForAddSub( |
| /*Add=*/true, /*NSW=*/false, Known, IndexBits); |
| } |
| if (!Known.isUnknown() && !AccConstIndices.isZero()) { |
| KnownBits Index = KnownBits::makeConstant(AccConstIndices); |
| Known = KnownBits::computeForAddSub( |
| /*Add=*/true, /*NSW=*/false, Known, Index); |
| } |
| break; |
| } |
| case Instruction::PHI: { |
| const PHINode *P = cast<PHINode>(I); |
| BinaryOperator *BO = nullptr; |
| Value *R = nullptr, *L = nullptr; |
| if (matchSimpleRecurrence(P, BO, R, L)) { |
| // Handle the case of a simple two-predecessor recurrence PHI. |
| // There's a lot more that could theoretically be done here, but |
| // this is sufficient to catch some interesting cases. |
| unsigned Opcode = BO->getOpcode(); |
| |
| // If this is a shift recurrence, we know the bits being shifted in. |
| // We can combine that with information about the start value of the |
| // recurrence to conclude facts about the result. |
| if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr || |
| Opcode == Instruction::Shl) && |
| BO->getOperand(0) == I) { |
| |
| // We have matched a recurrence of the form: |
| // %iv = [R, %entry], [%iv.next, %backedge] |
| // %iv.next = shift_op %iv, L |
| |
| // Recurse with the phi context to avoid concern about whether facts |
| // inferred hold at the original context instruction. TODO: It may be |
| // correct to use the original context. If warranted, explore and |
| // add sufficient tests to cover. |
| Query RecQ = Q; |
| RecQ.CxtI = P; |
| computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ); |
| switch (Opcode) { |
| case Instruction::Shl: |
| // A shl recurrence will only increase the trailing zeros |
| Known.Zero.setLowBits(Known2.countMinTrailingZeros()); |
| break; |
| case Instruction::LShr: |
| // A lshr recurrence will preserve the leading zeros of the |
| // start value |
| Known.Zero.setHighBits(Known2.countMinLeadingZeros()); |
| break; |
| case Instruction::AShr: |
| // An ashr recurrence will extend the initial sign bit |
| Known.Zero.setHighBits(Known2.countMinLeadingZeros()); |
| Known.One.setHighBits(Known2.countMinLeadingOnes()); |
| break; |
| } |
| } |
| |
| // Check for operations that have the property that if |
| // both their operands have low zero bits, the result |
| // will have low zero bits. |
| if (Opcode == Instruction::Add || |
| Opcode == Instruction::Sub || |
| Opcode == Instruction::And || |
| Opcode == Instruction::Or || |
| Opcode == Instruction::Mul) { |
| // Change the context instruction to the "edge" that flows into the |
| // phi. This is important because that is where the value is actually |
| // "evaluated" even though it is used later somewhere else. (see also |
| // D69571). |
| Query RecQ = Q; |
| |
| unsigned OpNum = P->getOperand(0) == R ? 0 : 1; |
| Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator(); |
| Instruction *LInst = P->getIncomingBlock(1-OpNum)->getTerminator(); |
| |
| // Ok, we have a PHI of the form L op= R. Check for low |
| // zero bits. |
| RecQ.CxtI = RInst; |
| computeKnownBits(R, Known2, Depth + 1, RecQ); |
| |
| // We need to take the minimum number of known bits |
| KnownBits Known3(BitWidth); |
| RecQ.CxtI = LInst; |
| computeKnownBits(L, Known3, Depth + 1, RecQ); |
| |
| Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), |
| Known3.countMinTrailingZeros())); |
| |
| auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO); |
| if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) { |
// If the initial value of the recurrence is nonnegative, and we are
// adding a nonnegative number with nsw, the result can only be
// nonnegative or a poison value regardless of the number of times we
// execute the add in the phi recurrence. If the initial value is
// negative and we are adding a negative number with nsw, the result
// can only be negative or a poison value. Similar arguments apply to
// sub and mul.
| // |
| // (add non-negative, non-negative) --> non-negative |
| // (add negative, negative) --> negative |
| if (Opcode == Instruction::Add) { |
| if (Known2.isNonNegative() && Known3.isNonNegative()) |
| Known.makeNonNegative(); |
| else if (Known2.isNegative() && Known3.isNegative()) |
| Known.makeNegative(); |
| } |
| |
| // (sub nsw non-negative, negative) --> non-negative |
| // (sub nsw negative, non-negative) --> negative |
| else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) { |
| if (Known2.isNonNegative() && Known3.isNegative()) |
| Known.makeNonNegative(); |
| else if (Known2.isNegative() && Known3.isNonNegative()) |
| Known.makeNegative(); |
| } |
| |
| // (mul nsw non-negative, non-negative) --> non-negative |
| else if (Opcode == Instruction::Mul && Known2.isNonNegative() && |
| Known3.isNonNegative()) |
| Known.makeNonNegative(); |
| } |
| |
| break; |
| } |
| } |
| |
| // Unreachable blocks may have zero-operand PHI nodes. |
| if (P->getNumIncomingValues() == 0) |
| break; |
| |
| // Otherwise take the unions of the known bit sets of the operands, |
| // taking conservative care to avoid excessive recursion. |
| if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) { |
// Skip if every incoming value references the PHI itself.
| if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) |
| break; |
| |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { |
| Value *IncValue = P->getIncomingValue(u); |
| // Skip direct self references. |
| if (IncValue == P) continue; |
| |
| // Change the context instruction to the "edge" that flows into the |
| // phi. This is important because that is where the value is actually |
| // "evaluated" even though it is used later somewhere else. (see also |
| // D69571). |
| Query RecQ = Q; |
| RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); |
| |
| Known2 = KnownBits(BitWidth); |
| // Recurse, but cap the recursion to one level, because we don't |
| // want to waste time spinning around in loops. |
| computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ); |
| Known = KnownBits::commonBits(Known, Known2); |
| // If all bits have been ruled out, there's no need to check |
| // more operands. |
| if (Known.isUnknown()) |
| break; |
| } |
| } |
| break; |
| } |
| case Instruction::Call: |
| case Instruction::Invoke: |
| // If range metadata is attached to this call, set known bits from that, |
| // and then intersect with known bits based on other properties of the |
| // function. |
| if (MDNode *MD = |
| Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) |
| computeKnownBitsFromRangeMetadata(*MD, Known); |
| if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { |
| computeKnownBits(RV, Known2, Depth + 1, Q); |
| Known.Zero |= Known2.Zero; |
| Known.One |= Known2.One; |
| } |
| if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { |
| switch (II->getIntrinsicID()) { |
| default: break; |
| case Intrinsic::abs: { |
| computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); |
| bool IntMinIsPoison = match(II->getArgOperand(1), m_One()); |
| Known = Known2.abs(IntMinIsPoison); |
| break; |
| } |
| case Intrinsic::bitreverse: |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| Known.Zero |= Known2.Zero.reverseBits(); |
| Known.One |= Known2.One.reverseBits(); |
| break; |
| case Intrinsic::bswap: |
| computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); |
| Known.Zero |= Known2.Zero.byteSwap(); |
| Known.One |= Known2.One.byteSwap(); |
| break; |
| case Intrinsic::ctlz: { |
| computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); |
| // If we have a known 1, its position is our upper bound. |
| unsigned PossibleLZ = Known2.countMaxLeadingZeros(); |
// If this call is undefined for 0, the result will be at most BitWidth - 1.
| if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) |
| PossibleLZ = std::min(PossibleLZ, BitWidth - 1); |
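// Only Log2_32(PossibleLZ) + 1 low bits are needed to represent any count
// up to PossibleLZ; e.g. if PossibleLZ is 5 the count fits in 3 bits, so
// bits 3 and above of the result are known zero (illustrative).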
| unsigned LowBits = Log2_32(PossibleLZ)+1; |
| Known.Zero.setBitsFrom(LowBits); |
| break; |
| } |
| case Intrinsic::cttz: { |
| computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); |
| // If we have a known 1, its position is our upper bound. |
| unsigned PossibleTZ = Known2.countMaxTrailingZeros(); |
// If this call is undefined for 0, the result will be at most BitWidth - 1.
| if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) |
| PossibleTZ = std::min(PossibleTZ, BitWidth - 1); |
| unsigned LowBits = Log2_32(PossibleTZ)+1; |
| Known.Zero.setBitsFrom(LowBits); |
| break; |
| } |
| case Intrinsic::ctpop: { |
| computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); |
| // We can bound the space the count needs. Also, bits known to be zero |
| // can't contribute to the population. |
| unsigned BitsPossiblySet = Known2.countMaxPopulation(); |
| unsigned LowBits = Log2_32(BitsPossiblySet)+1; |
| Known.Zero.setBitsFrom(LowBits); |
| // TODO: we could bound KnownOne using the lower bound on the number |
| // of bits which might be set provided by popcnt KnownOne2. |
| break; |
| } |
| case Intrinsic::fshr: |
| case Intrinsic::fshl: { |
| const APInt *SA; |
| if (!match(I->getOperand(2), m_APInt(SA))) |
| break; |
| |
| // Normalize to funnel shift left. |
| uint64_t ShiftAmt = SA->urem(BitWidth); |
| if (II->getIntrinsicID() == Intrinsic::fshr) |
| ShiftAmt = BitWidth - ShiftAmt; |
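// Illustratively, this uses the identity fshr(X, Y, C) == fshl(X, Y,
// BitWidth - C) for C % BitWidth != 0; e.g. for i8, fshr(X, Y, 3) is
// (X << 5) | (Y >> 3), the same as fshl(X, Y, 5).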
| |
| KnownBits Known3(BitWidth); |
| computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); |
| |
| Known.Zero = |
| Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); |
| Known.One = |
| Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); |
| break; |
| } |
| case Intrinsic::uadd_sat: |
| case Intrinsic::usub_sat: { |
| bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat; |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| |
| // Add: Leading ones of either operand are preserved. |
| // Sub: Leading zeros of LHS and leading ones of RHS are preserved |
| // as leading zeros in the result. |
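// For example (illustrative), for i8 uadd.sat(0xC0, X) is always at least
// 0xC0: either the unsaturated sum keeps the two leading ones, or the
// result saturates to 0xFF.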
| unsigned LeadingKnown; |
| if (IsAdd) |
| LeadingKnown = std::max(Known.countMinLeadingOnes(), |
| Known2.countMinLeadingOnes()); |
| else |
| LeadingKnown = std::max(Known.countMinLeadingZeros(), |
| Known2.countMinLeadingOnes()); |
| |
| Known = KnownBits::computeForAddSub( |
| IsAdd, /* NSW */ false, Known, Known2); |
| |
| // We select between the operation result and all-ones/zero |
| // respectively, so we can preserve known ones/zeros. |
| if (IsAdd) { |
| Known.One.setHighBits(LeadingKnown); |
| Known.Zero.clearAllBits(); |
| } else { |
| Known.Zero.setHighBits(LeadingKnown); |
| Known.One.clearAllBits(); |
| } |
| break; |
| } |
| case Intrinsic::umin: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::umin(Known, Known2); |
| break; |
| case Intrinsic::umax: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::umax(Known, Known2); |
| break; |
| case Intrinsic::smin: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::smin(Known, Known2); |
| break; |
| case Intrinsic::smax: |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); |
| Known = KnownBits::smax(Known, Known2); |
| break; |
| case Intrinsic::x86_sse42_crc32_64_64: |
| Known.Zero.setBitsFrom(32); |
| break; |
| case Intrinsic::riscv_vsetvli: |
| case Intrinsic::riscv_vsetvlimax: |
| // Assume that VL output is positive and would fit in an int32_t. |
| // TODO: VLEN might be capped at 16 bits in a future V spec update. |
| if (BitWidth >= 32) |
| Known.Zero.setBitsFrom(31); |
| break; |
| case Intrinsic::vscale: { |
| if (!II->getParent() || !II->getFunction() || |
| !II->getFunction()->hasFnAttribute(Attribute::VScaleRange)) |
| break; |
| |
| auto VScaleRange = II->getFunction() |
| ->getFnAttribute(Attribute::VScaleRange) |
| .getVScaleRangeArgs(); |
| |
| if (VScaleRange.second == 0) |
| break; |
| |
| // If vscale min = max then we know the exact value at compile time |
| // and hence we know the exact bits. |
| if (VScaleRange.first == VScaleRange.second) { |
| Known.One = VScaleRange.first; |
| Known.Zero = VScaleRange.first; |
| Known.Zero.flipAllBits(); |
| break; |
| } |
| |
| unsigned FirstZeroHighBit = 32 - countLeadingZeros(VScaleRange.second); |
| if (FirstZeroHighBit < BitWidth) |
| Known.Zero.setBitsFrom(FirstZeroHighBit); |
| |
| break; |
| } |
| } |
| } |
| break; |
| case Instruction::ShuffleVector: { |
| auto *Shuf = dyn_cast<ShuffleVectorInst>(I); |
| // FIXME: Do we need to handle ConstantExpr involving shufflevectors? |
| if (!Shuf) { |
| Known.resetAll(); |
| return; |
| } |
| // For undef elements, we don't know anything about the common state of |
| // the shuffle result. |
| APInt DemandedLHS, DemandedRHS; |
| if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { |
| Known.resetAll(); |
| return; |
| } |
| Known.One.setAllBits(); |
| Known.Zero.setAllBits(); |
| if (!!DemandedLHS) { |
| const Value *LHS = Shuf->getOperand(0); |
| computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } |
| if (!!DemandedRHS) { |
| const Value *RHS = Shuf->getOperand(1); |
| computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); |
| Known = KnownBits::commonBits(Known, Known2); |
| } |
| break; |
| } |
| case Instruction::InsertElement: { |
| const Value *Vec = I->getOperand(0); |
| const Value *Elt = I->getOperand(1); |
| auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); |
| // Early out if the index is non-constant or out-of-range. |
| unsigned NumElts = DemandedElts.getBitWidth(); |
| if (!CIdx || CIdx->getValue().uge(NumElts)) { |
| Known.resetAll(); |
| return; |
| } |
| Known.One.setAllBits(); |
| Known.Zero.setAllBits(); |
| unsigned EltIdx = CIdx->getZExtValue(); |
| // Do we demand the inserted element? |
| if (DemandedElts[EltIdx]) { |
| computeKnownBits(Elt, Known, Depth + 1, Q); |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } |
| // We don't need the base vector element that has been inserted. |
| APInt DemandedVecElts = DemandedElts; |
| DemandedVecElts.clearBit(EltIdx); |
| if (!!DemandedVecElts) { |
| computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); |
| Known = KnownBits::commonBits(Known, Known2); |
| } |
| break; |
| } |
| case Instruction::ExtractElement: { |
| // Look through extract element. If the index is non-constant or |
| // out-of-range demand all elements, otherwise just the extracted element. |
| const Value *Vec = I->getOperand(0); |
| const Value *Idx = I->getOperand(1); |
| auto *CIdx = dyn_cast<ConstantInt>(Idx); |
| if (isa<ScalableVectorType>(Vec->getType())) { |
| // FIXME: there's probably *something* we can do with scalable vectors |
| Known.resetAll(); |
| break; |
| } |
| unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); |
| APInt DemandedVecElts = APInt::getAllOnes(NumElts); |
| if (CIdx && CIdx->getValue().ult(NumElts)) |
| DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); |
| computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); |
| break; |
| } |
| case Instruction::ExtractValue: |
| if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { |
| const ExtractValueInst *EVI = cast<ExtractValueInst>(I); |
| if (EVI->getNumIndices() != 1) break; |
| if (EVI->getIndices()[0] == 0) { |
| switch (II->getIntrinsicID()) { |
| default: break; |
| case Intrinsic::uadd_with_overflow: |
| case Intrinsic::sadd_with_overflow: |
| computeKnownBitsAddSub(true, II->getArgOperand(0), |
| II->getArgOperand(1), false, DemandedElts, |
| Known, Known2, Depth, Q); |
| break; |
| case Intrinsic::usub_with_overflow: |
| case Intrinsic::ssub_with_overflow: |
| computeKnownBitsAddSub(false, II->getArgOperand(0), |
| II->getArgOperand(1), false, DemandedElts, |
| Known, Known2, Depth, Q); |
| break; |
| case Intrinsic::umul_with_overflow: |
| case Intrinsic::smul_with_overflow: |
| computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, |
| DemandedElts, Known, Known2, Depth, Q); |
| break; |
| } |
| } |
| } |
| break; |
| case Instruction::Freeze: |
| if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, |
| Depth + 1)) |
| computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); |
| break; |
| } |
| } |
| |
| /// Determine which bits of V are known to be either zero or one and return |
| /// them. |
| KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, |
| unsigned Depth, const Query &Q) { |
| KnownBits Known(getBitWidth(V->getType(), Q.DL)); |
| computeKnownBits(V, DemandedElts, Known, Depth, Q); |
| return Known; |
| } |
| |
| /// Determine which bits of V are known to be either zero or one and return |
| /// them. |
| KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { |
| KnownBits Known(getBitWidth(V->getType(), Q.DL)); |
| computeKnownBits(V, Known, Depth, Q); |
| return Known; |
| } |
| |
| /// Determine which bits of V are known to be either zero or one and return |
| /// them in the Known bit set. |
| /// |
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
| /// |
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers. In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and a
/// bit is set only if it is true for all of the demanded elements in the
/// vector specified by DemandedElts.
| void computeKnownBits(const Value *V, const APInt &DemandedElts, |
| KnownBits &Known, unsigned Depth, const Query &Q) { |
assert(V && "No Value?");
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

if (!DemandedElts || isa<ScalableVectorType>(V->getType())) {
// No demanded elts or V is a scalable vector, better to assume we don't
// know anything.
Known.resetAll();
return;
}
| |
| #ifndef NDEBUG |
| Type *Ty = V->getType(); |
| unsigned BitWidth = Known.getBitWidth(); |
| |
| assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && |
| "Not integer or pointer type!"); |
| |
| if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { |
| assert( |
| FVTy->getNumElements() == DemandedElts.getBitWidth() && |
| "DemandedElt width should equal the fixed vector number of elements"); |
| } else { |
| assert(DemandedElts == APInt(1, 1) && |
| "DemandedElt width should be 1 for scalars"); |
| } |
| |
| Type *ScalarTy = Ty->getScalarType(); |
| if (ScalarTy->isPointerTy()) { |
| assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && |
| "V and Known should have same BitWidth"); |
| } else { |
| assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && |
| "V and Known should have same BitWidth"); |
| } |
| #endif |
| |
| const APInt *C; |
| if (match(V, m_APInt(C))) { |
| // We know all of the bits for a scalar constant or a splat vector constant! |
| Known = KnownBits::makeConstant(*C); |
| return; |
| } |
| // Null and aggregate-zero are all-zeros. |
| if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { |
| Known.setAllZero(); |
| return; |
| } |
| // Handle a constant vector by taking the intersection of the known bits of |
| // each element. |
| if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { |
| // We know that CDV must be a vector of integers. Take the intersection of |
| // each element. |
| Known.Zero.setAllBits(); Known.One.setAllBits(); |
| for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { |
| if (!DemandedElts[i]) |
| continue; |
| APInt Elt = CDV->getElementAsAPInt(i); |
| Known.Zero &= ~Elt; |
| Known.One &= Elt; |
| } |
| return; |
| } |
| |
| if (const auto *CV = dyn_cast<ConstantVector>(V)) { |
| // We know that CV must be a vector of integers. Take the intersection of |
| // each element. |
| Known.Zero.setAllBits(); Known.One.setAllBits(); |
| for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
| if (!DemandedElts[i]) |
| continue; |
| Constant *Element = CV->getAggregateElement(i); |
| auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); |
| if (!ElementCI) { |
| Known.resetAll(); |
| return; |
| } |
| const APInt &Elt = ElementCI->getValue(); |
| Known.Zero &= ~Elt; |
| Known.One &= Elt; |
| } |
| return; |
| } |
| |
| // Start out not knowing anything. |
| Known.resetAll(); |
| |
// We can't infer anything about undefs.
| if (isa<UndefValue>(V)) |
| return; |
| |
| // There's no point in looking through other users of ConstantData for |
| // assumptions. Confirm that we've handled them all. |
| assert(!isa<ConstantData>(V) && "Unhandled constant data!"); |
| |
| // All recursive calls that increase depth must come after this. |
| if (Depth == MaxAnalysisRecursionDepth) |
| return; |
| |
| // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has |
| // the bits of its aliasee. |
| if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { |
| if (!GA->isInterposable()) |
| computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); |
| return; |
| } |
| |
| if (const Operator *I = dyn_cast<Operator>(V)) |
| computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); |
| |
| // Aligned pointers have trailing zeros - refine Known.Zero set |
| if (isa<PointerType>(V->getType())) { |
| Align Alignment = V->getPointerAlignment(Q.DL); |
| Known.Zero.setLowBits(Log2(Alignment)); |
| } |
| |
// computeKnownBitsFromAssume strictly refines Known.
// Therefore, we run it after computeKnownBitsFromOperator.
| |
| // Check whether a nearby assume intrinsic can determine some known bits. |
| computeKnownBitsFromAssume(V, Known, Depth, Q); |
| |
| assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); |
| } |
| |
/// Return true if the given value is known to have exactly one
/// bit set when defined (or, if OrZero is true, to be either a power of two
/// or zero). For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
| bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, |
| const Query &Q) { |
| assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); |
| |
| // Attempt to match against constants. |
| if (OrZero && match(V, m_Power2OrZero())) |
| return true; |
| if (match(V, m_Power2())) |
| return true; |
| |
| // 1 << X is clearly a power of two if the one is not shifted off the end. If |
| // it is shifted off the end then the result is undefined. |
| if (match(V, m_Shl(m_One(), m_Value()))) |
| return true; |
| |
| // (signmask) >>l X is clearly a power of two if the one is not shifted off |
| // the bottom. If it is shifted off the bottom then the result is undefined. |
| if (match(V, m_LShr(m_SignMask(), m_Value()))) |
| return true; |
| |
| // The remaining tests are all recursive, so bail out if we hit the limit. |
| if (Depth++ == MaxAnalysisRecursionDepth) |
| return false; |
| |
| Value *X = nullptr, *Y = nullptr; |
| // A shift left or a logical shift right of a power of two is a power of two |
| // or zero. |
| if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || |
| match(V, m_LShr(m_Value(X), m_Value())))) |
| return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); |
| |
| if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V)) |
| return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); |
| |
| if (const SelectInst *SI = dyn_cast<SelectInst>(V)) |
| return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && |
| isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); |
| |
| // Peek through min/max. |
| if (match(V, m_MaxOrMin(m_Value(X), m_Value(Y)))) { |
| return isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q) && |
| isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q); |
| } |
| |
| if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { |
| // A power of two and'd with anything is a power of two or zero. |
| if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) || |
| isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q)) |
| return true; |
| // X & (-X) is always a power of two or zero. |
| if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) |
| return true; |
| return false; |
| } |
| |
| // Adding a power-of-two or zero to the same power-of-two or zero yields |
| // either the original power-of-two, a larger power-of-two or zero. |
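// For example (illustrative), if Y is a power of two or zero and
// X == (Y & Z) for some Z, then X is either Y or 0, so X + Y is either
// 2*Y, Y, or 0, each of which is again a power of two or zero.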
| if (match(V, m_Add(m_Value(X), m_Value(Y)))) { |
| const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); |
| if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || |
| Q.IIQ.hasNoSignedWrap(VOBO)) { |
| if (match(X, m_And(m_Specific(Y), m_Value())) || |
| match(X, m_And(m_Value(), m_Specific(Y)))) |
| if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) |
| return true; |
| if (match(Y, m_And(m_Specific(X), m_Value())) || |
| match(Y, m_And(m_Value(), m_Specific(X)))) |
| if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) |
| return true; |
| |
| unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
| KnownBits LHSBits(BitWidth); |
| computeKnownBits(X, LHSBits, Depth, Q); |
| |
| KnownBits RHSBits(BitWidth); |
| computeKnownBits(Y, RHSBits, Depth, Q); |
| // If i8 V is a power of two or zero: |
| // ZeroBits: 1 1 1 0 1 1 1 1 |
| // ~ZeroBits: 0 0 0 1 0 0 0 0 |
| if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) |
| // If OrZero isn't set, we cannot give back a zero result. |
| // Make sure either the LHS or RHS has a bit set. |
| if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) |
| return true; |
| } |
| } |
| |
| // An exact divide or right shift can only shift off zero bits, so the result |
| // is a power of two only if the first operand is a power of two and not |
| // copying a sign bit (sdiv int_min, 2). |
| if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || |
| match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { |
| return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, |
| Depth, Q); |
| } |
| |
| return false; |
| } |
| |
| /// Test whether a GEP's result is known to be non-null. |
| /// |
| /// Uses properties inherent in a GEP to try to determine whether it is known |
| /// to be non-null. |
| /// |
| /// Currently this routine does not support vector GEPs. |
| static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, |
| const Query &Q) { |
| const Function *F = nullptr; |
| if (const Instruction *I = dyn_cast<Instruction>(GEP)) |
| F = I->getFunction(); |
| |
| if (!GEP->isInBounds() || |
| NullPointerIsDefined(F, GEP->getPointerAddressSpace())) |
| return false; |
| |
| // FIXME: Support vector-GEPs. |
| assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); |
| |
| // If the base pointer is non-null, we cannot walk to a null address with an |
| // inbounds GEP in address space zero. |
| if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) |
| return true; |
| |
| // Walk the GEP operands and see if any operand introduces a non-zero offset. |
| // If so, then the GEP cannot produce a null pointer, as doing so would |
| // inherently violate the inbounds contract within address space zero. |
| for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); |
| GTI != GTE; ++GTI) { |
| // Struct types are easy -- they must always be indexed by a constant. |
| if (StructType *STy = GTI.getStructTypeOrNull()) { |
| ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); |
| unsigned ElementIdx = OpC->getZExtValue(); |
| const StructLayout *SL = Q.DL.getStructLayout(STy); |
| uint64_t ElementOffset = SL->getElementOffset(ElementIdx); |
| if (ElementOffset > 0) |
| return true; |
| continue; |
| } |
| |
| // If we have a zero-sized type, the index doesn't matter. Keep looping. |
| if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).getKnownMinSize() == 0) |
| continue; |
| |
| // Fast path the constant operand case both for efficiency and so we don't |
| // increment Depth when just zipping down an all-constant GEP. |
| if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { |
| if (!OpC->isZero()) |
| return true; |
| continue; |
| } |
| |
| // We post-increment Depth here because while isKnownNonZero increments it |
| // as well, when we pop back up that increment won't persist. We don't want |
| // to recurse 10k times just because we have 10k GEP operands. We don't |
| // bail completely out because we want to handle constant GEPs regardless |
| // of depth. |
| if (Depth++ >= MaxAnalysisRecursionDepth) |
| continue; |
| |
| if (isKnownNonZero(GTI.getOperand(), Depth, Q)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static bool isKnownNonNullFromDominatingCondition(const Value *V, |
| const Instruction *CtxI, |
| const DominatorTree *DT) { |
| if (isa<Constant>(V)) |
| return false; |
| |
| if (!CtxI || !DT) |
| return false; |
| |
| unsigned NumUsesExplored = 0; |
| for (auto *U : V->users()) { |
| // Avoid massive lists |
| if (NumUsesExplored >= DomConditionsMaxUses) |
| break; |
| NumUsesExplored++; |
| |
| // If the value is used as an argument to a call or invoke, then argument |
| // attributes may provide an answer about null-ness. |
| if (const auto *CB = dyn_cast<CallBase>(U)) |
| if (auto *CalledFunc = CB->getCalledFunction()) |
| for (const Argument &Arg : CalledFunc->args()) |
| if (CB->getArgOperand(Arg.getArgNo()) == V && |
| Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && |
| DT->dominates(CB, CtxI)) |
| return true; |
| |
| // If the value is used as a load/store, then the pointer must be non null. |
| if (V == getLoadStorePointerOperand(U)) { |
| const Instruction *I = cast<Instruction>(U); |
| if (!NullPointerIsDefined(I->getFunction(), |
| V->getType()->getPointerAddressSpace()) && |
| DT->dominates(I, CtxI)) |
| return true; |
| } |
| |
| // Consider only compare instructions uniquely controlling a branch |
| Value *RHS; |
| CmpInst::Predicate Pred; |
| if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) |
| continue; |
| |
| bool NonNullIfTrue; |
| if (cmpExcludesZero(Pred, RHS)) |
| NonNullIfTrue = true; |
| else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) |
| NonNullIfTrue = false; |
| else |
| continue; |
| |
| SmallVector<const User *, 4> WorkList; |
| SmallPtrSet<const User *, 4> Visited; |
| for (auto *CmpU : U->users()) { |
| assert(WorkList.empty() && "Should be!"); |
| if (Visited.insert(CmpU).second) |
| WorkList.push_back(CmpU); |
| |
| while (!WorkList.empty()) { |
| auto *Curr = WorkList.pop_back_val(); |
| |
// If a user is an AND, add all its users to the work list. We only
// propagate the "pred != null" condition through AND because it is only
// correct to assume that all conditions of the AND are met in the true
// branch. TODO: Support similar logic for OR and the EQ predicate?
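// For example (illustrative):
//   %nonnull = icmp ne i8* %p, null
//   %and = and i1 %nonnull, %other
//   br i1 %and, label %true, label %false
// In %true both conditions hold, so %p is known non-null there; in %false
// nothing can be concluded, which is why only the true direction through
// AND is propagated.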
| if (NonNullIfTrue) |
| if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { |
| for (auto *CurrU : Curr->users()) |
| if (Visited.insert(CurrU).second) |
| WorkList.push_back(CurrU); |
| continue; |
| } |
| |
| if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { |
| assert(BI->isConditional() && "uses a comparison!"); |
| |
| BasicBlock *NonNullSuccessor = |
| BI->getSuccessor(NonNullIfTrue ? 0 : 1); |
| BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); |
| if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) |
| return true; |
| } else if (NonNullIfTrue && isGuard(Curr) && |
| DT->dominates(cast<Instruction>(Curr), CtxI)) { |
| return true; |
| } |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value?
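/// For example (illustrative), !range metadata of !{i32 1, i32 10} describes
/// the half-open range [1, 10) and therefore excludes the value 0.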
| static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { |
| const unsigned NumRanges = Ranges->getNumOperands() / 2; |
| assert(NumRanges >= 1); |
| for (unsigned i = 0; i < NumRanges; ++i) { |
| ConstantInt *Lower = |
| mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0)); |
| ConstantInt *Upper = |
| mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1)); |
| ConstantRange Range(Lower->getValue(), Upper->getValue()); |
| if (Range.contains(Value)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// Try to detect a recurrence that monotonically increases/decreases from a |
| /// non-zero starting value. These are common as induction variables. |
| static bool isNonZeroRecurrence(const PHINode *PN) { |
| BinaryOperator *BO = nullptr; |
| Value *Start = nullptr, *Step = nullptr; |
| const APInt *StartC, *StepC; |
| if (!matchSimpleRecurrence(PN, BO, Start, Step) || |
| !match(Start, m_APInt(StartC)) || StartC->isZero()) |
| return false; |
| |
| switch (BO->getOpcode()) { |
| case Instruction::Add: |
| // Starting from non-zero and stepping away from zero can never wrap back |
| // to zero. |
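// For example (illustrative), for
//   %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nuw i32 %iv, 1
// the value starts at 1 and nuw rules out wrapping past the top of the
// unsigned range, so %iv can never become zero.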
| return BO->hasNoUnsignedWrap() || |
| (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) && |
| StartC->isNegative() == StepC->isNegative()); |
| case Instruction::Mul: |
| return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) && |
| match(Step, m_APInt(StepC)) && !StepC->isZero(); |
| case Instruction::Shl: |
| return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap(); |
| case Instruction::AShr: |
| case Instruction::LShr: |
| return BO->isExact(); |
| default: |
| return false; |
| } |
| } |
| |
| /// Return true if the given value is known to be non-zero when defined. For |
| /// vectors, return true if every demanded element is known to be non-zero when |
| /// defined. For pointers, if the context instruction and dominator tree are |
| /// specified, perform context-sensitive analysis and return true if the |
| /// pointer couldn't possibly be null at the specified instruction. |
| /// Supports values with integer or pointer type and vectors of integers. |
| bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, |
| const Query &Q) { |
| // FIXME: We currently have no way to represent the DemandedElts of a scalable |
| // vector |
| if (isa<ScalableVectorType>(V->getType())) |
| return false; |
| |
| if (auto *C = dyn_cast<Constant>(V)) { |
| if (C->isNullValue()) |
| return false; |
| if (isa<ConstantInt>(C)) |
| // Must be non-zero due to null test above. |
| return true; |
| |
| if (auto *CE = dyn_cast<ConstantExpr>(C)) { |
| // See the comment for IntToPtr/PtrToInt instructions below. |
| if (CE->getOpcode() == Instruction::IntToPtr || |
| CE->getOpcode() == Instruction::PtrToInt) |
| if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) |
| .getFixedSize() <= |
| Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize()) |
| return isKnownNonZero(CE->getOperand(0), Depth, Q); |
| } |
| |
| // For constant vectors, check that all elements are undefined or known |
| // non-zero to determine that the whole vector is known non-zero. |
| if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { |
| for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { |
| if (!DemandedElts[i]) |
| continue; |
| Constant *Elt = C->getAggregateElement(i); |
| if (!Elt || Elt->isNullValue()) |
| return false; |
| if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) |
| return false; |
| } |
| return true; |
| } |
| |
// A global variable in address space 0 is non-null unless it is extern weak
// or an absolute symbol reference. Other address spaces may have null as a
// valid address for a global, so we can't assume anything.
| if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { |
| if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && |
| GV->getType()->getAddressSpace() == 0) |
| return true; |
| } else |
| return false; |
| } |
| |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) { |
| // If the possible ranges don't contain zero, then the value is |
| // definitely non-zero. |
| if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { |
| const APInt ZeroValue(Ty->getBitWidth(), 0); |
| if (rangeMetadataExcludesValue(Ranges, ZeroValue)) |
| return true; |
| } |
| } |
| } |
| |
| if (isKnownNonZeroFromAssume(V, Q)) |
| return true; |
| |
| // Some of the tests below are recursive, so bail out if we hit the limit. |
| if (Depth++ >= MaxAnalysisRecursionDepth) |
| return false; |
| |
| // Check for pointer simplifications. |
| |
| if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { |
| // Alloca never returns null, malloc might. |
| if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0) |
| return true; |
| |
// A byval or inalloca argument is non-null unless null pointers are valid
// (defined) in its address space. A nonnull argument is assumed to never
// be 0.
| if (const Argument *A = dyn_cast<Argument>(V)) { |
| if (((A->hasPassPointeeByValueCopyAttr() && |
| !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || |
| A->hasNonNullAttr())) |
| return true; |
| } |
| |
| // A Load tagged with nonnull metadata is never null. |
| if (const LoadInst *LI = dyn_cast<LoadInst>(V)) |
| if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull)) |
| return true; |
| |
| if (const auto *Call = dyn_cast<CallBase>(V)) { |
| if (Call->isReturnNonNull()) |
| return true; |
| if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) |
| return isKnownNonZero(RP, Depth, Q); |
| } |
| } |
| |
| if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) |
| return true; |
| |
| // Check for recursive pointer simplifications. |
| if (V->getType()->isPointerTy()) { |
| // Look through bitcast operations, GEPs, and int2ptr instructions as they |
| // do not alter the value, or at least not the nullness property of the |
| // value, e.g., int2ptr is allowed to zero/sign extend the value. |
| // |
| // Note that we have to take special care to avoid looking through |
| // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well |
| // as casts that can alter the value, e.g., AddrSpaceCasts. |
| if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) |
| return isGEPKnownNonNull(GEP, Depth, Q); |
| |
| if (auto *BCO = dyn_cast<BitCastOperator>(V)) |
| return isKnownNonZero(BCO->getOperand(0), Depth, Q); |
| |
| if (auto *I2P = dyn_cast<IntToPtrInst>(V)) |
| if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <= |
| Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize()) |
| return isKnownNonZero(I2P->getOperand(0), Depth, Q); |
| } |
| |
| // Similar to int2ptr above, we can look through ptr2int here if the cast |
| // is a no-op or an extend and not a truncate. |
| if (auto *P2I = dyn_cast<PtrToIntInst>(V)) |
| if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <= |
| Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize()) |
| return isKnownNonZero(P2I->getOperand(0), Depth, Q); |
| |
| unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); |
| |
| // X | Y != 0 if X != 0 or Y != 0. |
| Value *X = nullptr, *Y = nullptr; |
| if (match(V, m_Or(m_Value(X), m_Value(Y)))) |
| return isKnownNonZero(X, DemandedElts, Depth, Q) || |
| isKnownNonZero(Y, DemandedElts, Depth, Q); |
| |
| // ext X != 0 if X != 0. |
| if (isa<SExtInst>(V) || isa<ZExtInst>(V)) |
| return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q); |
| |
| // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined |
| // if the lowest bit is shifted off the end. |
| if (match(V, m_Shl(m_Value(X), m_Value(Y)))) { |
| // shl nuw can't remove any non-zero bits. |
| const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); |
| if (Q.IIQ.hasNoUnsignedWrap(BO)) |
| return isKnownNonZero(X, Depth, Q); |
| |
| KnownBits Known(BitWidth); |
| computeKnownBits(X, DemandedElts, Known, Depth, Q); |
| if (Known.One[0]) |
| return true; |
| } |
| // shr X, Y != 0 if X is negative. Note that the value of the shift is not |
| // defined if the sign bit is shifted off the end. |
| else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { |
| // shr exact can only shift out zero bits. |
| const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); |
| if (BO->isExact()) |
| return isKnownNonZero(X, Depth, Q); |
| |
| KnownBits Known = computeKnownBits(X, DemandedElts, Depth, Q); |
| if (Known.isNegative()) |
| return true; |
| |
| // If the shifter operand is a constant, and all of the bits shifted |
| // out are known to be zero, and X is known non-zero then at least one |
| // non-zero bit must remain. |
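// For example (illustrative), for lshr i8 %x, 2 where the two low bits of
// %x are known zero and %x is known non-zero, some bit at position 2 or
// above must be set, and that bit survives the shift, so the result is
// non-zero.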
| if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { |
| auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); |
| // Is there a known one in the portion not shifted out? |
| if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal) |
| return true; |
| // Are all the bits to be shifted out known zero? |
| if (Known.countMinTrailingZeros() >= ShiftVal) |
| return isKnownNonZero(X, DemandedElts, Depth, Q); |
| } |
| } |
| // div exact can only produce a zero if the dividend is zero. |
| else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { |
| return isKnownNonZero(X, DemandedElts, Depth, Q); |
| } |
| // X + Y. |
| else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { |
| KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); |
| KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); |
| |
| // If X and Y are both non-negative (as signed values) then their sum is not |
| // zero unless both X and Y are zero. |
| if (XKnown.isNonNegative() && YKnown.isNonNegative()) |
| if (isKnownNonZero(X, DemandedElts, Depth, Q) || |
| isKnownNonZero(Y, DemandedElts, Depth, Q)) |
| return true; |
| |
| // If X and Y are both negative (as signed values) then their sum is not |
| // zero unless both X and Y equal INT_MIN. |
| if (XKnown.isNegative() && YKnown.isNegative()) { |
| APInt Mask = APInt::getSignedMaxValue(BitWidth); |
| // The sign bit of X is set. If some other bit is set then X is not equal |
| // to INT_MIN. |
| if (XKnown.One.intersects(Mask)) |
| return true; |
| |