| //===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// Provides analysis for querying information about KnownBits during GISel |
| /// passes. |
| // |
| //===----------------------------------------------------------------------===// |
| #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/FloatingPointMode.h" |
| #include "llvm/ADT/ScopeExit.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/Analysis/VectorUtils.h" |
| #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
| #include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h" |
| #include "llvm/CodeGen/GlobalISel/Utils.h" |
| #include "llvm/CodeGen/LowLevelTypeUtils.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/Register.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetOpcodes.h" |
| #include "llvm/IR/ConstantRange.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/FMF.h" |
| #include "llvm/MC/TargetRegistry.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/KnownFPClass.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| #define DEBUG_TYPE "gisel-known-bits" |
| |
| using namespace llvm; |
| using namespace MIPatternMatch; |
| |
| char llvm::GISelValueTrackingAnalysisLegacy::ID = 0; |
| |
| INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE, |
| "Analysis for Computing Known Bits", false, true) |
| |
| GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth) |
| : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), |
| DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {} |
| |
| Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) { |
| const MachineInstr *MI = MRI.getVRegDef(R); |
| switch (MI->getOpcode()) { |
| case TargetOpcode::COPY: |
| return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); |
| case TargetOpcode::G_ASSERT_ALIGN: { |
| // TODO: Min with source |
| return Align(MI->getOperand(2).getImm()); |
| } |
| case TargetOpcode::G_FRAME_INDEX: { |
| int FrameIdx = MI->getOperand(1).getIndex(); |
| return MF.getFrameInfo().getObjectAlign(FrameIdx); |
| } |
| case TargetOpcode::G_INTRINSIC: |
| case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
| case TargetOpcode::G_INTRINSIC_CONVERGENT: |
| case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
| default: |
| return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1); |
| } |
| } |
| |
| KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) { |
| assert(MI.getNumExplicitDefs() == 1 && |
| "expected single return generic instruction"); |
| return getKnownBits(MI.getOperand(0).getReg()); |
| } |
| |
| KnownBits GISelValueTracking::getKnownBits(Register R) { |
| const LLT Ty = MRI.getType(R); |
| // Since the number of lanes in a scalable vector is unknown at compile time, |
| // we track one bit which is implicitly broadcast to all lanes. This means |
| // that all lanes in a scalable vector are considered demanded. |
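| // E.g. a fixed <4 x s32> vector uses DemandedElts = 0b1111 (one bit per |
| // lane), while a scalar or a scalable vector uses the single bit |
| // APInt(1, 1). |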
| APInt DemandedElts = |
| Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); |
| return getKnownBits(R, DemandedElts); |
| } |
| |
| KnownBits GISelValueTracking::getKnownBits(Register R, |
| const APInt &DemandedElts, |
| unsigned Depth) { |
| // For now, we only maintain the cache during one request. |
| assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared"); |
| |
| KnownBits Known; |
| computeKnownBitsImpl(R, Known, DemandedElts, Depth); |
| ComputeKnownBitsCache.clear(); |
| return Known; |
| } |
| |
| bool GISelValueTracking::signBitIsZero(Register R) { |
| LLT Ty = MRI.getType(R); |
| unsigned BitWidth = Ty.getScalarSizeInBits(); |
| return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); |
| } |
| |
| APInt GISelValueTracking::getKnownZeroes(Register R) { |
| return getKnownBits(R).Zero; |
| } |
| |
| APInt GISelValueTracking::getKnownOnes(Register R) { |
| return getKnownBits(R).One; |
| } |
| |
| LLVM_ATTRIBUTE_UNUSED static void |
| dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { |
| dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth |
| << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" |
| << toString(Known.Zero | Known.One, 16, false) << "\n" |
| << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false) |
| << "\n" |
| << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false) |
| << "\n"; |
| } |
| |
| /// Compute known bits for the intersection of \p Src0 and \p Src1 |
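| /// A bit is known in the result only if it is known, with the same value, |
| /// in both sources (e.g. for G_SELECT, where either operand may be chosen). |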
| void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1, |
| KnownBits &Known, |
| const APInt &DemandedElts, |
| unsigned Depth) { |
| // Test src1 first, since we canonicalize simpler expressions to the RHS. |
| computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); |
| |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| return; |
| |
| KnownBits Known2; |
| computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); |
| |
| // Only known if known in both the LHS and RHS. |
| Known = Known.intersectWith(Known2); |
| } |
| |
| // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is |
| // created using Width. Use this function when the inputs are KnownBits |
| // objects. TODO: Move this to KnownBits.h if this is usable in more cases. |
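| // E.g. with BitWidth = 32 and Width known to lie in [4, 8], Mask has bits |
| // [31:8] known zero (from the maximum width), bits [3:0] known one (from |
| // the minimum width), and bits [7:4] unknown. |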
| static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown, |
| const KnownBits &OffsetKnown, |
| const KnownBits &WidthKnown) { |
| KnownBits Mask(BitWidth); |
| Mask.Zero = APInt::getBitsSetFrom( |
| BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth)); |
| Mask.One = APInt::getLowBitsSet( |
| BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth)); |
| return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask; |
| } |
| |
| void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, |
| const APInt &DemandedElts, |
| unsigned Depth) { |
| MachineInstr &MI = *MRI.getVRegDef(R); |
| unsigned Opcode = MI.getOpcode(); |
| LLT DstTy = MRI.getType(R); |
| |
| // Handle the case where this is called on a register that does not have a |
| // type constraint. For example, it may be post-ISel or this target might not |
| // preserve the type when early-selecting instructions. |
| if (!DstTy.isValid()) { |
| Known = KnownBits(); |
| return; |
| } |
| |
| #ifndef NDEBUG |
| if (DstTy.isFixedVector()) { |
| assert( |
| DstTy.getNumElements() == DemandedElts.getBitWidth() && |
| "DemandedElt width should equal the fixed vector number of elements"); |
| } else { |
| assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) && |
| "DemandedElt width should be 1 for scalars or scalable vectors"); |
| } |
| #endif |
| |
| unsigned BitWidth = DstTy.getScalarSizeInBits(); |
| auto CacheEntry = ComputeKnownBitsCache.find(R); |
| if (CacheEntry != ComputeKnownBitsCache.end()) { |
| Known = CacheEntry->second; |
| LLVM_DEBUG(dbgs() << "Cache hit at "); |
| LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
| assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match"); |
| return; |
| } |
| Known = KnownBits(BitWidth); // Don't know anything |
| |
| // Depth may get bigger than max depth if it gets passed to a different |
| // GISelValueTracking object. |
| // This may happen when say a generic part uses a GISelValueTracking object |
| // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr |
| // which creates a new GISelValueTracking object with a different and smaller |
| // depth. If we just check for equality, we would never exit if the depth |
| // that is passed down to the target specific GISelValueTracking object is |
| // already bigger than its max depth. |
| if (Depth >= getMaxDepth()) |
| return; |
| |
| if (!DemandedElts) |
| return; // No demanded elts, better to assume we don't know anything. |
| |
| KnownBits Known2; |
| |
| switch (Opcode) { |
| default: |
| TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, |
| Depth); |
| break; |
| case TargetOpcode::G_BUILD_VECTOR: { |
| // Collect the known bits that are shared by every demanded vector element. |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { |
| if (!DemandedElts[I]) |
| continue; |
| |
| computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1); |
| |
| // Known bits are the values that are shared by every demanded element. |
| Known = Known.intersectWith(Known2); |
| |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } |
| break; |
| } |
| case TargetOpcode::G_SPLAT_VECTOR: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1), |
| Depth + 1); |
| // Implicitly truncate the bits to match the official semantics of |
| // G_SPLAT_VECTOR. |
| Known = Known.trunc(BitWidth); |
| break; |
| } |
| case TargetOpcode::COPY: |
| case TargetOpcode::G_PHI: |
| case TargetOpcode::PHI: { |
| Known.One = APInt::getAllOnes(BitWidth); |
| Known.Zero = APInt::getAllOnes(BitWidth); |
| // Destination registers should not have subregisters at this |
| // point of the pipeline, otherwise the main live-range will be |
| // defined more than once, which is against SSA. |
| assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); |
| // Record in the cache that we know nothing for MI. |
| // This will get updated later and in the meantime, if we reach that |
| // phi again, because of a loop, we will cut the search thanks to this |
| // cache entry. |
| // We could actually build up more information on the phi by not cutting |
| // the search, but that additional information is more a side effect |
| // than an intended choice. |
| // Therefore, for now, save on compile time until we derive a proper way |
| // to derive known bits for PHIs within loops. |
| ComputeKnownBitsCache[R] = KnownBits(BitWidth); |
| // A PHI's operands are a mix of registers and basic blocks, interleaved. |
| // We only care about the register ones. |
| for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { |
| const MachineOperand &Src = MI.getOperand(Idx); |
| Register SrcReg = Src.getReg(); |
| // Look through trivial copies and phis, but don't look through trivial |
| // copies or phis of the form `%1:(s32) = OP %0:gpr32`, because known-bits |
| // analysis is currently unable to determine the bit width of a register |
| // class. |
| // |
| // We can't use NoSubRegister by name as it's defined by each target but |
| // it's always defined to be 0 by tablegen. |
| if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && |
| MRI.getType(SrcReg).isValid()) { |
| // For COPYs we don't do anything, don't increase the depth. |
| computeKnownBitsImpl(SrcReg, Known2, DemandedElts, |
| Depth + (Opcode != TargetOpcode::COPY)); |
| Known2 = Known2.anyextOrTrunc(BitWidth); |
| Known = Known.intersectWith(Known2); |
| // If we reach a point where we don't know anything |
| // just stop looking through the operands. |
| if (Known.isUnknown()) |
| break; |
| } else { |
| // We know nothing. |
| Known = KnownBits(BitWidth); |
| break; |
| } |
| } |
| break; |
| } |
| case TargetOpcode::G_CONSTANT: { |
| Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue()); |
| break; |
| } |
| case TargetOpcode::G_FRAME_INDEX: { |
| int FrameIdx = MI.getOperand(1).getIndex(); |
| TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF); |
| break; |
| } |
| case TargetOpcode::G_SUB: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::sub(Known, Known2); |
| break; |
| } |
| case TargetOpcode::G_XOR: { |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| |
| Known ^= Known2; |
| break; |
| } |
| case TargetOpcode::G_PTR_ADD: { |
| if (DstTy.isVector()) |
| break; |
| // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? |
| LLT Ty = MRI.getType(MI.getOperand(1).getReg()); |
| if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) |
| break; |
| [[fallthrough]]; |
| } |
| case TargetOpcode::G_ADD: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::add(Known, Known2); |
| break; |
| } |
| case TargetOpcode::G_AND: { |
| // A bit that is zero in either operand is zero in the result. |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| |
| Known &= Known2; |
| break; |
| } |
| case TargetOpcode::G_OR: { |
| // A bit that is one in either operand is one in the result. |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| |
| Known |= Known2; |
| break; |
| } |
| case TargetOpcode::G_MUL: { |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::mul(Known, Known2); |
| break; |
| } |
| case TargetOpcode::G_SELECT: { |
| computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), |
| Known, DemandedElts, Depth + 1); |
| break; |
| } |
| case TargetOpcode::G_SMIN: { |
| // TODO: Handle clamp pattern with number of sign bits |
| KnownBits KnownRHS; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::smin(Known, KnownRHS); |
| break; |
| } |
| case TargetOpcode::G_SMAX: { |
| // TODO: Handle clamp pattern with number of sign bits |
| KnownBits KnownRHS; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::smax(Known, KnownRHS); |
| break; |
| } |
| case TargetOpcode::G_UMIN: { |
| KnownBits KnownRHS; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::umin(Known, KnownRHS); |
| break; |
| } |
| case TargetOpcode::G_UMAX: { |
| KnownBits KnownRHS; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::umax(Known, KnownRHS); |
| break; |
| } |
| case TargetOpcode::G_FCMP: |
| case TargetOpcode::G_ICMP: { |
| if (DstTy.isVector()) |
| break; |
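| // With zero-or-one boolean contents, a scalar compare result is 0 or 1, |
| // so every bit above bit 0 is known zero. |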
| if (TL.getBooleanContents(DstTy.isVector(), |
| Opcode == TargetOpcode::G_FCMP) == |
| TargetLowering::ZeroOrOneBooleanContent && |
| BitWidth > 1) |
| Known.Zero.setBitsFrom(1); |
| break; |
| } |
| case TargetOpcode::G_SEXT: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| // If the sign bit is known to be zero or one, sext extends it into the |
| // top bits; otherwise the top bits remain unknown. |
| Known = Known.sext(BitWidth); |
| break; |
| } |
| case TargetOpcode::G_ASSERT_SEXT: |
| case TargetOpcode::G_SEXT_INREG: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| Known = Known.sextInReg(MI.getOperand(2).getImm()); |
| break; |
| } |
| case TargetOpcode::G_ANYEXT: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, |
| Depth + 1); |
| Known = Known.anyext(BitWidth); |
| break; |
| } |
| case TargetOpcode::G_LOAD: { |
| const MachineMemOperand *MMO = *MI.memoperands_begin(); |
| KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); |
| if (const MDNode *Ranges = MMO->getRanges()) |
| computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); |
| Known = KnownRange.anyext(Known.getBitWidth()); |
| break; |
| } |
| case TargetOpcode::G_SEXTLOAD: |
| case TargetOpcode::G_ZEXTLOAD: { |
| if (DstTy.isVector()) |
| break; |
| const MachineMemOperand *MMO = *MI.memoperands_begin(); |
| KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); |
| if (const MDNode *Ranges = MMO->getRanges()) |
| computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); |
| Known = Opcode == TargetOpcode::G_SEXTLOAD |
| ? KnownRange.sext(Known.getBitWidth()) |
| : KnownRange.zext(Known.getBitWidth()); |
| break; |
| } |
| case TargetOpcode::G_ASHR: { |
| KnownBits LHSKnown, RHSKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::ashr(LHSKnown, RHSKnown); |
| break; |
| } |
| case TargetOpcode::G_LSHR: { |
| KnownBits LHSKnown, RHSKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::lshr(LHSKnown, RHSKnown); |
| break; |
| } |
| case TargetOpcode::G_SHL: { |
| KnownBits LHSKnown, RHSKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, |
| Depth + 1); |
| Known = KnownBits::shl(LHSKnown, RHSKnown); |
| break; |
| } |
| case TargetOpcode::G_INTTOPTR: |
| case TargetOpcode::G_PTRTOINT: |
| if (DstTy.isVector()) |
| break; |
| // Fall through and handle them the same as zext/trunc. |
| [[fallthrough]]; |
| case TargetOpcode::G_ZEXT: |
| case TargetOpcode::G_TRUNC: { |
| Register SrcReg = MI.getOperand(1).getReg(); |
| computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); |
| Known = Known.zextOrTrunc(BitWidth); |
| break; |
| } |
| case TargetOpcode::G_ASSERT_ZEXT: { |
| Register SrcReg = MI.getOperand(1).getReg(); |
| computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); |
| |
| unsigned SrcBitWidth = MI.getOperand(2).getImm(); |
| assert(SrcBitWidth && "SrcBitWidth can't be zero"); |
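| // E.g. G_ASSERT_ZEXT with SrcBitWidth = 8 on an s32 value: bits [31:8] |
| // become known zero and known ones are confined to bits [7:0]. |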
| APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth); |
| Known.Zero |= (~InMask); |
| Known.One &= (~Known.Zero); |
| break; |
| } |
| case TargetOpcode::G_ASSERT_ALIGN: { |
| int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm()); |
| |
| // TODO: Should use maximum with source |
| // If a value is guaranteed to be aligned, its low bits are known zero; |
| // set them in Known.Zero and clear them from Known.One. |
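| // E.g. an asserted alignment of 16 gives LogOfAlign = 4, so bits [3:0] |
| // are known zero. |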
| Known.Zero.setLowBits(LogOfAlign); |
| Known.One.clearLowBits(LogOfAlign); |
| break; |
| } |
| case TargetOpcode::G_MERGE_VALUES: { |
| unsigned NumOps = MI.getNumOperands(); |
| unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); |
| |
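| // E.g. merging two s16 sources into an s32: the first source fills bits |
| // [15:0] and the second fills bits [31:16]. |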
| for (unsigned I = 0; I != NumOps - 1; ++I) { |
| KnownBits SrcOpKnown; |
| computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, |
| DemandedElts, Depth + 1); |
| Known.insertBits(SrcOpKnown, I * OpSize); |
| } |
| break; |
| } |
| case TargetOpcode::G_UNMERGE_VALUES: { |
| unsigned NumOps = MI.getNumOperands(); |
| Register SrcReg = MI.getOperand(NumOps - 1).getReg(); |
| LLT SrcTy = MRI.getType(SrcReg); |
| |
| if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType()) |
| return; // TODO: Handle vector->subelement unmerges |
| |
| // Figure out the result operand index |
| unsigned DstIdx = 0; |
| for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; |
| ++DstIdx) |
| ; |
| |
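| // E.g. unmerging a <4 x s32> into two <2 x s32> halves: for the second |
| // result (DstIdx = 1) with DemandedElts = 0b01, the demanded source |
| // elements become 0b0100. |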
| APInt SubDemandedElts = DemandedElts; |
| if (SrcTy.isVector()) { |
| unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1; |
| SubDemandedElts = |
| DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes); |
| } |
| |
| KnownBits SrcOpKnown; |
| computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1); |
| |
| if (SrcTy.isVector()) |
| Known = std::move(SrcOpKnown); |
| else |
| Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); |
| break; |
| } |
| case TargetOpcode::G_BSWAP: { |
| Register SrcReg = MI.getOperand(1).getReg(); |
| computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); |
| Known = Known.byteSwap(); |
| break; |
| } |
| case TargetOpcode::G_BITREVERSE: { |
| Register SrcReg = MI.getOperand(1).getReg(); |
| computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); |
| Known = Known.reverseBits(); |
| break; |
| } |
| case TargetOpcode::G_CTPOP: { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, |
| Depth + 1); |
| // We can bound the space the count needs. Also, bits known to be zero |
| // can't contribute to the population. |
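| // E.g. if at most 5 source bits can be set, the result is at most 5, |
| // which fits in 3 bits, so every result bit above bit 2 is known zero. |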
| unsigned BitsPossiblySet = Known2.countMaxPopulation(); |
| unsigned LowBits = llvm::bit_width(BitsPossiblySet); |
| Known.Zero.setBitsFrom(LowBits); |
| // TODO: we could bound Known.One using the lower bound on the number of |
| // bits that might be set, i.e. the population count of Known2.One. |
| break; |
| } |
| case TargetOpcode::G_UBFX: { |
| KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, |
| Depth + 1); |
| Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
| break; |
| } |
| case TargetOpcode::G_SBFX: { |
| KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, |
| Depth + 1); |
| computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, |
| Depth + 1); |
| Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
| // Sign extend the extracted value using shift left and arithmetic shift |
| // right. |
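| // E.g. with BitWidth = 32 and a width known to be exactly 8, ShiftKnown |
| // is 24: the field is shifted into the top byte and arithmetically |
| // shifted back, replicating its sign bit into bits [31:8]. |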
| KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth)); |
| KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown); |
| Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown); |
| break; |
| } |
| case TargetOpcode::G_UADDO: |
| case TargetOpcode::G_UADDE: |
| case TargetOpcode::G_SADDO: |
| case TargetOpcode::G_SADDE: |
| case TargetOpcode::G_USUBO: |
| case TargetOpcode::G_USUBE: |
| case TargetOpcode::G_SSUBO: |
| case TargetOpcode::G_SSUBE: |
| case TargetOpcode::G_UMULO: |
| case TargetOpcode::G_SMULO: { |
| if (MI.getOperand(1).getReg() == R) { |
| // R is the boolean carry/overflow output. If booleans are known to be |
| // zero-or-one values, its top bits are known zero. |
| if (TL.getBooleanContents(DstTy.isVector(), false) == |
| TargetLowering::ZeroOrOneBooleanContent && |
| BitWidth > 1) |
| Known.Zero.setBitsFrom(1); |
| } |
| break; |
| } |
| case TargetOpcode::G_CTLZ: |
| case TargetOpcode::G_CTLZ_ZERO_UNDEF: { |
| KnownBits SrcOpKnown; |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, |
| Depth + 1); |
| // If we have a known 1, its position is our upper bound. |
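| // E.g. for a 32-bit source with no known bits, PossibleLZ is 32, which |
| // fits in 6 bits, so bits [31:6] of the result are known zero. |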
| unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros(); |
| unsigned LowBits = llvm::bit_width(PossibleLZ); |
| Known.Zero.setBitsFrom(LowBits); |
| break; |
| } |
| case TargetOpcode::G_SHUFFLE_VECTOR: { |
| APInt DemandedLHS, DemandedRHS; |
| // Collect the known bits that are shared by every vector element referenced |
| // by the shuffle. |
| unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements(); |
| if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(), |
| DemandedElts, DemandedLHS, DemandedRHS)) |
| break; |
| |
| // Known bits are the values that are shared by every demanded element. |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| if (!!DemandedLHS) { |
| computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS, |
| Depth + 1); |
| Known = Known.intersectWith(Known2); |
| } |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| if (!!DemandedRHS) { |
| computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS, |
| Depth + 1); |
| Known = Known.intersectWith(Known2); |
| } |
| break; |
| } |
| case TargetOpcode::G_CONCAT_VECTORS: { |
| if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector()) |
| break; |
| // Split DemandedElts and test each of the demanded subvectors. |
| Known.Zero.setAllBits(); |
| Known.One.setAllBits(); |
| unsigned NumSubVectorElts = |
| MRI.getType(MI.getOperand(1).getReg()).getNumElements(); |
| |
| for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { |
| APInt DemandedSub = |
| DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts); |
| if (!!DemandedSub) { |
| computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1); |
| |
| Known = Known.intersectWith(Known2); |
| } |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } |
| break; |
| } |
| } |
| |
| LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
| |
| // Update the cache. |
| ComputeKnownBitsCache[R] = Known; |
| } |
| |
| static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) { |
| Ty = Ty.getScalarType(); |
| DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty)); |
| return Mode.Output == DenormalMode::IEEE || |
| Mode.Output == DenormalMode::PositiveZero; |
| } |
| |
| void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known, |
| FPClassTest InterestedClasses, |
| unsigned Depth) { |
| LLT Ty = MRI.getType(R); |
| APInt DemandedElts = |
| Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); |
| computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth); |
| } |
| |
| void GISelValueTracking::computeKnownFPClassForFPTrunc( |
| const MachineInstr &MI, const APInt &DemandedElts, |
| FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) { |
| if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == |
| fcNone) |
| return; |
| |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, |
| Depth + 1); |
| |
| // Sign should be preserved |
| // TODO: Handle cannot be ordered greater than zero |
| if (KnownSrc.cannotBeOrderedLessThanZero()) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| |
| Known.propagateNaN(KnownSrc, true); |
| |
| // Infinity needs a range check. |
| } |
| |
| void GISelValueTracking::computeKnownFPClass(Register R, |
| const APInt &DemandedElts, |
| FPClassTest InterestedClasses, |
| KnownFPClass &Known, |
| unsigned Depth) { |
| assert(Known.isUnknown() && "should not be called with known information"); |
| |
| if (!DemandedElts) { |
| // No demanded elts, better to assume we don't know anything. |
| Known.resetAll(); |
| return; |
| } |
| |
| assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); |
| |
| MachineInstr &MI = *MRI.getVRegDef(R); |
| unsigned Opcode = MI.getOpcode(); |
| LLT DstTy = MRI.getType(R); |
| |
| if (!DstTy.isValid()) { |
| Known.resetAll(); |
| return; |
| } |
| |
| if (auto Cst = GFConstant::getConstant(R, MRI)) { |
| switch (Cst->getKind()) { |
| case GFConstant::GFConstantKind::Scalar: { |
| auto APF = Cst->getScalarValue(); |
| Known.KnownFPClasses = APF.classify(); |
| Known.SignBit = APF.isNegative(); |
| break; |
| } |
| case GFConstant::GFConstantKind::FixedVector: { |
| Known.KnownFPClasses = fcNone; |
| bool SignBitAllZero = true; |
| bool SignBitAllOne = true; |
| |
| for (auto C : *Cst) { |
| Known.KnownFPClasses |= C.classify(); |
| if (C.isNegative()) |
| SignBitAllZero = false; |
| else |
| SignBitAllOne = false; |
| } |
| |
| if (SignBitAllOne != SignBitAllZero) |
| Known.SignBit = SignBitAllOne; |
| |
| break; |
| } |
| case GFConstant::GFConstantKind::ScalableVector: { |
| Known.resetAll(); |
| break; |
| } |
| } |
| |
| return; |
| } |
| |
| FPClassTest KnownNotFromFlags = fcNone; |
| if (MI.getFlag(MachineInstr::MIFlag::FmNoNans)) |
| KnownNotFromFlags |= fcNan; |
| if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs)) |
| KnownNotFromFlags |= fcInf; |
| |
| // We no longer need to find out about these bits from inputs if we can |
| // assume this from flags/attributes. |
| InterestedClasses &= ~KnownNotFromFlags; |
| |
| auto ClearClassesFromFlags = |
| make_scope_exit([=, &Known] { Known.knownNot(KnownNotFromFlags); }); |
| |
| // All recursive calls that increase depth must come after this. |
| if (Depth == MaxAnalysisRecursionDepth) |
| return; |
| |
| const MachineFunction *MF = MI.getMF(); |
| |
| switch (Opcode) { |
| default: |
| TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI, |
| Depth); |
| break; |
| case TargetOpcode::G_FNEG: { |
| Register Val = MI.getOperand(1).getReg(); |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1); |
| Known.fneg(); |
| break; |
| } |
| case TargetOpcode::G_SELECT: { |
| GSelect &SelMI = cast<GSelect>(MI); |
| Register Cond = SelMI.getCondReg(); |
| Register LHS = SelMI.getTrueReg(); |
| Register RHS = SelMI.getFalseReg(); |
| |
| FPClassTest FilterLHS = fcAllFlags; |
| FPClassTest FilterRHS = fcAllFlags; |
| |
| Register TestedValue; |
| FPClassTest MaskIfTrue = fcAllFlags; |
| FPClassTest MaskIfFalse = fcAllFlags; |
| FPClassTest ClassVal = fcNone; |
| |
| CmpInst::Predicate Pred; |
| Register CmpLHS, CmpRHS; |
| if (mi_match(Cond, MRI, |
| m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) { |
| // If the select filters out a value based on the class, it no longer |
| // participates in the class of the result |
| |
| // TODO: In some degenerate cases we can infer something if we try again |
| // without looking through sign operations. |
| bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; |
| std::tie(TestedValue, MaskIfTrue, MaskIfFalse) = |
| fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg); |
| } else if (mi_match( |
| Cond, MRI, |
| m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) { |
| FPClassTest TestedMask = ClassVal; |
| MaskIfTrue = TestedMask; |
| MaskIfFalse = ~TestedMask; |
| } |
| |
| if (TestedValue == LHS) { |
| // match !isnan(x) ? x : y |
| FilterLHS = MaskIfTrue; |
| } else if (TestedValue == RHS) { // && IsExactClass |
| // match !isnan(x) ? y : x |
| FilterRHS = MaskIfFalse; |
| } |
| |
| KnownFPClass Known2; |
| computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, |
| Depth + 1); |
| Known.KnownFPClasses &= FilterLHS; |
| |
| computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, |
| Known2, Depth + 1); |
| Known2.KnownFPClasses &= FilterRHS; |
| |
| Known |= Known2; |
| break; |
| } |
| case TargetOpcode::G_FCOPYSIGN: { |
| Register Magnitude = MI.getOperand(1).getReg(); |
| Register Sign = MI.getOperand(2).getReg(); |
| |
| KnownFPClass KnownSign; |
| |
| computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known, |
| Depth + 1); |
| computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign, |
| Depth + 1); |
| Known.copysign(KnownSign); |
| break; |
| } |
| case TargetOpcode::G_FMA: |
| case TargetOpcode::G_STRICT_FMA: |
| case TargetOpcode::G_FMAD: { |
| if ((InterestedClasses & fcNegative) == fcNone) |
| break; |
| |
| Register A = MI.getOperand(1).getReg(); |
| Register B = MI.getOperand(2).getReg(); |
| Register C = MI.getOperand(3).getReg(); |
| |
| if (A != B) |
| break; |
| |
| // The multiply cannot be -0 and therefore the add can't be -0 |
| Known.knownNot(fcNegZero); |
| |
| // x * x + y is non-negative if y is non-negative. |
| KnownFPClass KnownAddend; |
| computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend, |
| Depth + 1); |
| |
| if (KnownAddend.cannotBeOrderedLessThanZero()) |
| Known.knownNot(fcNegative); |
| break; |
| } |
| case TargetOpcode::G_FSQRT: |
| case TargetOpcode::G_STRICT_FSQRT: { |
| KnownFPClass KnownSrc; |
| FPClassTest InterestedSrcs = InterestedClasses; |
| if (InterestedClasses & fcNan) |
| InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; |
| |
| Register Val = MI.getOperand(1).getReg(); |
| |
| computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); |
| |
| if (KnownSrc.isKnownNeverPosInfinity()) |
| Known.knownNot(fcPosInf); |
| if (KnownSrc.isKnownNever(fcSNan)) |
| Known.knownNot(fcSNan); |
| |
| // Any negative value besides -0 returns a nan. |
| if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) |
| Known.knownNot(fcNan); |
| |
| // The only negative value that can be returned is -0 for -0 inputs. |
| Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); |
| break; |
| } |
| case TargetOpcode::G_FABS: { |
| if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { |
| Register Val = MI.getOperand(1).getReg(); |
| // If we only care about the sign bit we don't need to inspect the |
| // operand. |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, |
| Depth + 1); |
| } |
| Known.fabs(); |
| break; |
| } |
| case TargetOpcode::G_FSIN: |
| case TargetOpcode::G_FCOS: |
| case TargetOpcode::G_FSINCOS: { |
| // Return NaN on infinite inputs. |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, |
| Depth + 1); |
| Known.knownNot(fcInf); |
| |
| if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) |
| Known.knownNot(fcNan); |
| break; |
| } |
| case TargetOpcode::G_FMAXNUM: |
| case TargetOpcode::G_FMINNUM: |
| case TargetOpcode::G_FMINNUM_IEEE: |
| case TargetOpcode::G_FMAXIMUM: |
| case TargetOpcode::G_FMINIMUM: |
| case TargetOpcode::G_FMAXNUM_IEEE: |
| case TargetOpcode::G_FMAXIMUMNUM: |
| case TargetOpcode::G_FMINIMUMNUM: { |
| Register LHS = MI.getOperand(1).getReg(); |
| Register RHS = MI.getOperand(2).getReg(); |
| KnownFPClass KnownLHS, KnownRHS; |
| |
| computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS, |
| Depth + 1); |
| computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS, |
| Depth + 1); |
| |
| bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); |
| Known = KnownLHS | KnownRHS; |
| |
| // If either operand is not NaN, the result is not NaN. |
| if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM || |
| Opcode == TargetOpcode::G_FMAXNUM || |
| Opcode == TargetOpcode::G_FMINIMUMNUM || |
| Opcode == TargetOpcode::G_FMAXIMUMNUM)) |
| Known.knownNot(fcNan); |
| |
| if (Opcode == TargetOpcode::G_FMAXNUM || |
| Opcode == TargetOpcode::G_FMAXIMUMNUM || |
| Opcode == TargetOpcode::G_FMAXNUM_IEEE) { |
| // If at least one operand is known to be positive, the result must be |
| // positive. |
| if ((KnownLHS.cannotBeOrderedLessThanZero() && |
| KnownLHS.isKnownNeverNaN()) || |
| (KnownRHS.cannotBeOrderedLessThanZero() && |
| KnownRHS.isKnownNeverNaN())) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| } else if (Opcode == TargetOpcode::G_FMAXIMUM) { |
| // If at least one operand is known to be positive, the result must be |
| // positive. |
| if (KnownLHS.cannotBeOrderedLessThanZero() || |
| KnownRHS.cannotBeOrderedLessThanZero()) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| } else if (Opcode == TargetOpcode::G_FMINNUM || |
| Opcode == TargetOpcode::G_FMINIMUMNUM || |
| Opcode == TargetOpcode::G_FMINNUM_IEEE) { |
| // If at least one operand is known to be negative, the result must be |
| // negative. |
| if ((KnownLHS.cannotBeOrderedGreaterThanZero() && |
| KnownLHS.isKnownNeverNaN()) || |
| (KnownRHS.cannotBeOrderedGreaterThanZero() && |
| KnownRHS.isKnownNeverNaN())) |
| Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); |
| } else if (Opcode == TargetOpcode::G_FMINIMUM) { |
| // If at least one operand is known to be negative, the result must be |
| // negative. |
| if (KnownLHS.cannotBeOrderedGreaterThanZero() || |
| KnownRHS.cannotBeOrderedGreaterThanZero()) |
| Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); |
| } else { |
| llvm_unreachable("unhandled intrinsic"); |
| } |
| |
| // Fixup zero handling if denormals could be returned as a zero. |
| // |
| // As there's no spec for denormal flushing, be conservative with the |
| // treatment of denormals that could be flushed to zero. For older |
| // subtargets on AMDGPU the min/max instructions would not flush the |
| // output and return the original value. |
| // |
| if ((Known.KnownFPClasses & fcZero) != fcNone && |
| !Known.isKnownNeverSubnormal()) { |
| DenormalMode Mode = |
| MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())); |
| if (Mode != DenormalMode::getIEEE()) |
| Known.KnownFPClasses |= fcZero; |
| } |
| |
| if (Known.isKnownNeverNaN()) { |
| if (KnownLHS.SignBit && KnownRHS.SignBit && |
| *KnownLHS.SignBit == *KnownRHS.SignBit) { |
| if (*KnownLHS.SignBit) |
| Known.signBitMustBeOne(); |
| else |
| Known.signBitMustBeZero(); |
| } else if ((Opcode == TargetOpcode::G_FMAXIMUM || |
| Opcode == TargetOpcode::G_FMINIMUM) || |
| Opcode == TargetOpcode::G_FMAXIMUMNUM || |
| Opcode == TargetOpcode::G_FMINIMUMNUM || |
| Opcode == TargetOpcode::G_FMAXNUM_IEEE || |
| Opcode == TargetOpcode::G_FMINNUM_IEEE || |
| // FIXME: Should be using logical zero versions |
| ((KnownLHS.isKnownNeverNegZero() || |
| KnownRHS.isKnownNeverPosZero()) && |
| (KnownLHS.isKnownNeverPosZero() || |
| KnownRHS.isKnownNeverNegZero()))) { |
| if ((Opcode == TargetOpcode::G_FMAXIMUM || |
| Opcode == TargetOpcode::G_FMAXNUM || |
| Opcode == TargetOpcode::G_FMAXIMUMNUM || |
| Opcode == TargetOpcode::G_FMAXNUM_IEEE) && |
| (KnownLHS.SignBit == false || KnownRHS.SignBit == false)) |
| Known.signBitMustBeZero(); |
| else if ((Opcode == TargetOpcode::G_FMINIMUM || |
| Opcode == TargetOpcode::G_FMINNUM || |
| Opcode == TargetOpcode::G_FMINIMUMNUM || |
| Opcode == TargetOpcode::G_FMINNUM_IEEE) && |
| (KnownLHS.SignBit == true || KnownRHS.SignBit == true)) |
| Known.signBitMustBeOne(); |
| } |
| } |
| break; |
| } |
| case TargetOpcode::G_FCANONICALIZE: { |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, |
| Depth + 1); |
| |
| // This is essentially a stronger form of |
| // propagateCanonicalizingSrc. Other "canonicalizing" operations don't |
| // actually have an IR canonicalization guarantee. |
| |
| // Canonicalize may flush denormals to zero, so we have to consider the |
| // denormal mode to preserve known-not-0 knowledge. |
| Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; |
| |
| // Stronger version of propagateNaN |
| // Canonicalize is guaranteed to quiet signaling nans. |
| if (KnownSrc.isKnownNeverNaN()) |
| Known.knownNot(fcNan); |
| else |
| Known.knownNot(fcSNan); |
| |
| // If the parent function flushes denormals, the canonical output cannot |
| // be a denormal. |
| LLT Ty = MRI.getType(Val).getScalarType(); |
| const fltSemantics &FPType = getFltSemanticForLLT(Ty); |
| DenormalMode DenormMode = MF->getDenormalMode(FPType); |
| if (DenormMode == DenormalMode::getIEEE()) { |
| if (KnownSrc.isKnownNever(fcPosZero)) |
| Known.knownNot(fcPosZero); |
| if (KnownSrc.isKnownNever(fcNegZero)) |
| Known.knownNot(fcNegZero); |
| break; |
| } |
| |
| if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) |
| Known.knownNot(fcSubnormal); |
| |
| if (DenormMode.Input == DenormalMode::PositiveZero || |
| (DenormMode.Output == DenormalMode::PositiveZero && |
| DenormMode.Input == DenormalMode::IEEE)) |
| Known.knownNot(fcNegZero); |
| |
| break; |
| } |
| case TargetOpcode::G_VECREDUCE_FMAX: |
| case TargetOpcode::G_VECREDUCE_FMIN: |
| case TargetOpcode::G_VECREDUCE_FMAXIMUM: |
| case TargetOpcode::G_VECREDUCE_FMINIMUM: { |
| Register Val = MI.getOperand(1).getReg(); |
| // reduce min/max will choose an element from one of the vector elements, |
| // so we can infer any class information that is common to all elements. |
| |
| Known = |
| computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1); |
| // Can only propagate sign if output is never NaN. |
| if (!Known.isKnownNeverNaN()) |
| Known.SignBit.reset(); |
| break; |
| } |
| case TargetOpcode::G_INTRINSIC_TRUNC: |
| case TargetOpcode::G_FFLOOR: |
| case TargetOpcode::G_FCEIL: |
| case TargetOpcode::G_FRINT: |
| case TargetOpcode::G_FNEARBYINT: |
| case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND: |
| case TargetOpcode::G_INTRINSIC_ROUND: { |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| FPClassTest InterestedSrcs = InterestedClasses; |
| if (InterestedSrcs & fcPosFinite) |
| InterestedSrcs |= fcPosFinite; |
| if (InterestedSrcs & fcNegFinite) |
| InterestedSrcs |= fcNegFinite; |
| computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); |
| |
| // Integer results cannot be subnormal. |
| Known.knownNot(fcSubnormal); |
| |
| Known.propagateNaN(KnownSrc, true); |
| |
| // TODO: handle multi unit FPTypes once LLT FPInfo lands |
| |
| // Negative values that round up to 0 produce -0. |
| if (KnownSrc.isKnownNever(fcPosFinite)) |
| Known.knownNot(fcPosFinite); |
| if (KnownSrc.isKnownNever(fcNegFinite)) |
| Known.knownNot(fcNegFinite); |
| |
| break; |
| } |
| case TargetOpcode::G_FEXP: |
| case TargetOpcode::G_FEXP2: |
| case TargetOpcode::G_FEXP10: { |
| Known.knownNot(fcNegative); |
| if ((InterestedClasses & fcNan) == fcNone) |
| break; |
| |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, |
| Depth + 1); |
| if (KnownSrc.isKnownNeverNaN()) { |
| Known.knownNot(fcNan); |
| Known.signBitMustBeZero(); |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_FLOG: |
| case TargetOpcode::G_FLOG2: |
| case TargetOpcode::G_FLOG10: { |
| // log(+inf) -> +inf |
| // log([+-]0.0) -> -inf |
| // log(-inf) -> nan |
| // log(-x) -> nan |
| if ((InterestedClasses & (fcNan | fcInf)) == fcNone) |
| break; |
| |
| FPClassTest InterestedSrcs = InterestedClasses; |
| if ((InterestedClasses & fcNegInf) != fcNone) |
| InterestedSrcs |= fcZero | fcSubnormal; |
| if ((InterestedClasses & fcNan) != fcNone) |
| InterestedSrcs |= fcNan | (fcNegative & ~fcNan); |
| |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); |
| |
| if (KnownSrc.isKnownNeverPosInfinity()) |
| Known.knownNot(fcPosInf); |
| |
| if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) |
| Known.knownNot(fcNan); |
| |
| LLT Ty = MRI.getType(Val).getScalarType(); |
| const fltSemantics &FltSem = getFltSemanticForLLT(Ty); |
| DenormalMode Mode = MF->getDenormalMode(FltSem); |
| |
| if (KnownSrc.isKnownNeverLogicalZero(Mode)) |
| Known.knownNot(fcNegInf); |
| |
| break; |
| } |
| case TargetOpcode::G_FPOWI: { |
| if ((InterestedClasses & fcNegative) == fcNone) |
| break; |
| |
| Register Exp = MI.getOperand(2).getReg(); |
| LLT ExpTy = MRI.getType(Exp); |
| KnownBits ExponentKnownBits = getKnownBits( |
| Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1); |
| |
| if (ExponentKnownBits.Zero[0]) { // Is even |
| Known.knownNot(fcNegative); |
| break; |
| } |
| |
| // Given that exp is an integer, here are the |
| // ways that pow can return a negative value: |
| // |
| // pow(-x, exp) --> negative if exp is odd and x is negative. |
| // pow(-0, exp) --> -inf if exp is negative odd. |
| // pow(-0, exp) --> -0 if exp is positive odd. |
| // pow(-inf, exp) --> -0 if exp is negative odd. |
| // pow(-inf, exp) --> -inf if exp is positive odd. |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1); |
| if (KnownSrc.isKnownNever(fcNegative)) |
| Known.knownNot(fcNegative); |
| break; |
| } |
| case TargetOpcode::G_FLDEXP: |
| case TargetOpcode::G_STRICT_FLDEXP: { |
| Register Val = MI.getOperand(1).getReg(); |
| KnownFPClass KnownSrc; |
| computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, |
| Depth + 1); |
| Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); |
| |
| // Sign is preserved, but underflows may produce zeroes. |
| if (KnownSrc.isKnownNever(fcNegative)) |
| Known.knownNot(fcNegative); |
| else if (KnownSrc.cannotBeOrderedLessThanZero()) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| |
| if (KnownSrc.isKnownNever(fcPositive)) |
| Known.knownNot(fcPositive); |
| else if (KnownSrc.cannotBeOrderedGreaterThanZero()) |
| Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); |
| |
| // Can refine inf/zero handling based on the exponent operand. |
| const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; |
| if ((InterestedClasses & ExpInfoMask) == fcNone) |
| break; |
| if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) |
| break; |
| |
| // TODO: Handle constant range of Exp |
| |
| break; |
| } |
| case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { |
| computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, |
| Depth); |
| break; |
| } |
| case TargetOpcode::G_FADD: |
| case TargetOpcode::G_STRICT_FADD: |
| case TargetOpcode::G_FSUB: |
| case TargetOpcode::G_STRICT_FSUB: { |
| Register LHS = MI.getOperand(1).getReg(); |
| Register RHS = MI.getOperand(2).getReg(); |
| KnownFPClass KnownLHS, KnownRHS; |
| bool WantNegative = |
| (Opcode == TargetOpcode::G_FADD || |
| Opcode == TargetOpcode::G_STRICT_FADD) && |
| (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; |
| bool WantNaN = (InterestedClasses & fcNan) != fcNone; |
| bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; |
| |
| if (!WantNaN && !WantNegative && !WantNegZero) |
| break; |
| |
| FPClassTest InterestedSrcs = InterestedClasses; |
| if (WantNegative) |
| InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; |
| if (InterestedClasses & fcNan) |
| InterestedSrcs |= fcInf; |
| computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1); |
| |
| if ((WantNaN && KnownRHS.isKnownNeverNaN()) || |
| (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || |
| WantNegZero || |
| (Opcode == TargetOpcode::G_FSUB || |
| Opcode == TargetOpcode::G_STRICT_FSUB)) { |
| |
| // RHS is canonically cheaper to compute. Skip inspecting the LHS if |
| // there's no point. |
| computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS, |
| Depth + 1); |
| // Adding positive and negative infinity produces NaN. |
| // TODO: Check sign of infinities. |
| if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
| (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) |
| Known.knownNot(fcNan); |
| |
| if (Opcode == TargetOpcode::G_FADD || |
| Opcode == TargetOpcode::G_STRICT_FADD) { |
| if (KnownLHS.cannotBeOrderedLessThanZero() && |
| KnownRHS.cannotBeOrderedLessThanZero()) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| |
| // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. |
| if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType()))) || |
| KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))) && |
| // Make sure output negative denormal can't flush to -0 |
| outputDenormalIsIEEEOrPosZero(*MF, DstTy)) |
| Known.knownNot(fcNegZero); |
| } else { |
| // Only fsub -0, +0 can return -0 |
| if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType()))) || |
| KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))) && |
| // Make sure output negative denormal can't flush to -0 |
| outputDenormalIsIEEEOrPosZero(*MF, DstTy)) |
| Known.knownNot(fcNegZero); |
| } |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_FMUL: |
| case TargetOpcode::G_STRICT_FMUL: { |
| Register LHS = MI.getOperand(1).getReg(); |
| Register RHS = MI.getOperand(2).getReg(); |
| // X * X is always non-negative or a NaN. |
| if (LHS == RHS) |
| Known.knownNot(fcNegative); |
| |
| if ((InterestedClasses & fcNan) != fcNan) |
| break; |
| |
| // fcSubnormal is only needed in case of DAZ. |
| const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; |
| |
| KnownFPClass KnownLHS, KnownRHS; |
| computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1); |
| if (!KnownRHS.isKnownNeverNaN()) |
| break; |
| |
| computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1); |
| if (!KnownLHS.isKnownNeverNaN()) |
| break; |
| |
| if (KnownLHS.SignBit && KnownRHS.SignBit) { |
| if (*KnownLHS.SignBit == *KnownRHS.SignBit) |
| Known.signBitMustBeZero(); |
| else |
| Known.signBitMustBeOne(); |
| } |
| |
| // 0 * +/-inf produces NaN; if neither operand can be infinity, the result |
| // cannot be NaN. |
| if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { |
| Known.knownNot(fcNan); |
| break; |
| } |
| |
| if ((KnownRHS.isKnownNeverInfinity() || |
| KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))) && |
| (KnownLHS.isKnownNeverInfinity() || |
| KnownRHS.isKnownNeverLogicalZero( |
| MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))))) |
| Known.knownNot(fcNan); |
| |
| break; |
| } |
| case TargetOpcode::G_FDIV: |
| case TargetOpcode::G_FREM: { |
| Register LHS = MI.getOperand(1).getReg(); |
| Register RHS = MI.getOperand(2).getReg(); |
| |
| if (LHS == RHS) { |
| // TODO: Could filter out snan if we inspect the operand |
| if (Opcode == TargetOpcode::G_FDIV) { |
| // X / X is always exactly 1.0 or a NaN. |
| Known.KnownFPClasses = fcNan | fcPosNormal; |
| } else { |
| // X % X is always exactly [+-]0.0 or a NaN. |
| Known.KnownFPClasses = fcNan | fcZero; |
| } |
| |
| break; |
| } |
| |
| const bool WantNan = (InterestedClasses & fcNan) != fcNone; |
| const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; |
| const bool WantPositive = Opcode == TargetOpcode::G_FREM && |
| (InterestedClasses & fcPositive) != fcNone; |
| if (!WantNan && !WantNegative && !WantPositive) |
| break; |
| |
| KnownFPClass KnownLHS, KnownRHS; |
| |
| computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative, |
| KnownRHS, Depth + 1); |
| |
| bool KnowSomethingUseful = |
| KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); |
| |
| if (KnowSomethingUseful || WantPositive) { |
| const FPClassTest InterestedLHS = |
| WantPositive ? fcAllFlags |
| : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; |
| |
| computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS, |
| KnownLHS, Depth + 1); |
| } |
| |
| if (Opcode == TargetOpcode::G_FDIV) { |
| // Only 0/0, Inf/Inf produce NaN. |
| if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
| (KnownLHS.isKnownNeverInfinity() || |
| KnownRHS.isKnownNeverInfinity()) && |
| ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))) || |
| (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))))) { |
| Known.knownNot(fcNan); |
| } |
| |
| // X / -0.0 is -Inf (or NaN). |
| // +X / +X is +X |
| if (KnownLHS.isKnownNever(fcNegative) && |
| KnownRHS.isKnownNever(fcNegative)) |
| Known.knownNot(fcNegative); |
| } else { |
| // Inf REM x and x REM 0 produce NaN. |
| if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
| KnownLHS.isKnownNeverInfinity() && |
| KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode( |
| getFltSemanticForLLT(DstTy.getScalarType())))) { |
| Known.knownNot(fcNan); |
| } |
| |
| // The sign for frem is the same as the first operand. |
| if (KnownLHS.cannotBeOrderedLessThanZero()) |
| Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); |
| if (KnownLHS.cannotBeOrderedGreaterThanZero()) |
| Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); |
| |
| // See if we can be more aggressive about the sign of 0. |
| if (KnownLHS.isKnownNever(fcNegative)) |
| Known.knownNot(fcNegative); |
| if (KnownLHS.isKnownNever(fcPositive)) |
| Known.knownNot(fcPositive); |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_FPEXT: { |
| Register Dst = MI.getOperand(0).getReg(); |
| Register Src = MI.getOperand(1).getReg(); |
| // Infinity, nan and zero propagate from source. |
| computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1); |
| |
| LLT DstTy = MRI.getType(Dst).getScalarType(); |
| const fltSemantics &DstSem = getFltSemanticForLLT(DstTy); |
| LLT SrcTy = MRI.getType(Src).getScalarType(); |
| const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy); |
| |
| // All subnormal inputs should be in the normal range in the result type. |
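| // E.g. every f16 subnormal is a normal value in f32, so an f16 -> f32 |
| // extend can never produce a subnormal result. |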
| if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) { |
| if (Known.KnownFPClasses & fcPosSubnormal) |
| Known.KnownFPClasses |= fcPosNormal; |
| if (Known.KnownFPClasses & fcNegSubnormal) |
| Known.KnownFPClasses |= fcNegNormal; |
| Known.knownNot(fcSubnormal); |
| } |
| |
| // Sign bit of a nan isn't guaranteed. |
| if (!Known.isKnownNeverNaN()) |
| Known.SignBit = std::nullopt; |
| break; |
| } |
| case TargetOpcode::G_FPTRUNC: { |
| computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, |
| Depth); |
| break; |
| } |
| case TargetOpcode::G_SITOFP: |
| case TargetOpcode::G_UITOFP: { |
| // Cannot produce nan |
| Known.knownNot(fcNan); |
| |
| // Integers cannot be subnormal |
| Known.knownNot(fcSubnormal); |
| |
| // sitofp and uitofp turn into +0.0 for zero. |
| Known.knownNot(fcNegZero); |
| if (Opcode == TargetOpcode::G_UITOFP) |
| Known.signBitMustBeZero(); |
| |
| Register Val = MI.getOperand(1).getReg(); |
| LLT Ty = MRI.getType(Val); |
| |
| if (InterestedClasses & fcInf) { |
| // Get width of largest magnitude integer (remove a bit if signed). |
| // This still works for a signed minimum value because the largest FP |
| // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). |
| int IntSize = Ty.getScalarSizeInBits(); |
| if (Opcode == TargetOpcode::G_SITOFP) |
| --IntSize; |
| |
| // If the exponent of the largest finite FP value can hold the largest |
| // integer, the result of the cast must be finite. |
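| // E.g. any u32 is finite in f32 (largest exponent 127 >= 32), but a |
| // u32 -> f16 conversion (largest exponent 15) can overflow to +inf. |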
| LLT FPTy = DstTy.getScalarType(); |
| const fltSemantics &FltSem = getFltSemanticForLLT(FPTy); |
| if (ilogb(APFloat::getLargest(FltSem)) >= IntSize) |
| Known.knownNot(fcInf); |
| } |
| |
| break; |
| } |
| // case TargetOpcode::G_MERGE_VALUES: |
| case TargetOpcode::G_BUILD_VECTOR: |
| case TargetOpcode::G_CONCAT_VECTORS: { |
| GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI); |
| |
| if (!DstTy.isFixedVector()) |
| break; |
| |
| bool First = true; |
| for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) { |
| // Only look at sources whose corresponding result element is demanded. |
| bool NeedsElt = DemandedElts[Idx]; |
| |
| // Do we demand the inserted element? |
| if (NeedsElt) { |
| Register Src = Merge.getSourceReg(Idx); |
| if (First) { |
| computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1); |
| First = false; |
| } else { |
| KnownFPClass Known2; |
| computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1); |
| Known |= Known2; |
| } |
| |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_EXTRACT_VECTOR_ELT: { |
| // Look through extract element. If the index is non-constant or |
| // out-of-range demand all elements, otherwise just the extracted |
| // element. |
| GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI); |
| Register Vec = Extract.getVectorReg(); |
| Register Idx = Extract.getIndexReg(); |
| |
| auto CIdx = getIConstantVRegVal(Idx, MRI); |
| |
| LLT VecTy = MRI.getType(Vec); |
| |
| if (VecTy.isFixedVector()) { |
| unsigned NumElts = VecTy.getNumElements(); |
| APInt DemandedVecElts = APInt::getAllOnes(NumElts); |
| if (CIdx && CIdx->ult(NumElts)) |
| DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); |
| return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, |
| Depth + 1); |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_INSERT_VECTOR_ELT: { |
| GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI); |
| Register Vec = Insert.getVectorReg(); |
| Register Elt = Insert.getElementReg(); |
| Register Idx = Insert.getIndexReg(); |
| |
| LLT VecTy = MRI.getType(Vec); |
| |
| if (VecTy.isScalableVector()) |
| return; |
| |
| auto CIdx = getIConstantVRegVal(Idx, MRI); |
| |
| unsigned NumElts = DemandedElts.getBitWidth(); |
| APInt DemandedVecElts = DemandedElts; |
| bool NeedsElt = true; |
| // If we know the index we are inserting to, clear it from Vec check. |
| if (CIdx && CIdx->ult(NumElts)) { |
| DemandedVecElts.clearBit(CIdx->getZExtValue()); |
| NeedsElt = DemandedElts[CIdx->getZExtValue()]; |
| } |
| |
| // Do we demand the inserted element? |
| if (NeedsElt) { |
| computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1); |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } else { |
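| // The inserted element is not demanded; only the demanded lanes of Vec |
| // contribute below. |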
| Known.KnownFPClasses = fcNone; |
| } |
| |
| // Do we need any more elements from Vec? |
| if (!DemandedVecElts.isZero()) { |
| KnownFPClass Known2; |
| computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, |
| Depth + 1); |
| Known |= Known2; |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_SHUFFLE_VECTOR: { |
| // For undef elements, we don't know anything about the common state of |
| // the shuffle result. |
| GShuffleVector &Shuf = cast<GShuffleVector>(MI); |
| APInt DemandedLHS, DemandedRHS; |
| if (DstTy.isScalableVector()) { |
| assert(DemandedElts == APInt(1, 1)); |
| DemandedLHS = DemandedRHS = DemandedElts; |
| } else { |
| if (!llvm::getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(), |
| DemandedElts, DemandedLHS, |
| DemandedRHS)) { |
| Known.resetAll(); |
| return; |
| } |
| } |
| |
| if (!!DemandedLHS) { |
| Register LHS = Shuf.getSrc1Reg(); |
| computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, |
| Depth + 1); |
| |
| // If we don't know any bits, early out. |
| if (Known.isUnknown()) |
| break; |
| } else { |
| Known.KnownFPClasses = fcNone; |
| } |
| |
| if (!!DemandedRHS) { |
| KnownFPClass Known2; |
| Register RHS = Shuf.getSrc2Reg(); |
| computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, |
| Depth + 1); |
| Known |= Known2; |
| } |
| break; |
| } |
| case TargetOpcode::COPY: { |
| Register Src = MI.getOperand(1).getReg(); |
| |
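| // A non-virtual (e.g. physical) source register cannot be traced further; |
| // leave Known conservative. |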
| if (!Src.isVirtual()) |
| return; |
| |
| computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1); |
| break; |
| } |
| } |
| } |
| |
| KnownFPClass |
| GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts, |
| FPClassTest InterestedClasses, |
| unsigned Depth) { |
| KnownFPClass KnownClasses; |
| computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth); |
| return KnownClasses; |
| } |
| |
| KnownFPClass GISelValueTracking::computeKnownFPClass( |
| Register R, FPClassTest InterestedClasses, unsigned Depth) { |
| KnownFPClass Known; |
| computeKnownFPClass(R, Known, InterestedClasses, Depth); |
| return Known; |
| } |
| |
| KnownFPClass GISelValueTracking::computeKnownFPClass( |
| Register R, const APInt &DemandedElts, uint32_t Flags, |
| FPClassTest InterestedClasses, unsigned Depth) { |
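| // The nnan/ninf fast-math flags let us assume the value is never NaN/Inf, |
| // so we both narrow the query and clear those classes from the result. |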
| if (Flags & MachineInstr::MIFlag::FmNoNans) |
| InterestedClasses &= ~fcNan; |
| if (Flags & MachineInstr::MIFlag::FmNoInfs) |
| InterestedClasses &= ~fcInf; |
| |
| KnownFPClass Result = |
| computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth); |
| |
| if (Flags & MachineInstr::MIFlag::FmNoNans) |
| Result.KnownFPClasses &= ~fcNan; |
| if (Flags & MachineInstr::MIFlag::FmNoInfs) |
| Result.KnownFPClasses &= ~fcInf; |
| return Result; |
| } |
| |
| KnownFPClass GISelValueTracking::computeKnownFPClass( |
| Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) { |
| LLT Ty = MRI.getType(R); |
| APInt DemandedElts = |
| Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); |
| return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth); |
| } |
| |
| /// Compute number of sign bits for the intersection of \p Src0 and \p Src1 |
| unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1, |
| const APInt &DemandedElts, |
| unsigned Depth) { |
| // Test src1 first, since we canonicalize simpler expressions to the RHS. |
| unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); |
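| // If Src1 already gives the weakest possible answer, the intersection |
| // cannot do better, so skip computing Src0. |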
| if (Src1SignBits == 1) |
| return 1; |
| return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); |
| } |
| |
| /// Compute the known number of sign bits with attached range metadata in the |
| /// memory operand. If this is an extending load, accounts for the behavior of |
| /// the high bits. |
| static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld, |
| unsigned TyBits) { |
| const MDNode *Ranges = Ld->getRanges(); |
| if (!Ranges) |
| return 1; |
| |
| ConstantRange CR = getConstantRangeFromMetadata(*Ranges); |
| if (TyBits > CR.getBitWidth()) { |
| switch (Ld->getOpcode()) { |
| case TargetOpcode::G_SEXTLOAD: |
| CR = CR.signExtend(TyBits); |
| break; |
| case TargetOpcode::G_ZEXTLOAD: |
| CR = CR.zeroExtend(TyBits); |
| break; |
| default: |
| break; |
| } |
| } |
| |
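| // The fewest sign bits over the whole range occurs at one of the two |
| // endpoints, where the magnitude is largest. |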
| return std::min(CR.getSignedMin().getNumSignBits(), |
| CR.getSignedMax().getNumSignBits()); |
| } |
| |
| unsigned GISelValueTracking::computeNumSignBits(Register R, |
| const APInt &DemandedElts, |
| unsigned Depth) { |
| MachineInstr &MI = *MRI.getVRegDef(R); |
| unsigned Opcode = MI.getOpcode(); |
| |
| if (Opcode == TargetOpcode::G_CONSTANT) |
| return MI.getOperand(1).getCImm()->getValue().getNumSignBits(); |
| |
| if (Depth == getMaxDepth()) |
| return 1; |
| |
| if (!DemandedElts) |
| return 1; // No demanded elts, better to assume we don't know anything. |
| |
| LLT DstTy = MRI.getType(R); |
| const unsigned TyBits = DstTy.getScalarSizeInBits(); |
| |
| // Handle the case where this is called on a register that does not have a |
| // type constraint. This is unlikely to occur except by looking through |
| // copies, but it is possible for the initial register being queried to be |
| // in this state. |
| if (!DstTy.isValid()) |
| return 1; |
| |
| unsigned FirstAnswer = 1; |
| switch (Opcode) { |
| case TargetOpcode::COPY: { |
| MachineOperand &Src = MI.getOperand(1); |
| if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && |
| MRI.getType(Src.getReg()).isValid()) { |
| // Don't increment Depth for this one since we didn't do any work. |
| return computeNumSignBits(Src.getReg(), DemandedElts, Depth); |
| } |
| |
| return 1; |
| } |
| case TargetOpcode::G_SEXT: { |
| Register Src = MI.getOperand(1).getReg(); |
| LLT SrcTy = MRI.getType(Src); |
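| // e.g. G_SEXT from s8 to s32 adds 24 known copies of the sign bit on top |
| // of whatever the source already provides. |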
| unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); |
| return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; |
| } |
| case TargetOpcode::G_ASSERT_SEXT: |
| case TargetOpcode::G_SEXT_INREG: { |
| // Max of the input and what this extends. |
| Register Src = MI.getOperand(1).getReg(); |
| unsigned SrcBits = MI.getOperand(2).getImm(); |
| unsigned InRegBits = TyBits - SrcBits + 1; |
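| // e.g. G_SEXT_INREG %x, 8 on s32 guarantees at least 32 - 8 + 1 = 25 sign |
| // bits regardless of the input. |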
| return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), |
| InRegBits); |
| } |
| case TargetOpcode::G_LOAD: { |
| GLoad *Ld = cast<GLoad>(&MI); |
| if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) |
| break; |
| |
| return computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
| } |
| case TargetOpcode::G_SEXTLOAD: { |
| GSExtLoad *Ld = cast<GSExtLoad>(&MI); |
| |
| // FIXME: We need an in-memory type representation. |
| if (DstTy.isVector()) |
| return 1; |
| |
| unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
| if (NumBits != 1) |
| return NumBits; |
| |
| // e.g. i16->i32 = '17' bits known. |
| const MachineMemOperand *MMO = *MI.memoperands_begin(); |
| return TyBits - MMO->getSizeInBits().getValue() + 1; |
| } |
| case TargetOpcode::G_ZEXTLOAD: { |
| GZExtLoad *Ld = cast<GZExtLoad>(&MI); |
| |
| // FIXME: We need an in-memory type representation. |
| if (DstTy.isVector()) |
| return 1; |
| |
| unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
| if (NumBits != 1) |
| return NumBits; |
| |
| // e.g. i16->i32 = '16' bits known. |
| const MachineMemOperand *MMO = *MI.memoperands_begin(); |
| return TyBits - MMO->getSizeInBits().getValue(); |
| } |
| case TargetOpcode::G_AND: |
| case TargetOpcode::G_OR: |
| case TargetOpcode::G_XOR: { |
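| // Bitwise logic ops preserve at least the smaller of the two operands' |
| // sign-bit counts. |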
| Register Src1 = MI.getOperand(1).getReg(); |
| unsigned Src1NumSignBits = |
| computeNumSignBits(Src1, DemandedElts, Depth + 1); |
| if (Src1NumSignBits != 1) { |
| Register Src2 = MI.getOperand(2).getReg(); |
| unsigned Src2NumSignBits = |
| computeNumSignBits(Src2, DemandedElts, Depth + 1); |
| FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits); |
| } |
| break; |
| } |
| case TargetOpcode::G_TRUNC: { |
| Register Src = MI.getOperand(1).getReg(); |
| LLT SrcTy = MRI.getType(Src); |
| |
| // Check if the sign bits of source go down as far as the truncated value. |
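| // e.g. truncating an s64 value with 40 sign bits to s32 leaves |
| // 40 - (64 - 32) = 8 sign bits. |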
| unsigned DstTyBits = DstTy.getScalarSizeInBits(); |
| unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); |
| unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1); |
| if (NumSrcSignBits > (NumSrcBits - DstTyBits)) |
| return NumSrcSignBits - (NumSrcBits - DstTyBits); |
| break; |
| } |
| case TargetOpcode::G_SELECT: { |
| return computeNumSignBitsMin(MI.getOperand(2).getReg(), |
| MI.getOperand(3).getReg(), DemandedElts, |
| Depth + 1); |
| } |
| case TargetOpcode::G_SMIN: |
| case TargetOpcode::G_SMAX: |
| case TargetOpcode::G_UMIN: |
| case TargetOpcode::G_UMAX: |
| // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX. |
| return computeNumSignBitsMin(MI.getOperand(1).getReg(), |
| MI.getOperand(2).getReg(), DemandedElts, |
| Depth + 1); |
| case TargetOpcode::G_SADDO: |
| case TargetOpcode::G_SADDE: |
| case TargetOpcode::G_UADDO: |
| case TargetOpcode::G_UADDE: |
| case TargetOpcode::G_SSUBO: |
| case TargetOpcode::G_SSUBE: |
| case TargetOpcode::G_USUBO: |
| case TargetOpcode::G_USUBE: |
| case TargetOpcode::G_SMULO: |
| case TargetOpcode::G_UMULO: { |
| // If R is the boolean overflow/carry output and booleans are represented |
| // as 0/-1, all bits are sign bits. We know that we have an integer-based |
| // boolean since these operations are only available for integers. |
| if (MI.getOperand(1).getReg() == R) { |
| if (TL.getBooleanContents(DstTy.isVector(), false) == |
| TargetLowering::ZeroOrNegativeOneBooleanContent) |
| return TyBits; |
| } |
| |
| break; |
| } |
| case TargetOpcode::G_FCMP: |
| case TargetOpcode::G_ICMP: { |
| bool IsFP = Opcode == TargetOpcode::G_FCMP; |
| if (TyBits == 1) |
| break; |
| auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP); |
| if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) |
| return TyBits; // All bits are sign bits. |
| if (BC == TargetLowering::ZeroOrOneBooleanContent) |
| return TyBits - 1; // Every always-zero bit is a sign bit. |
| break; |
| } |
| case TargetOpcode::G_BUILD_VECTOR: { |
| // Collect the minimum number of sign bits shared by every demanded vector |
| // element. |
| FirstAnswer = TyBits; |
| APInt SingleDemandedElt(1, 1); |
| for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { |
| if (!DemandedElts[I]) |
| continue; |
| |
| unsigned Tmp2 = |
| computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1); |
| FirstAnswer = std::min(FirstAnswer, Tmp2); |
| |
| // If we don't know any bits, early out. |
| if (FirstAnswer == 1) |
| break; |
| } |
| break; |
| } |
| case TargetOpcode::G_CONCAT_VECTORS: { |
| if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector()) |
| break; |
| FirstAnswer = TyBits; |
| // Determine the minimum number of sign bits across all demanded |
| // elts of the input vectors. Early out if the result is already 1. |
| unsigned NumSubVectorElts = |
| MRI.getType(MI.getOperand(1).getReg()).getNumElements(); |
| for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { |
| APInt DemandedSub = |
| DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts); |
| if (!DemandedSub) |
| continue; |
| unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1); |
| |
| FirstAnswer = std::min(FirstAnswer, Tmp2); |
| |
| // If we don't know any bits, early out. |
| if (FirstAnswer == 1) |
| break; |
| } |
| break; |
| } |
| case TargetOpcode::G_SHUFFLE_VECTOR: { |
| // Collect the minimum number of sign bits that are shared by every vector |
| // element referenced by the shuffle. |
| APInt DemandedLHS, DemandedRHS; |
| Register Src1 = MI.getOperand(1).getReg(); |
| unsigned NumElts = MRI.getType(Src1).getNumElements(); |
| if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(), |
| DemandedElts, DemandedLHS, DemandedRHS)) |
| return 1; |
| |
| if (!!DemandedLHS) |
| FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1); |
| // If we don't know anything, early out and use the known-bits fallback. |
| if (FirstAnswer == 1) |
| break; |
| if (!!DemandedRHS) { |
| unsigned Tmp2 = |
| computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1); |
| FirstAnswer = std::min(FirstAnswer, Tmp2); |
| } |
| break; |
| } |
| case TargetOpcode::G_SPLAT_VECTOR: { |
| // The splat source may be implicitly truncated to the element type; check |
| // if the source's sign bits reach down as far as the element width. |
| Register Src = MI.getOperand(1).getReg(); |
| unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1); |
| unsigned NumSrcBits = MRI.getType(Src).getSizeInBits(); |
| if (NumSrcSignBits > (NumSrcBits - TyBits)) |
| return NumSrcSignBits - (NumSrcBits - TyBits); |
| break; |
| } |
| case TargetOpcode::G_INTRINSIC: |
| case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
| case TargetOpcode::G_INTRINSIC_CONVERGENT: |
| case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
| default: { |
| unsigned NumBits = |
| TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth); |
| if (NumBits > 1) |
| FirstAnswer = std::max(FirstAnswer, NumBits); |
| break; |
| } |
| } |
| |
| // Finally, if we can prove that the top bits of the result are 0's or 1's, |
| // use this information. |
| KnownBits Known = getKnownBits(R, DemandedElts, Depth); |
| APInt Mask; |
| if (Known.isNonNegative()) { // sign bit is 0 |
| Mask = Known.Zero; |
| } else if (Known.isNegative()) { // sign bit is 1 |
| Mask = Known.One; |
| } else { |
| // Nothing known. |
| return FirstAnswer; |
| } |
| |
| // Okay, we know that the sign bit in Mask is set. Use CLO to determine |
| // the number of identical bits in the top of the input value. |
| Mask <<= Mask.getBitWidth() - TyBits; |
| return std::max(FirstAnswer, Mask.countl_one()); |
| } |
| |
| unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) { |
| LLT Ty = MRI.getType(R); |
| APInt DemandedElts = |
| Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); |
| return computeNumSignBits(R, DemandedElts, Depth); |
| } |
| |
| void GISelValueTrackingAnalysisLegacy::getAnalysisUsage( |
| AnalysisUsage &AU) const { |
| AU.setPreservesAll(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction( |
| MachineFunction &MF) { |
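| // Nothing to do here; the analysis result is created lazily in get(). |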
| return false; |
| } |
| |
| GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) { |
| if (!Info) { |
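| // Use a shallower recursion limit at -O0 to keep the analysis cheap. |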
| unsigned MaxDepth = |
| MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; |
| Info = std::make_unique<GISelValueTracking>(MF, MaxDepth); |
| } |
| return *Info; |
| } |
| |
| AnalysisKey GISelValueTrackingAnalysis::Key; |
| |
| GISelValueTracking |
| GISelValueTrackingAnalysis::run(MachineFunction &MF, |
| MachineFunctionAnalysisManager &MFAM) { |
| return Result(MF); |
| } |
| |
| PreservedAnalyses |
| GISelValueTrackingPrinterPass::run(MachineFunction &MF, |
| MachineFunctionAnalysisManager &MFAM) { |
| auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF); |
| const auto &MRI = MF.getRegInfo(); |
| OS << "name: "; |
| MF.getFunction().printAsOperand(OS, /*PrintType=*/false); |
| OS << '\n'; |
| |
| for (MachineBasicBlock &BB : MF) { |
| for (MachineInstr &MI : BB) { |
| for (MachineOperand &MO : MI.defs()) { |
| if (!MO.isReg() || MO.getReg().isPhysical()) |
| continue; |
| Register Reg = MO.getReg(); |
| if (!MRI.getType(Reg).isValid()) |
| continue; |
| KnownBits Known = VTA.getKnownBits(Reg); |
| unsigned SignedBits = VTA.computeNumSignBits(Reg); |
| OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits |
| << '\n'; |
| } |
| } |
| } |
| return PreservedAnalyses::all(); |
| } |