| //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file describes how to lower LLVM code to machine code. This has three |
| /// main components: |
| /// |
| /// 1. Which ValueTypes are natively supported by the target. |
| /// 2. Which operations are supported for supported ValueTypes. |
| /// 3. Cost thresholds for alternative implementations of certain operations. |
| /// |
| /// In addition it has a few other components, like information about FP |
| /// immediates. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
| #define LLVM_CODEGEN_TARGETLOWERING_H |
| |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
| #include "llvm/CodeGen/DAGCombine.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetCallingConv.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/CallSite.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InlineAsm.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <climits> |
| #include <cstdint> |
| #include <iterator> |
| #include <map> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| namespace llvm { |
| |
| class BranchProbability; |
| class CCState; |
| class CCValAssign; |
| class Constant; |
| class FastISel; |
| class FunctionLoweringInfo; |
| class GlobalValue; |
| class IntrinsicInst; |
| struct KnownBits; |
| class LLVMContext; |
| class MachineBasicBlock; |
| class MachineFunction; |
| class MachineInstr; |
| class MachineJumpTableInfo; |
| class MachineLoop; |
| class MachineRegisterInfo; |
| class MCContext; |
| class MCExpr; |
| class Module; |
| class TargetRegisterClass; |
| class TargetLibraryInfo; |
| class TargetRegisterInfo; |
| class Value; |
| |
| namespace Sched { |
| |
| enum Preference { |
| None, // No preference |
| Source, // Follow source order. |
| RegPressure, // Scheduling for lowest register pressure. |
| Hybrid, // Scheduling for both latency and register pressure. |
| ILP, // Scheduling for ILP in low register pressure mode. |
| VLIW // Scheduling for VLIW targets. |
| }; |
| |
| } // end namespace Sched |
| |
| /// This base class for TargetLowering contains the SelectionDAG-independent |
| /// parts that can be used from the rest of CodeGen. |
| class TargetLoweringBase { |
| public: |
| /// This enum indicates whether operations are valid for a target, and if not, |
| /// what action should be used to make them valid. |
| enum LegalizeAction : uint8_t { |
| Legal, // The target natively supports this operation. |
| Promote, // This operation should be executed in a larger type. |
| Expand, // Try to expand this to other ops, otherwise use a libcall. |
| LibCall, // Don't try to expand this to other ops, always use a libcall. |
| Custom // Use the LowerOperation hook to implement custom lowering. |
| }; |
| |
| /// This enum indicates whether a type is legal for a target, and if not, |
| /// what action should be used to make it valid. |
| enum LegalizeTypeAction : uint8_t { |
| TypeLegal, // The target natively supports this type. |
| TypePromoteInteger, // Replace this integer with a larger one. |
| TypeExpandInteger, // Split this integer into two of half the size. |
| TypeSoftenFloat, // Convert this float to a same size integer type, |
| // if an operation is not supported in target HW. |
| TypeExpandFloat, // Split this float into two of half the size. |
| TypeScalarizeVector, // Replace this one-element vector with its element. |
| TypeSplitVector, // Split this vector into two of half the size. |
| TypeWidenVector, // This vector should be widened into a larger vector. |
| TypePromoteFloat // Replace this float with a larger one. |
| }; |
| |
| /// LegalizeKind holds the legalization kind that needs to happen to EVT |
| /// in order to type-legalize it. |
| using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
| |
| /// Enum that describes how the target represents true/false values. |
| enum BooleanContent { |
| UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. |
| ZeroOrOneBooleanContent, // All bits zero except for bit 0. |
| ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. |
| }; |
| |
| /// Enum that describes what type of support for selects the target has. |
| enum SelectSupportKind { |
| ScalarValSelect, // The target supports scalar selects (ex: cmov). |
| ScalarCondVectorVal, // The target supports selects with a scalar condition |
| // and vector values (ex: cmov). |
| VectorMaskSelect // The target supports vector selects with a vector |
| // mask (ex: x86 blends). |
| }; |
| |
| /// Enum that specifies what an atomic load/AtomicRMWInst is expanded |
| /// to, if at all. Exists because different targets have different levels of |
| /// support for these atomic instructions, and also have different options |
| /// w.r.t. what they should expand to. |
| enum class AtomicExpansionKind { |
| None, // Don't expand the instruction. |
| LLSC, // Expand the instruction into loadlinked/storeconditional; used |
| // by ARM/AArch64. |
| LLOnly, // Expand the (load) instruction into just a load-linked, which has |
| // greater atomic guarantees than a normal load. |
| CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. |
| MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. |
| }; |
| |
| /// Enum that specifies when a multiplication should be expanded. |
| enum class MulExpansionKind { |
| Always, // Always expand the instruction. |
| OnlyLegalOrCustom, // Only expand when the resulting instructions are legal |
| // or custom. |
| }; |
| |
| class ArgListEntry { |
| public: |
| Value *Val = nullptr; |
| SDValue Node = SDValue(); |
| Type *Ty = nullptr; |
| bool IsSExt : 1; |
| bool IsZExt : 1; |
| bool IsInReg : 1; |
| bool IsSRet : 1; |
| bool IsNest : 1; |
| bool IsByVal : 1; |
| bool IsInAlloca : 1; |
| bool IsReturned : 1; |
| bool IsSwiftSelf : 1; |
| bool IsSwiftError : 1; |
| uint16_t Alignment = 0; |
| |
| ArgListEntry() |
| : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), |
| IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), |
| IsSwiftSelf(false), IsSwiftError(false) {} |
| |
| void setAttributes(const CallBase *Call, unsigned ArgIdx); |
| |
| void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { |
| return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx); |
| } |
| }; |
| using ArgListTy = std::vector<ArgListEntry>; |
| |
| virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
| ArgListTy &Args) const {}; |
| |
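| /// Return the ISD extension opcode (ANY/ZERO/SIGN_EXTEND) that matches the |
| /// given boolean-contents kind. For illustration only, a caller widening a |
| /// boolean SETCC result might use it roughly like this (a sketch; "TLI" and |
| /// "BoolVT" are assumed names, not part of this interface): |
| /// \code |
| ///   ISD::NodeType ExtOp = TargetLoweringBase::getExtendForContent( |
| ///       TLI.getBooleanContents(BoolVT)); |
| ///   // ZeroOrNegativeOneBooleanContent yields ISD::SIGN_EXTEND. |
| /// \endcode |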
| static ISD::NodeType getExtendForContent(BooleanContent Content) { |
| switch (Content) { |
| case UndefinedBooleanContent: |
| // Extend by adding rubbish bits. |
| return ISD::ANY_EXTEND; |
| case ZeroOrOneBooleanContent: |
| // Extend by adding zero bits. |
| return ISD::ZERO_EXTEND; |
| case ZeroOrNegativeOneBooleanContent: |
| // Extend by copying the sign bit. |
| return ISD::SIGN_EXTEND; |
| } |
| llvm_unreachable("Invalid content kind"); |
| } |
| |
| /// NOTE: The TargetMachine owns TLOF. |
| explicit TargetLoweringBase(const TargetMachine &TM); |
| TargetLoweringBase(const TargetLoweringBase &) = delete; |
| TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
| virtual ~TargetLoweringBase() = default; |
| |
| protected: |
| /// Initialize all of the actions to default values. |
| void initActions(); |
| |
| public: |
| const TargetMachine &getTargetMachine() const { return TM; } |
| |
| virtual bool useSoftFloat() const { return false; } |
| |
| /// Return the pointer type for the given address space, defaults to |
| /// the pointer type from the data layout. |
| /// FIXME: The default needs to be removed once all the code is updated. |
| MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
| return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
| } |
| |
| /// Return the type for frame index, which is determined by |
| /// the alloca address space specified through the data layout. |
| MVT getFrameIndexTy(const DataLayout &DL) const { |
| return getPointerTy(DL, DL.getAllocaAddrSpace()); |
| } |
| |
| /// Return the type for operands of fence. |
| /// TODO: Let fence operands be of i32 type and remove this. |
| virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
| return getPointerTy(DL); |
| } |
| |
| /// EVT is not used in-tree, but is used by out-of-tree targets. |
| /// Documentation for this function would be nice... |
| virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
| |
| EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, |
| bool LegalTypes = true) const; |
| |
| /// Returns the type to be used for the index operand of: |
| /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
| /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR |
| virtual MVT getVectorIdxTy(const DataLayout &DL) const { |
| return getPointerTy(DL); |
| } |
| |
| virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { |
| return true; |
| } |
| |
| /// Return true if it is profitable to convert a select of FP constants into |
| /// a constant pool load whose address depends on the select condition. The |
| /// parameter may be used to differentiate a select with an FP compare from |
| /// one with an integer compare. |
| virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { |
| return true; |
| } |
| |
| /// Return true if multiple condition registers are available. |
| bool hasMultipleConditionRegisters() const { |
| return HasMultipleConditionRegisters; |
| } |
| |
| /// Return true if the target has BitExtract instructions. |
| bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } |
| |
| /// Return the preferred vector type legalization action. |
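| /// As an illustration only (not taken from any in-tree backend), a target |
| /// that prefers widening over element promotion might override it like so; |
| /// "MyTargetLowering" is hypothetical: |
| /// \code |
| ///   TargetLoweringBase::LegalizeTypeAction |
| ///   MyTargetLowering::getPreferredVectorAction(MVT VT) const { |
| ///     if (VT.getVectorNumElements() == 1) |
| ///       return TypeScalarizeVector;  // still scalarize one-element vectors |
| ///     return TypeWidenVector;        // widen instead of promoting elements |
| ///   } |
| /// \endcode |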
| virtual TargetLoweringBase::LegalizeTypeAction |
| getPreferredVectorAction(MVT VT) const { |
| // The default action for one element vectors is to scalarize |
| if (VT.getVectorNumElements() == 1) |
| return TypeScalarizeVector; |
| // The default action for other vectors is to promote |
| return TypePromoteInteger; |
| } |
| |
| // There are two general methods for expanding a BUILD_VECTOR node: |
| // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
| // them together. |
| // 2. Build the vector on the stack and then load it. |
| // If this function returns true, then method (1) will be used, subject to |
| // the constraint that all of the necessary shuffles are legal (as determined |
| // by isShuffleMaskLegal). If this function returns false, then method (2) is |
| // always used. The vector type, and the number of defined values, are |
| // provided. |
| virtual bool |
| shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
| unsigned DefinedValues) const { |
| return DefinedValues < 3; |
| } |
| |
| /// Return true if integer divide is usually cheaper than a sequence of |
| /// several shifts, adds, and multiplies for this target. |
| /// The definition of "cheaper" may depend on whether we're optimizing |
| /// for speed or for size. |
| virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } |
| |
| /// Return true if the target can handle a standalone remainder operation. |
| virtual bool hasStandaloneRem(EVT VT) const { |
| return true; |
| } |
| |
| /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). |
| virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { |
| // Default behavior is to replace SQRT(X) with X*RSQRT(X). |
| return false; |
| } |
| |
| /// Reciprocal estimate status values used by the functions below. |
| enum ReciprocalEstimate : int { |
| Unspecified = -1, |
| Disabled = 0, |
| Enabled = 1 |
| }; |
| |
| /// Return a ReciprocalEstimate enum value for a square root of the given type |
| /// based on the function's attributes. If the operation is not overridden by |
| /// the function's attributes, "Unspecified" is returned and target defaults |
| /// are expected to be used for instruction selection. |
| int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
| |
| /// Return a ReciprocalEstimate enum value for a division of the given type |
| /// based on the function's attributes. If the operation is not overridden by |
| /// the function's attributes, "Unspecified" is returned and target defaults |
| /// are expected to be used for instruction selection. |
| int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
| |
| /// Return the refinement step count for a square root of the given type based |
| /// on the function's attributes. If the operation is not overridden by |
| /// the function's attributes, "Unspecified" is returned and target defaults |
| /// are expected to be used for instruction selection. |
| int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
| |
| /// Return the refinement step count for a division of the given type based |
| /// on the function's attributes. If the operation is not overridden by |
| /// the function's attributes, "Unspecified" is returned and target defaults |
| /// are expected to be used for instruction selection. |
| int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
| |
| /// Returns true if the target has indicated that at least one type should be |
| /// bypassed. |
| bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } |
| |
| /// Returns a map of slow types for division or remainder with their |
| /// corresponding fast types. |
| const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { |
| return BypassSlowDivWidths; |
| } |
| |
| /// Return true if Flow Control is an expensive operation that should be |
| /// avoided. |
| bool isJumpExpensive() const { return JumpIsExpensive; } |
| |
| /// Return true if selects are only cheaper than branches if the branch is |
| /// unlikely to be predicted right. |
| bool isPredictableSelectExpensive() const { |
| return PredictableSelectIsExpensive; |
| } |
| |
| /// If a branch or a select condition is skewed in one direction by more than |
| /// this factor, it is very likely to be predicted correctly. |
| virtual BranchProbability getPredictableBranchThreshold() const; |
| |
| /// Return true if the following transform is beneficial: |
| /// fold (conv (load x)) -> (load (conv*)x) |
| /// On architectures that don't natively support some vector loads |
| /// efficiently, casting the load to a smaller vector of larger types and |
| /// loading is more efficient. However, this can be undone by optimizations |
| /// in the DAG combiner. |
| virtual bool isLoadBitCastBeneficial(EVT LoadVT, |
| EVT BitcastVT) const { |
| // Don't do this if we could do an indexed load on the original type, but |
| // not on the new one. |
| if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
| return true; |
| |
| MVT LoadMVT = LoadVT.getSimpleVT(); |
| |
| // Don't bother doing this if it's just going to be promoted again later, as |
| // doing so might interfere with other combines. |
| if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && |
| getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) |
| return false; |
| |
| return true; |
| } |
| |
| /// Return true if the following transform is beneficial: |
| /// (store (y (conv x)), y*)) -> (store x, (x*)) |
| virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const { |
| // Default to the same logic as loads. |
| return isLoadBitCastBeneficial(StoreVT, BitcastVT); |
| } |
| |
| /// Return true if it is expected to be cheaper to do a store of a non-zero |
| /// vector constant with the given size and type for the address space than to |
| /// store the individual scalar element constants. |
| virtual bool storeOfVectorConstantIsCheap(EVT MemVT, |
| unsigned NumElem, |
| unsigned AddrSpace) const { |
| return false; |
| } |
| |
| /// Allow store merging after legalization in addition to before legalization. |
| /// This may catch stores that do not exist earlier (e.g., stores created from |
| /// intrinsics). |
| virtual bool mergeStoresAfterLegalization() const { return true; } |
| |
| /// Returns true if it's reasonable to merge stores to MemVT size. |
| virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, |
| const SelectionDAG &DAG) const { |
| return true; |
| } |
| |
| /// Return true if it is cheap to speculate a call to intrinsic cttz. |
| virtual bool isCheapToSpeculateCttz() const { |
| return false; |
| } |
| |
| /// Return true if it is cheap to speculate a call to intrinsic ctlz. |
| virtual bool isCheapToSpeculateCtlz() const { |
| return false; |
| } |
| |
| /// Return true if ctlz instruction is fast. |
| virtual bool isCtlzFast() const { |
| return false; |
| } |
| |
| /// Return true if it is safe to transform an integer-domain bitwise operation |
| /// into the equivalent floating-point operation. This should be set to true |
| /// if the target has IEEE-754-compliant fabs/fneg operations for the input |
| /// type. |
| virtual bool hasBitPreservingFPLogic(EVT VT) const { |
| return false; |
| } |
| |
| /// Return true if it is cheaper to split the store of a merged int val |
| /// from a pair of smaller values into multiple stores. |
| virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { |
| return false; |
| } |
| |
| /// Return true if the target supports combining a |
| /// chain like: |
| /// \code |
| /// %andResult = and %val1, #mask |
| /// %icmpResult = icmp %andResult, 0 |
| /// \endcode |
| /// into a single machine instruction of a form like: |
| /// \code |
| /// cc = test %register, #mask |
| /// \endcode |
| virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
| return false; |
| } |
| |
| /// Use bitwise logic to make pairs of compares more efficient. For example: |
| /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
| /// This should be true when it takes more than one instruction to lower |
| /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on |
| /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. |
| virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { |
| return false; |
| } |
| |
| /// Return the preferred operand type if the target has a quick way to compare |
| /// integer values of the given size. Assume that any legal integer type can |
| /// be compared efficiently. Targets may override this to allow illegal wide |
| /// types to return a vector type if there is support to compare that type. |
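| /// As a sketch only (not the actual X86 code), a target with wide vector |
| /// compares might override it as follows; "MyTargetLowering" and |
| /// "hasWideVectorCompare()" are hypothetical: |
| /// \code |
| ///   MVT MyTargetLowering::hasFastEqualityCompare(unsigned NumBits) const { |
| ///     if (NumBits == 256 && hasWideVectorCompare()) |
| ///       return MVT::v32i8;  // expand memcmp equality with 256-bit loads |
| ///     return TargetLoweringBase::hasFastEqualityCompare(NumBits); |
| ///   } |
| /// \endcode |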
| virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
| MVT VT = MVT::getIntegerVT(NumBits); |
| return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
| } |
| |
| /// Return true if the target should transform: |
| /// (X & Y) == Y ---> (~X & Y) == 0 |
| /// (X & Y) != Y ---> (~X & Y) != 0 |
| /// |
| /// This may be profitable if the target has a bitwise and-not operation that |
| /// sets comparison flags. A target may want to limit the transformation based |
| /// on the type of Y or if Y is a constant. |
| /// |
| /// Note that the transform will not occur if Y is known to be a power-of-2 |
| /// because a mask and compare of a single bit can be handled by inverting the |
| /// predicate, for example: |
| /// (X & 8) == 8 ---> (X & 8) != 0 |
| virtual bool hasAndNotCompare(SDValue Y) const { |
| return false; |
| } |
| |
| /// Return true if the target has a bitwise and-not operation: |
| /// X = ~A & B |
| /// This can be used to simplify select or other instructions. |
| virtual bool hasAndNot(SDValue X) const { |
| // If the target has the more complex version of this operation, assume that |
| // it has this operation too. |
| return hasAndNotCompare(X); |
| } |
| |
| /// There are two ways to clear extreme bits (either low or high): |
| /// Mask: x & (-1 << y) (the instcombine canonical form) |
| /// Shifts: x >> y << y |
| /// Return true if the variant with 2 shifts is preferred. |
| /// Return false if there is no preference. |
| virtual bool preferShiftsToClearExtremeBits(SDValue X) const { |
| // By default, let's assume that no one prefers shifts. |
| return false; |
| } |
| |
| /// Should we transform the IR-optimal check for whether the given truncation |
| /// down into KeptBits would be truncating or not: |
| /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) |
| /// Into its more traditional form: |
| /// ((%x << C) a>> C) dstcond %x |
| /// Return true if we should transform. |
| /// Return false if there is no preference. |
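| /// For instance, with %x : i32 and KeptBits == 8 (so C == 24), one concrete |
| /// instance of the pair above is: |
| ///   (add %x, 128) u< 256 |
| /// versus |
| ///   ((%x << 24) a>> 24) == %x |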
| virtual bool shouldTransformSignedTruncationCheck(EVT XVT, |
| unsigned KeptBits) const { |
| // By default, let's assume that no one prefers shifts. |
| return false; |
| } |
| |
| /// Return true if the target wants to use the optimization that |
| /// turns ext(promotableInst1(...(promotableInstN(load)))) into |
| /// promotedInst1(...(promotedInstN(ext(load)))). |
| bool enableExtLdPromotion() const { return EnableExtLdPromotion; } |
| |
| /// Return true if the target can combine store(extractelement VectorTy, |
| /// Idx). |
| /// \p Cost[out] gives the cost of that transformation when this is true. |
| virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, |
| unsigned &Cost) const { |
| return false; |
| } |
| |
| /// Return true if inserting a scalar into a variable element of an undef |
| /// vector is more efficiently handled by splatting the scalar instead. |
| virtual bool shouldSplatInsEltVarIndex(EVT) const { |
| return false; |
| } |
| |
| /// Return true if target supports floating point exceptions. |
| bool hasFloatingPointExceptions() const { |
| return HasFloatingPointExceptions; |
| } |
| |
| /// Return true if the target always benefits from combining into FMA for a |
| /// given value type. This must typically return false on targets where FMA |
| /// takes more cycles to execute than FADD. |
| virtual bool enableAggressiveFMAFusion(EVT VT) const { |
| return false; |
| } |
| |
| /// Return the ValueType of the result of SETCC operations. |
| virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
| EVT VT) const; |
| |
| /// Return the ValueType for comparison libcalls. Comparison libcalls include |
| /// floating point comparison calls, and Ordered/Unordered check calls on |
| /// floating point numbers. |
| virtual |
| MVT::SimpleValueType getCmpLibcallReturnType() const; |
| |
| /// For targets without i1 registers, this gives the nature of the high-bits |
| /// of boolean values held in types wider than i1. |
| /// |
| /// "Boolean values" are special true/false values produced by nodes like |
| /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
| /// Not to be confused with general values promoted from i1. Some cpus |
| /// distinguish between vectors of boolean and scalars; the isVec parameter |
| /// selects between the two kinds. For example on X86 a scalar boolean should |
| /// be zero extended from i1, while the elements of a vector of booleans |
| /// should be sign extended from i1. |
| /// |
| /// Some cpus also treat floating point types the same way as they treat |
| /// vectors instead of the way they treat scalars. |
| BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
| if (isVec) |
| return BooleanVectorContents; |
| return isFloat ? BooleanFloatContents : BooleanContents; |
| } |
| |
| BooleanContent getBooleanContents(EVT Type) const { |
| return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); |
| } |
| |
| /// Return target scheduling preference. |
| Sched::Preference getSchedulingPreference() const { |
| return SchedPreferenceInfo; |
| } |
| |
| /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics |
| /// for different nodes. This function returns the preference (or none) for |
| /// the given node. |
| virtual Sched::Preference getSchedulingPreference(SDNode *) const { |
| return Sched::None; |
| } |
| |
| /// Return the register class that should be used for the specified value |
| /// type. |
| virtual const TargetRegisterClass *getRegClassFor(MVT VT) const { |
| const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
| assert(RC && "This value type is not natively supported!"); |
| return RC; |
| } |
| |
| /// Return the 'representative' register class for the specified value |
| /// type. |
| /// |
| /// The 'representative' register class is the largest legal super-reg |
| /// register class for the register class of the value type. For example, on |
| /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep |
| /// register class is GR64 on x86_64. |
| virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
| const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
| return RC; |
| } |
| |
| /// Return the cost of the 'representative' register class for the specified |
| /// value type. |
| virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
| return RepRegClassCostForVT[VT.SimpleTy]; |
| } |
| |
| /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS |
| /// instructions, and false if a library call is preferred (e.g. for code-size |
| /// reasons). |
| virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { |
| return true; |
| } |
| |
| /// Return true if the target has native support for the specified value type. |
| /// This means that it has a register that directly holds it without |
| /// promotions or expansions. |
| bool isTypeLegal(EVT VT) const { |
| assert(!VT.isSimple() || |
| (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); |
| return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; |
| } |
| |
| class ValueTypeActionImpl { |
| /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
| /// that indicates how instruction selection should deal with the type. |
| LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; |
| |
| public: |
| ValueTypeActionImpl() { |
| std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), |
| TypeLegal); |
| } |
| |
| LegalizeTypeAction getTypeAction(MVT VT) const { |
| return ValueTypeActions[VT.SimpleTy]; |
| } |
| |
| void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
| ValueTypeActions[VT.SimpleTy] = Action; |
| } |
| }; |
| |
| const ValueTypeActionImpl &getValueTypeActions() const { |
| return ValueTypeActions; |
| } |
| |
| /// Return how we should legalize values of this type: either it is already |
| /// legal (return 'Legal') or we need to promote it to a larger type (return |
| /// 'Promote'), or we need to expand it into multiple registers of smaller |
| /// integer type (return 'Expand'). 'Custom' is not an option. |
| LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { |
| return getTypeConversion(Context, VT).first; |
| } |
| LegalizeTypeAction getTypeAction(MVT VT) const { |
| return ValueTypeActions.getTypeAction(VT); |
| } |
| |
| /// For types supported by the target, this is an identity function. For |
| /// types that must be promoted to larger types, this returns the larger type |
| /// to promote to. For integer types that are larger than the largest integer |
| /// register, this contains one step in the expansion to get to the smaller |
| /// register. For illegal floating point types, this returns the integer type |
| /// to transform to. |
| EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
| return getTypeConversion(Context, VT).second; |
| } |
| |
| /// For types supported by the target, this is an identity function. For |
| /// types that must be expanded (i.e. integer types that are larger than the |
| /// largest integer register or illegal floating point types), this returns |
| /// the largest legal type it will be expanded to. |
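| /// For example, assuming a target whose widest legal integer type is i64 and |
| /// which expands i128 (TypeExpandInteger), a sketch of the result is: |
| /// \code |
| ///   EVT PartVT = TLI.getTypeToExpandTo(Ctx, EVT::getIntegerVT(Ctx, 128)); |
| ///   // PartVT is MVT::i64 on such a target; "TLI" and "Ctx" are assumed names. |
| /// \endcode |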
| EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
| assert(!VT.isVector()); |
| while (true) { |
| switch (getTypeAction(Context, VT)) { |
| case TypeLegal: |
| return VT; |
| case TypeExpandInteger: |
| VT = getTypeToTransformTo(Context, VT); |
| break; |
| default: |
| llvm_unreachable("Type is not legal nor is it to be expanded!"); |
| } |
| } |
| } |
| |
| /// Vector types are broken down into some number of legal first class types. |
| /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
| /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
| /// turns into 4 EVT::i32 values with both PPC and X86. |
| /// |
| /// This method returns the number of registers needed, and the VT for each |
| /// register. It also returns the VT and quantity of the intermediate values |
| /// before they are promoted/expanded. |
| unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
| EVT &IntermediateVT, |
| unsigned &NumIntermediates, |
| MVT &RegisterVT) const; |
| |
| /// Certain targets such as MIPS require that some types such as vectors are |
| /// always broken down into scalars in some contexts. This occurs even if the |
| /// vector type is legal. |
| virtual unsigned getVectorTypeBreakdownForCallingConv( |
| LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
| unsigned &NumIntermediates, MVT &RegisterVT) const { |
| return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, |
| RegisterVT); |
| } |
| |
| struct IntrinsicInfo { |
| unsigned opc = 0; // target opcode |
| EVT memVT; // memory VT |
| |
| // value representing memory location |
| PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; |
| |
| int offset = 0; // offset off of ptrVal |
| unsigned size = 0; // the size of the memory location |
| // (taken from memVT if zero) |
| unsigned align = 1; // alignment |
| |
| MachineMemOperand::Flags flags = MachineMemOperand::MONone; |
| IntrinsicInfo() = default; |
| }; |
| |
| /// Given an intrinsic, checks whether on the target the intrinsic will need |
| /// to map to a MemIntrinsicNode (touches memory). If this is the case, it |
| /// returns true and stores the intrinsic information into the IntrinsicInfo |
| /// that was passed to the function. |
| virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
| MachineFunction &, |
| unsigned /*Intrinsic*/) const { |
| return false; |
| } |
| |
| /// Returns true if the target can instruction select the specified FP |
| /// immediate natively. If false, the legalizer will materialize the FP |
| /// immediate as a load from a constant pool. |
| virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const { |
| return false; |
| } |
| |
| /// Targets can use this to indicate that they only support *some* |
| /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
| /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be |
| /// legal. |
| virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { |
| return true; |
| } |
| |
| /// Returns true if the operation can trap for the value type. |
| /// |
| /// VT must be a legal type. By default, we optimistically assume most |
| /// operations don't trap except for integer divide and remainder. |
| virtual bool canOpTrap(unsigned Op, EVT VT) const; |
| |
| /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
| /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
| /// constant pool entry. |
| virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, |
| EVT /*VT*/) const { |
| return false; |
| } |
| |
| /// Return how this operation should be treated: either it is legal, needs to |
| /// be promoted to a larger size, needs to be expanded to some other code |
| /// sequence, or the target has a custom expander for it. |
| LegalizeAction getOperationAction(unsigned Op, EVT VT) const { |
| if (VT.isExtended()) return Expand; |
| // If a target-specific SDNode requires legalization, require the target |
| // to provide custom legalization for it. |
| if (Op >= array_lengthof(OpActions[0])) return Custom; |
| return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; |
| } |
| |
| /// Custom method defined by each target to indicate if an operation which |
| /// may require a scale is supported natively by the target. |
| /// If not, the operation is illegal. |
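| /// As an illustrative sketch only ("MyTargetLowering" is hypothetical), a |
| /// target with a native Q15 fixed point multiply could report: |
| /// \code |
| ///   bool MyTargetLowering::isSupportedFixedPointOperation( |
| ///       unsigned Op, EVT VT, unsigned Scale) const { |
| ///     return Op == ISD::SMULFIX && VT == MVT::i16 && Scale == 15; |
| ///   } |
| /// \endcode |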
| virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, |
| unsigned Scale) const { |
| return false; |
| } |
| |
| /// Some fixed point operations may be natively supported by the target but |
| /// only for specific scales. This method allows checking whether the given |
| /// operation is supported by the target for the given type at the given |
| /// scale. |
| LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, |
| unsigned Scale) const { |
| auto Action = getOperationAction(Op, VT); |
| if (Action != Legal) |
| return Action; |
| |
| // This operation is supported in this type but may only work on specific |
| // scales. |
| bool Supported; |
| switch (Op) { |
| default: |
| llvm_unreachable("Unexpected fixed point operation."); |
| case ISD::SMULFIX: |
| case ISD::UMULFIX: |
| Supported = isSupportedFixedPointOperation(Op, VT, Scale); |
| break; |
| } |
| |
| return Supported ? Action : Expand; |
| } |
| |
| LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
| unsigned EqOpc; |
| switch (Op) { |
| default: llvm_unreachable("Unexpected FP pseudo-opcode"); |
| case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; |
| case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; |
| case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; |
| case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; |
| case ISD::STRICT_FREM: EqOpc = ISD::FREM; break; |
| case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; |
| case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; |
| case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; |
| case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; |
| case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; |
| case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; |
| case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; |
| case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; |
| case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; |
| case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; |
| case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; |
| case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; |
| case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; |
| case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; |
| case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; |
| case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; |
| case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; |
| case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; |
| case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; |
| } |
| |
| auto Action = getOperationAction(EqOpc, VT); |
| |
| // We don't currently handle Custom or Promote for strict FP pseudo-ops. |
| // For now, we just expand for those cases. |
| if (Action != Legal) |
| Action = Expand; |
| |
| return Action; |
| } |
| |
| /// Return true if the specified operation is legal on this target or can be |
| /// made legal with custom lowering. This is used to help guide high-level |
| /// lowering decisions. |
| bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { |
| return (VT == MVT::Other || isTypeLegal(VT)) && |
| (getOperationAction(Op, VT) == Legal || |
| getOperationAction(Op, VT) == Custom); |
| } |
| |
| /// Return true if the specified operation is legal on this target or can be |
| /// made legal using promotion. This is used to help guide high-level lowering |
| /// decisions. |
| bool isOperationLegalOrPromote(unsigned Op, EVT VT) const { |
| return (VT == MVT::Other || isTypeLegal(VT)) && |
| (getOperationAction(Op, VT) == Legal || |
| getOperationAction(Op, VT) == Promote); |
| } |
| |
| /// Return true if the specified operation is legal on this target or can be |
| /// made legal with custom lowering or using promotion. This is used to help |
| /// guide high-level lowering decisions. |
| bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const { |
| return (VT == MVT::Other || isTypeLegal(VT)) && |
| (getOperationAction(Op, VT) == Legal || |
| getOperationAction(Op, VT) == Custom || |
| getOperationAction(Op, VT) == Promote); |
| } |
| |
| /// Return true if the operation uses custom lowering, regardless of whether |
| /// the type is legal or not. |
| bool isOperationCustom(unsigned Op, EVT VT) const { |
| return getOperationAction(Op, VT) == Custom; |
| } |
| |
| /// Return true if lowering to a jump table is allowed. |
| virtual bool areJTsAllowed(const Function *Fn) const { |
| if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") |
| return false; |
| |
| return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
| isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
| } |
| |
| /// Check whether the range [Low,High] fits in a machine word. |
| bool rangeFitsInWord(const APInt &Low, const APInt &High, |
| const DataLayout &DL) const { |
| // FIXME: Using the pointer type doesn't seem ideal. |
| uint64_t BW = DL.getIndexSizeInBits(0u); |
| uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
| return Range <= BW; |
| } |
| |
| /// Return true if lowering to a jump table is suitable for a set of case |
| /// clusters which may contain \p NumCases cases and span \p Range values. |
| /// FIXME: This function checks the maximum table size and density, but not |
| /// the minimum size. It would be nice if the minimum size check were also |
| /// folded into this function. Currently, the minimum size check is |
| /// performed in findJumpTable() in SelectionDAGBuilder and |
| /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. |
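| /// For example, with a minimum density of 40%, a \p Range of 100 values |
| /// requires at least 40 cases to satisfy NumCases * 100 >= Range * MinDensity. |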
| virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
| uint64_t Range) const { |
| const bool OptForSize = SI->getParent()->getParent()->optForSize(); |
| const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
| const unsigned MaxJumpTableSize = |
| OptForSize || getMaximumJumpTableSize() == 0 |
| ? UINT_MAX |
| : getMaximumJumpTableSize(); |
| // Check whether a range of clusters is dense enough for a jump table. |
| if (Range <= MaxJumpTableSize && |
| (NumCases * 100 >= Range * MinDensity)) { |
| return true; |
| } |
| return false; |
| } |
| |
| /// Return true if lowering to a bit test is suitable for a set of case |
| /// clusters which contains \p NumDests unique destinations, \p Low and |
| /// \p High as its lowest and highest case values, and expects \p NumCmps |
| /// case value comparisons. Check if the number of destinations, comparison |
| /// metric, and range are all suitable. |
| bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, |
| const APInt &Low, const APInt &High, |
| const DataLayout &DL) const { |
| // FIXME: I don't think NumCmps is the correct metric: a single case and a |
| // range of cases both require only one branch to lower. Just looking at the |
| // number of clusters and destinations should be enough to decide whether to |
| // build bit tests. |
| |
| // To lower a range with bit tests, the range must fit the bitwidth of a |
| // machine word. |
| if (!rangeFitsInWord(Low, High, DL)) |
| return false; |
| |
| // Decide whether it's profitable to lower this range with bit tests. Each |
| // destination requires a bit test and branch, and there is an overall range |
| // check branch. For a small number of clusters, separate comparisons might |
| // be cheaper, and for many destinations, splitting the range might be |
| // better. |
| return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || |
| (NumDests == 3 && NumCmps >= 6); |
| } |
| |
| /// Return true if the specified operation is illegal on this target or |
| /// unlikely to be made legal with custom lowering. This is used to help guide |
| /// high-level lowering decisions. |
| bool isOperationExpand(unsigned Op, EVT VT) const { |
| return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
| } |
| |
| /// Return true if the specified operation is legal on this target. |
| bool isOperationLegal(unsigned Op, EVT VT) const { |
| return (VT == MVT::Other || isTypeLegal(VT)) && |
| getOperationAction(Op, VT) == Legal; |
| } |
| |
| /// Return how this load with extension should be treated: either it is legal, |
| /// needs to be promoted to a larger size, needs to be expanded to some other |
| /// code sequence, or the target has a custom expander for it. |
| LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, |
| EVT MemVT) const { |
| if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
| unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
| unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
| assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && |
| MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); |
| unsigned Shift = 4 * ExtType; |
| return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); |
| } |
| |
| /// Return true if the specified load with extension is legal on this target. |
| bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
| return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
| } |
| |
| /// Return true if the specified load with extension is legal or custom |
| /// on this target. |
| bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
| return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
| getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
| } |
| |
| /// Return how this store with truncation should be treated: either it is |
| /// legal, needs to be promoted to a larger size, needs to be expanded to some |
| /// other code sequence, or the target has a custom expander for it. |
| LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
| if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
| unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
| unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
| assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && |
| "Table isn't big enough!"); |
| return TruncStoreActions[ValI][MemI]; |
| } |
| |
| /// Return true if the specified store with truncation is legal on this |
| /// target. |
| bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
| return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; |
| } |
| |
| /// Return true if the specified store with truncation is legal or custom on |
| /// this target. |
| bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
| return isTypeLegal(ValVT) && |
| (getTruncStoreAction(ValVT, MemVT) == Legal || |
| getTruncStoreAction(ValVT, MemVT) == Custom); |
| } |
| |
| /// Return how the indexed load should be treated: either it is legal, needs |
| /// to be promoted to a larger size, needs to be expanded to some other code |
| /// sequence, or the target has a custom expander for it. |
| LegalizeAction |
| getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
| assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
| "Table isn't big enough!"); |
| unsigned Ty = (unsigned)VT.SimpleTy; |
| return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); |
| } |
| |
| /// Return true if the specified indexed load is legal on this target. |
| bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
| return VT.isSimple() && |
| (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || |
| getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); |
| } |
| |
| /// Return how the indexed store should be treated: either it is legal, needs |
| /// to be promoted to a larger size, needs to be expanded to some other code |
| /// sequence, or the target has a custom expander for it. |
| LegalizeAction |
| getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
| assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
| "Table isn't big enough!"); |
| unsigned Ty = (unsigned)VT.SimpleTy; |
| return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); |
| } |
| |
| /// Return true if the specified indexed store is legal on this target. |
| bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
| return VT.isSimple() && |
| (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || |
| getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); |
| } |
| |
| /// Return how the condition code should be treated: either it is legal, needs |
| /// to be expanded to some other code sequence, or the target has a custom |
| /// expander for it. |
| LegalizeAction |
| getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
| assert((unsigned)CC < array_lengthof(CondCodeActions) && |
| ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && |
| "Table isn't big enough!"); |
| // See setCondCodeAction for how this is encoded. |
| uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
| uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
| LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
| assert(Action != Promote && "Can't promote condition code!"); |
| return Action; |
| } |
| |
| /// Return true if the specified condition code is legal on this target. |
| bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
| return getCondCodeAction(CC, VT) == Legal; |
| } |
| |
| /// Return true if the specified condition code is legal or custom on this |
| /// target. |
| bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
| return getCondCodeAction(CC, VT) == Legal || |
| getCondCodeAction(CC, VT) == Custom; |
| } |
| |
| /// If the action for this operation is to promote, this method returns the |
| /// ValueType to promote to. |
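| /// For example (a sketch, assuming "TLI" is a TargetLoweringBase reference |
| /// and ISD::ADD on i16 was marked Promote with no explicit promoted type): |
| /// \code |
| ///   MVT PromotedVT = TLI.getTypeToPromoteTo(ISD::ADD, MVT::i16); |
| ///   // Walks up the integer types; typically yields MVT::i32. |
| /// \endcode |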
| MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
| assert(getOperationAction(Op, VT) == Promote && |
| "This operation isn't promoted!"); |
| |
| // See if this has an explicit type specified. |
| std::map<std::pair<unsigned, MVT::SimpleValueType>, |
| MVT::SimpleValueType>::const_iterator PTTI = |
| PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); |
| if (PTTI != PromoteToType.end()) return PTTI->second; |
| |
| assert((VT.isInteger() || VT.isFloatingPoint()) && |
| "Cannot autopromote this type, add it with AddPromotedToType."); |
| |
| MVT NVT = VT; |
| do { |
| NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
| assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && |
| "Didn't find type to promote to!"); |
| } while (!isTypeLegal(NVT) || |
| getOperationAction(Op, NVT) == Promote); |
| return NVT; |
| } |
| |
| /// Return the EVT corresponding to this LLVM type. The mapping is fixed for |
| /// all types except pointers, whose size is taken from the DataLayout. If |
| /// AllowUnknown is true, this will return MVT::Other for types with no EVT |
| /// counterpart (e.g. structs), otherwise it will assert. |
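| /// For example, with 64-bit pointers in \p DL (an illustrative case), a |
| /// vector of pointers lowers to a vector of pointer-sized integers: |
| /// \code |
| ///   // "Int8PtrTy" is an assumed i8* Type; the result here is MVT::v4i64. |
| ///   EVT VT = TLI.getValueType(DL, VectorType::get(Int8PtrTy, 4)); |
| /// \endcode |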
| EVT getValueType(const DataLayout &DL, Type *Ty, |
| bool AllowUnknown = false) const { |
| // Lower scalar pointers to native pointer types. |
| if (PointerType *PTy = dyn_cast<PointerType>(Ty)) |
| return getPointerTy(DL, PTy->getAddressSpace()); |
| |
| if (Ty->isVectorTy()) { |
| VectorType *VTy = cast<VectorType>(Ty); |
| Type *Elm = VTy->getElementType(); |
| // Lower vectors of pointers to native pointer types. |
| if (PointerType *PT = dyn_cast<PointerType>(Elm)) { |
| EVT PointerTy(getPointerTy(DL, PT->getAddressSpace())); |
| Elm = PointerTy.getTypeForEVT(Ty->getContext()); |
| } |
| |
| return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), |
| VTy->getNumElements()); |
| } |
| return EVT::getEVT(Ty, AllowUnknown); |
| } |
| |
| /// Return the MVT corresponding to this LLVM type. See getValueType. |
| MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
| bool AllowUnknown = false) const { |
| return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
| } |
| |
| /// Return the desired alignment for ByVal or InAlloca aggregate function |
| /// arguments in the caller parameter area. This is the actual alignment, not |
| /// its logarithm. |
| virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
| |
| /// Return the type of registers that this ValueType will eventually require. |
| MVT getRegisterType(MVT VT) const { |
| assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); |
| return RegisterTypeForVT[VT.SimpleTy]; |
| } |
| |
| /// Return the type of registers that this ValueType will eventually require. |
| MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
| if (VT.isSimple()) { |
| assert((unsigned)VT.getSimpleVT().SimpleTy < |
| array_lengthof(RegisterTypeForVT)); |
| return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; |
| } |
| if (VT.isVector()) { |
| EVT VT1; |
| MVT RegisterVT; |
| unsigned NumIntermediates; |
| (void)getVectorTypeBreakdown(Context, VT, VT1, |
| NumIntermediates, RegisterVT); |
| return RegisterVT; |
| } |
| if (VT.isInteger()) { |
| return getRegisterType(Context, getTypeToTransformTo(Context, VT)); |
| } |
| llvm_unreachable("Unsupported extended type!"); |
| } |
| |
| /// Return the number of registers that this ValueType will eventually |
| /// require. |
| /// |
| /// This is one for any types promoted to live in larger registers, but may be |
| /// more than one for types (like i64) that are split into pieces. For types |
| /// like i140, which are first promoted then expanded, it is the number of |
| /// registers needed to hold all the bits of the original type. For an i140 |
| /// on a 32 bit machine this means 5 registers. |
| unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { |
| if (VT.isSimple()) { |
| assert((unsigned)VT.getSimpleVT().SimpleTy < |
| array_lengthof(NumRegistersForVT)); |
| return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
| } |
| if (VT.isVector()) { |
| EVT VT1; |
| MVT VT2; |
| unsigned NumIntermediates; |
| return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); |
| } |
| if (VT.isInteger()) { |
| unsigned BitWidth = VT.getSizeInBits(); |
| unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
| return (BitWidth + RegWidth - 1) / RegWidth; |
| } |
| llvm_unreachable("Unsupported extended type!"); |
| } |
| |
| /// Certain combinations of ABIs, Targets and features require that types |
| /// are legal for some operations and not for other operations. |
| /// For MIPS all vector types must be passed through the integer register set. |
| virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, EVT VT) const { |
| return getRegisterType(Context, VT); |
| } |
| |
| /// Certain targets require unusual breakdowns of certain types. For MIPS, |
| /// this occurs when a vector type is used, as vectors are passed through the |
| /// integer register set. |
| virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| return getNumRegisters(Context, VT); |
| } |
| |
| /// Certain targets have context-sensitive alignment requirements, where one |
| /// type has the alignment requirement of another type. |
| virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, |
| DataLayout DL) const { |
| return DL.getABITypeAlignment(ArgTy); |
| } |
| |
| /// If true, then instruction selection should seek to shrink the FP constant |
| /// of the specified type to a smaller type in order to save space and / or |
| /// reduce runtime. |
| virtual bool ShouldShrinkFPConstant(EVT) const { return true; } |
| |
| /// Return true if it is profitable to reduce a load to a smaller type. |
| /// Example: (i16 (trunc (i32 (load x))) -> i16 load x |
| virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
| EVT NewVT) const { |
| // By default, assume that it is cheaper to extract a subvector from a wide |
| // vector load rather than creating multiple narrow vector loads. |
| if (NewVT.isVector() && !Load->hasOneUse()) |
| return false; |
| |
| return true; |
| } |
| |
| /// When splitting a value of the specified type into parts, does the Lo |
| /// or Hi part come first? This usually follows the endianness, except |
| /// for ppcf128, where the Hi part always comes first. |
| bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
| return DL.isBigEndian() || VT == MVT::ppcf128; |
| } |
| |
| /// If true, the target has custom DAG combine transformations that it can |
| /// perform for the specified node. |
| bool hasTargetDAGCombine(ISD::NodeType NT) const { |
| assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); |
| return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); |
| } |
| |
| unsigned getGatherAllAliasesMaxDepth() const { |
| return GatherAllAliasesMaxDepth; |
| } |
| |
| /// Returns the size of the platform's va_list object. |
| virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
| return getPointerTy(DL).getSizeInBits(); |
| } |
| |
| /// Get maximum # of store operations permitted for llvm.memset |
| /// |
| /// This function returns the maximum number of store operations permitted |
| /// to replace a call to llvm.memset. The value is set by the target at the |
| /// performance threshold for such a replacement. If OptSize is true, |
| /// return the limit for functions that have OptSize attribute. |
| unsigned getMaxStoresPerMemset(bool OptSize) const { |
| return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
| } |
| |
| /// Get maximum # of store operations permitted for llvm.memcpy |
| /// |
| /// This function returns the maximum number of store operations permitted |
| /// to replace a call to llvm.memcpy. The value is set by the target at the |
| /// performance threshold for such a replacement. If OptSize is true, |
| /// return the limit for functions that have OptSize attribute. |
| unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
| return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
| } |
| |
| /// Get maximum # of store operations to be glued together |
| /// |
| /// This function returns the maximum number of store operations permitted |
| /// to glue together during lowering of llvm.memcpy. The value is set by |
| /// the target at the performance threshold for such a replacement. |
| virtual unsigned getMaxGluedStoresPerMemcpy() const { |
| return MaxGluedStoresPerMemcpy; |
| } |
| |
| /// Get maximum # of load operations permitted for memcmp |
| /// |
| /// This function returns the maximum number of load operations permitted |
| /// to replace a call to memcmp. The value is set by the target at the |
| /// performance threshold for such a replacement. If OptSize is true, |
| /// return the limit for functions that have OptSize attribute. |
| unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
| return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
| } |
| |
| /// For memcmp expansion when the memcmp result is only compared equal or |
| /// not-equal to 0, allow up to this number of load pairs per block. As an |
| /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block: |
| /// a0 = load2bytes &a[0] |
| /// b0 = load2bytes &b[0] |
| /// a2 = load1byte &a[2] |
| /// b2 = load1byte &b[2] |
| /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 |
| virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { |
| return 1; |
| } |
| |
| /// Get maximum # of store operations permitted for llvm.memmove |
| /// |
| /// This function returns the maximum number of store operations permitted |
| /// to replace a call to llvm.memmove. The value is set by the target at the |
| /// performance threshold for such a replacement. If OptSize is true, |
| /// return the limit for functions that have OptSize attribute. |
| unsigned getMaxStoresPerMemmove(bool OptSize) const { |
| return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
| } |
| |
| /// Determine if the target supports unaligned memory accesses. |
| /// |
| /// This function returns true if the target allows unaligned memory accesses |
| /// of the specified type in the given address space. If true, it also returns |
| /// whether the unaligned memory access is "fast" in the last argument by |
| /// reference. This is used, for example, in situations where an array |
| /// copy/move/set is converted to a sequence of store operations. Its use |
| /// helps to ensure that such replacements don't generate code that causes an |
| /// alignment error (trap) on the target machine. |
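| /// |
| /// A minimal sketch of a possible override (illustrative only; a real target |
| /// typically keys this off subtarget features): |
| ///   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, |
| ///                                       unsigned Align, |
| ///                                       bool *Fast) const override { |
| ///     if (Fast) |
| ///       *Fast = true; // misaligned i32/i64 accesses are cheap on this core |
| ///     return VT == MVT::i32 || VT == MVT::i64; |
| ///   } |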
| virtual bool allowsMisalignedMemoryAccesses(EVT, |
| unsigned AddrSpace = 0, |
| unsigned Align = 1, |
| bool * /*Fast*/ = nullptr) const { |
| return false; |
| } |
| |
| /// Return true if the target supports a memory access of this type for the |
| /// given address space and alignment. If the access is allowed, the optional |
| /// final parameter returns if the access is also fast (as defined by the |
| /// target). |
| bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
| unsigned AddrSpace = 0, unsigned Alignment = 1, |
| bool *Fast = nullptr) const; |
| |
| /// Returns the target specific optimal type for load and store operations as |
| /// a result of memset, memcpy, and memmove lowering. |
| /// |
| /// If DstAlign is zero, the destination alignment can satisfy any |
| /// constraint. Similarly, if SrcAlign is zero it means there isn't |
| /// a need to check it against alignment requirement, probably because the |
| /// source does not need to be loaded. If 'IsMemset' is true, that means it's |
| /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of |
| /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it |
| /// does not need to be loaded. It returns EVT::Other if the type should be |
| /// determined using generic target-independent logic. |
| virtual EVT getOptimalMemOpType(uint64_t /*Size*/, |
| unsigned /*DstAlign*/, unsigned /*SrcAlign*/, |
| bool /*IsMemset*/, |
| bool /*ZeroMemset*/, |
| bool /*MemcpyStrSrc*/, |
| MachineFunction &/*MF*/) const { |
| return MVT::Other; |
| } |
| |
| /// Returns true if it's safe to use load / store of the specified type to |
| /// expand memcpy / memset inline. |
| /// |
| /// This is mostly true for all types except for some special cases. For |
| /// example, on X86 targets without SSE2 f64 load / store are done with fldl / |
| /// fstpl which also does type conversion. Note the specified type doesn't |
| /// have to be legal as the hook is used before type legalization. |
| virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } |
| |
| /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. |
| bool usesUnderscoreSetJmp() const { |
| return UseUnderscoreSetJmp; |
| } |
| |
| /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. |
| bool usesUnderscoreLongJmp() const { |
| return UseUnderscoreLongJmp; |
| } |
| |
| /// Return lower limit for number of entries in a jump table. |
| virtual unsigned getMinimumJumpTableEntries() const; |
| |
| /// Return lower limit of the density in a jump table. |
| unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
| |
| /// Return upper limit for number of entries in a jump table. |
| /// Zero if no limit. |
| unsigned getMaximumJumpTableSize() const; |
| |
| virtual bool isJumpTableRelative() const { |
| return TM.isPositionIndependent(); |
| } |
| |
| /// If a physical register, this specifies the register that |
| /// llvm.stacksave/llvm.stackrestore should save and restore. |
| unsigned getStackPointerRegisterToSaveRestore() const { |
| return StackPointerRegisterToSaveRestore; |
| } |
| |
| /// If a physical register, this returns the register that receives the |
| /// exception address on entry to an EH pad. |
| virtual unsigned |
| getExceptionPointerRegister(const Constant *PersonalityFn) const { |
| // 0 is guaranteed to be the NoRegister value on all targets |
| return 0; |
| } |
| |
| /// If a physical register, this returns the register that receives the |
| /// exception typeid on entry to a landing pad. |
| virtual unsigned |
| getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
| // 0 is guaranteed to be the NoRegister value on all targets |
| return 0; |
| } |
| |
| virtual bool needsFixedCatchObjects() const { |
| report_fatal_error("Funclet EH is not implemented for this target"); |
| } |
| |
| /// Returns the target's jmp_buf size in bytes (if never set, the default is |
| /// 200) |
| unsigned getJumpBufSize() const { |
| return JumpBufSize; |
| } |
| |
| /// Returns the target's jmp_buf alignment in bytes (if never set, the default |
| /// is 0) |
| unsigned getJumpBufAlignment() const { |
| return JumpBufAlignment; |
| } |
| |
| /// Return the minimum stack alignment of an argument. |
| unsigned getMinStackArgumentAlignment() const { |
| return MinStackArgumentAlignment; |
| } |
| |
| /// Return the minimum function alignment. |
| unsigned getMinFunctionAlignment() const { |
| return MinFunctionAlignment; |
| } |
| |
| /// Return the preferred function alignment. |
| unsigned getPrefFunctionAlignment() const { |
| return PrefFunctionAlignment; |
| } |
| |
| /// Return the preferred loop alignment. |
| virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { |
| return PrefLoopAlignment; |
| } |
| |
| /// Should loops be aligned even when the function is marked OptSize (but not |
| /// MinSize). |
| virtual bool alignLoopsWithOptSize() const { |
| return false; |
| } |
| |
| /// If the target has a standard location for the stack protector guard, |
| /// returns the address of that location. Otherwise, returns nullptr. |
| /// DEPRECATED: please override useLoadStackGuardNode and customize |
| /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
| virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; |
| |
| /// Inserts necessary declarations for SSP (stack protection) purpose. |
| /// Should be used only when getIRStackGuard returns nullptr. |
| virtual void insertSSPDeclarations(Module &M) const; |
| |
| /// Return the variable that's previously inserted by insertSSPDeclarations, |
| /// if any, otherwise return nullptr. Should be used only when |
| /// getIRStackGuard returns nullptr. |
| virtual Value *getSDagStackGuard(const Module &M) const; |
| |
| /// If this function returns true, stack protection checks should XOR the |
| /// frame pointer (or whichever pointer is used to address locals) into the |
| /// stack guard value before checking it. getIRStackGuard must return nullptr |
| /// if this returns true. |
| virtual bool useStackGuardXorFP() const { return false; } |
| |
| /// If the target has a standard stack protection check function that |
| /// performs validation and error handling, returns the function. Otherwise, |
| /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
| /// Should be used only when getIRStackGuard returns nullptr. |
| virtual Function *getSSPStackGuardCheck(const Module &M) const; |
| |
| protected: |
| Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, |
| bool UseTLS) const; |
| |
| public: |
| /// Returns the target-specific address of the unsafe stack pointer. |
| virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; |
| |
| /// Returns the name of the symbol used to emit stack probes or the empty |
| /// string if not applicable. |
| virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { |
| return ""; |
| } |
| |
| /// Returns true if a cast between SrcAS and DestAS is a noop. |
| virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
| return false; |
| } |
| |
| /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
| /// are happy to sink it into basic blocks. |
| virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
| return isNoopAddrSpaceCast(SrcAS, DestAS); |
| } |
| |
| /// Return true if the pointer arguments to CI should be aligned by aligning |
| /// the object whose address is being passed. If so then MinSize is set to the |
| /// minimum size the object must be to be aligned and PrefAlign is set to the |
| /// preferred alignment. |
| virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
| unsigned & /*PrefAlign*/) const { |
| return false; |
| } |
| |
| //===--------------------------------------------------------------------===// |
| /// \name Helpers for TargetTransformInfo implementations |
| /// @{ |
| |
| /// Get the ISD node that corresponds to the Instruction class opcode. |
| int InstructionOpcodeToISD(unsigned Opcode) const; |
| |
| /// Estimate the cost of type-legalization and the legalized type. |
| std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, |
| Type *Ty) const; |
| |
| /// @} |
| |
| //===--------------------------------------------------------------------===// |
| /// \name Helpers for atomic expansion. |
| /// @{ |
| |
| /// Returns the maximum atomic operation size (in bits) supported by |
| /// the backend. Atomic operations greater than this size (as well |
| /// as ones that are not naturally aligned) will be expanded by |
| /// AtomicExpandPass into an __atomic_* library call. |
| unsigned getMaxAtomicSizeInBitsSupported() const { |
| return MaxAtomicSizeInBitsSupported; |
| } |
| |
| /// Returns the size of the smallest cmpxchg or ll/sc instruction |
| /// the backend supports. Any smaller operations are widened in |
| /// AtomicExpandPass. |
| /// |
| /// Note that *unlike* operations above the maximum size, atomic ops |
| /// are still natively supported below the minimum; they just |
| /// require a more complex expansion. |
| unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
| |
| /// Whether the target supports unaligned atomic operations. |
| bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
| |
| /// Whether AtomicExpandPass should automatically insert fences and reduce |
| /// ordering for this atomic. This should be true for most architectures with |
| /// weak memory ordering. Defaults to false. |
| virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
| return false; |
| } |
| |
| /// Perform a load-linked operation on Addr, returning a "Value *" with the |
| /// corresponding pointee type. This may entail some non-trivial operations to |
| /// truncate or reconstruct types that will be illegal in the backend. See |
| /// ARMISelLowering for an example implementation. |
| virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, |
| AtomicOrdering Ord) const { |
| llvm_unreachable("Load linked unimplemented on this target"); |
| } |
| |
| /// Perform a store-conditional operation to Addr. Return the status of the |
| /// store. This should be 0 if the store succeeded, non-zero otherwise. |
| virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, |
| Value *Addr, AtomicOrdering Ord) const { |
| llvm_unreachable("Store conditional unimplemented on this target"); |
| } |
| |
| /// Perform a masked atomicrmw using a target-specific intrinsic. This |
| /// represents the core LL/SC loop which will be lowered at a late stage by |
| /// the backend. |
| virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, |
| AtomicRMWInst *AI, |
| Value *AlignedAddr, Value *Incr, |
| Value *Mask, Value *ShiftAmt, |
| AtomicOrdering Ord) const { |
| llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); |
| } |
| |
| /// Perform a masked cmpxchg using a target-specific intrinsic. This |
| /// represents the core LL/SC loop which will be lowered at a late stage by |
| /// the backend. |
| virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
| IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
| Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
| llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); |
| } |
| |
| /// Inserts in the IR a target-specific intrinsic specifying a fence. |
| /// It is called by AtomicExpandPass before expanding an |
| /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
| /// if shouldInsertFencesForAtomic returns true. |
| /// |
| /// Inst is the original atomic instruction, prior to other expansions that |
| /// may be performed. |
| /// |
| /// This function should return either nullptr or an IR-level |
| /// Instruction*. Even complex fence sequences can be represented by a |
| /// single Instruction* through an intrinsic to be lowered later. |
| /// Backends should override this method to produce a target-specific |
| /// intrinsic for their fences. |
| /// FIXME: Please note that the default implementation here in terms of |
| /// IR-level fences exists for historical/compatibility reasons and is |
| /// *unsound*! Fences cannot, in general, be used to restore sequential |
| /// consistency. For example, consider the following: |
| /// atomic<int> x = y = 0; |
| /// int r1, r2, r3, r4; |
| /// Thread 0: |
| /// x.store(1); |
| /// Thread 1: |
| /// y.store(1); |
| /// Thread 2: |
| /// r1 = x.load(); |
| /// r2 = y.load(); |
| /// Thread 3: |
| /// r3 = y.load(); |
| /// r4 = x.load(); |
| /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all |
| /// seq_cst. But if they are lowered to monotonic accesses, no amount of |
| /// IR-level fences can prevent it. |
| /// @{ |
| virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, |
| AtomicOrdering Ord) const { |
| if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) |
| return Builder.CreateFence(Ord); |
| else |
| return nullptr; |
| } |
| |
| virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, |
| Instruction *Inst, |
| AtomicOrdering Ord) const { |
| if (isAcquireOrStronger(Ord)) |
| return Builder.CreateFence(Ord); |
| else |
| return nullptr; |
| } |
| /// @} |
| |
| /// Emits code that executes when the comparison result in the ll/sc |
| /// expansion of a cmpxchg instruction is such that the store-conditional will |
| /// not execute. This makes it possible to balance out the load-linked with |
| /// a dedicated instruction, if desired. |
| /// E.g., on ARM, if ldrex is not followed by strex, the exclusive monitor |
| /// would be held unnecessarily; this hook lets the target insert a clrex to |
| /// release it. |
| virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} |
| |
| /// Returns true if the given (atomic) store should be expanded by the |
| /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. |
| virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
| return false; |
| } |
| |
| /// Returns true if arguments should be sign-extended in lib calls. |
| virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
| return IsSigned; |
| } |
| |
| /// Returns how the given (atomic) load should be expanded by the |
| /// IR-level AtomicExpand pass. |
| virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
| return AtomicExpansionKind::None; |
| } |
| |
| /// Returns how the given atomic cmpxchg should be expanded by the IR-level |
| /// AtomicExpand pass. |
| virtual AtomicExpansionKind |
| shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
| return AtomicExpansionKind::None; |
| } |
| |
| /// Returns how the IR-level AtomicExpand pass should expand the given |
| /// AtomicRMW, if at all. Default is to never expand. |
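| /// |
| /// A hedged sketch of a possible override for a target whose LL/SC pair only |
| /// handles values up to 32 bits (sizes and strategy are illustrative): |
| ///   AtomicExpansionKind |
| ///   shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override { |
| ///     unsigned Size = RMW->getType()->getPrimitiveSizeInBits(); |
| ///     return Size <= 32 ? AtomicExpansionKind::LLSC |
| ///                       : AtomicExpansionKind::None; |
| ///   } |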
| virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
| return RMW->isFloatingPointOperation() ? |
| AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; |
| } |
| |
| /// On some platforms, an AtomicRMW that never actually modifies the value |
| /// (such as fetch_add of 0) can be turned into a fence followed by an |
| /// atomic load. This may sound useless, but it makes it possible for the |
| /// processor to keep the cacheline shared, dramatically improving |
| /// performance. And such idempotent RMWs are useful for implementing some |
| /// kinds of locks, see for example (justification + benchmarks): |
| /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
| /// This method tries doing that transformation, returning the atomic load if |
| /// it succeeds, and nullptr otherwise. |
| /// If shouldExpandAtomicLoadInIR requests expansion of the resulting load, |
| /// it will undergo another round of expansion. |
| virtual LoadInst * |
| lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
| return nullptr; |
| } |
| |
| /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
| /// SIGN_EXTEND, or ANY_EXTEND). |
| virtual ISD::NodeType getExtendForAtomicOps() const { |
| return ISD::ZERO_EXTEND; |
| } |
| |
| /// @} |
| |
| /// Returns true if we should normalize |
| /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
| /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely |
| /// that it saves us from materializing N0 and N1 in an integer register. |
| /// Targets that are able to perform and/or on flags should return false here. |
| virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
| EVT VT) const { |
| // If a target has multiple condition registers, then it likely has logical |
| // operations on those registers. |
| if (hasMultipleConditionRegisters()) |
| return false; |
| // Only do the transform if the value won't be split into multiple |
| // registers. |
| LegalizeTypeAction Action = getTypeAction(Context, VT); |
| return Action != TypeExpandInteger && Action != TypeExpandFloat && |
| Action != TypeSplitVector; |
| } |
| |
| virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
| |
| /// Return true if a select of constants (select Cond, C1, C2) should be |
| /// transformed into simple math ops with the condition value. For example: |
| /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
| virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
| return false; |
| } |
| |
| /// Return true if it is profitable to transform an integer |
| /// multiplication-by-constant into simpler operations like shifts and adds. |
| /// This may be true if the target does not directly support the |
| /// multiplication operation for the specified type or the sequence of simpler |
| /// ops is faster than the multiply. |
| virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { |
| return false; |
| } |
| |
| /// Return true if it is more correct/profitable to use strict FP_TO_INT |
| /// conversion operations - canonicalizing the FP source value instead of |
| /// converting all cases and then selecting based on value. |
| /// This may be true if the target throws exceptions for out of bounds |
| /// conversions or has fast FP CMOV. |
| virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
| bool IsSigned) const { |
| return false; |
| } |
| |
| //===--------------------------------------------------------------------===// |
| // TargetLowering Configuration Methods - These methods should be invoked by |
| // the derived class constructor to configure this object for the target. |
| // |
| protected: |
| /// Specify how the target extends the result of integer and floating point |
| /// boolean values from i1 to a wider type. See getBooleanContents. |
| void setBooleanContents(BooleanContent Ty) { |
| BooleanContents = Ty; |
| BooleanFloatContents = Ty; |
| } |
| |
| /// Specify how the target extends the result of integer and floating point |
| /// boolean values from i1 to a wider type. See getBooleanContents. |
| void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
| BooleanContents = IntTy; |
| BooleanFloatContents = FloatTy; |
| } |
| |
| /// Specify how the target extends the result of a vector boolean value from a |
| /// vector of i1 to a wider type. See getBooleanContents. |
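| /// |
| /// For example, a target whose scalar compares produce 0/1 but whose vector |
| /// compares produce all-ones masks might write in its constructor |
| /// (illustrative): |
| ///   setBooleanContents(ZeroOrOneBooleanContent); |
| ///   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |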
| void setBooleanVectorContents(BooleanContent Ty) { |
| BooleanVectorContents = Ty; |
| } |
| |
| /// Specify the target scheduling preference. |
| void setSchedulingPreference(Sched::Preference Pref) { |
| SchedPreferenceInfo = Pref; |
| } |
| |
| /// Indicate whether this target prefers to use _setjmp to implement |
| /// llvm.setjmp or the version without _. Defaults to false. |
| void setUseUnderscoreSetJmp(bool Val) { |
| UseUnderscoreSetJmp = Val; |
| } |
| |
| /// Indicate whether this target prefers to use _longjmp to implement |
| /// llvm.longjmp or the version without _. Defaults to false. |
| void setUseUnderscoreLongJmp(bool Val) { |
| UseUnderscoreLongJmp = Val; |
| } |
| |
| /// Indicate the minimum number of entries needed to generate a jump table. |
| void setMinimumJumpTableEntries(unsigned Val); |
| |
| /// Indicate the maximum number of entries in jump tables. |
| /// Set to zero to generate unlimited jump tables. |
| void setMaximumJumpTableSize(unsigned); |
| |
| /// If set to a physical register, this specifies the register that |
| /// llvm.stacksave/llvm.stackrestore should save and restore. |
| void setStackPointerRegisterToSaveRestore(unsigned R) { |
| StackPointerRegisterToSaveRestore = R; |
| } |
| |
| /// Tells the code generator that the target has multiple (allocatable) |
| /// condition registers that can be used to store the results of comparisons |
| /// for use by selects and conditional branches. With multiple condition |
| /// registers, the code generator will not aggressively sink comparisons into |
| /// the blocks of their users. |
| void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
| HasMultipleConditionRegisters = hasManyRegs; |
| } |
| |
| /// Tells the code generator that the target has BitExtract instructions. |
| /// The code generator will aggressively sink "shift"s into the blocks of |
| /// their users if the users will generate "and" instructions which can be |
| /// combined with "shift" to BitExtract instructions. |
| void setHasExtractBitsInsn(bool hasExtractInsn = true) { |
| HasExtractBitsInsn = hasExtractInsn; |
| } |
| |
| /// Tells the code generator not to expand logic operations on comparison |
| /// predicates into separate sequences that increase the amount of flow |
| /// control. |
| void setJumpIsExpensive(bool isExpensive = true); |
| |
| /// Tells the code generator that this target supports floating point |
| /// exceptions and cares about preserving floating point exception behavior. |
| void setHasFloatingPointExceptions(bool FPExceptions = true) { |
| HasFloatingPointExceptions = FPExceptions; |
| } |
| |
| /// Tells the code generator to bypass slow div/rem of SlowBitWidth with a |
| /// faster div/rem of FastBitWidth when the operand values permit it. |
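| /// |
| /// For instance, a target with a slow 64-bit divider but a fast 32-bit one |
| /// might write (widths illustrative): |
| ///   addBypassSlowDiv(64, 32); |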
| void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
| BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
| } |
| |
| /// Add the specified register class as an available regclass for the |
| /// specified value type. This indicates the selector can handle values of |
| /// that class natively. |
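| /// |
| /// Typical use in a target's TargetLowering constructor (MyTarget, its |
| /// register classes, and the Subtarget member are hypothetical names): |
| ///   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass); |
| ///   addRegisterClass(MVT::f64, &MyTarget::FPR64RegClass); |
| ///   // ... once every register class has been added: |
| ///   computeRegisterProperties(Subtarget.getRegisterInfo()); |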
| void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
| assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); |
| RegClassForVT[VT.SimpleTy] = RC; |
| } |
| |
| /// Return the largest legal super-reg register class of the register class |
| /// for the specified type and its associated "cost". |
| virtual std::pair<const TargetRegisterClass *, uint8_t> |
| findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
| |
| /// Once all of the register classes are added, this allows us to compute |
| /// derived properties we expose. |
| void computeRegisterProperties(const TargetRegisterInfo *TRI); |
| |
| /// Indicate that the specified operation does not work with the specified |
| /// type and indicate what to do about it. Note that VT may refer to either |
| /// the type of a result or that of an operand of Op. |
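| /// |
| /// For example (actions and types illustrative): |
| ///   setOperationAction(ISD::SDIV, MVT::i32, Expand);  // no divide instruction |
| ///   setOperationAction(ISD::FSIN, MVT::f64, LibCall); // call the math library |
| ///   setOperationAction(ISD::BR_CC, MVT::i32, Custom); // target lowers it itself |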
| void setOperationAction(unsigned Op, MVT VT, |
| LegalizeAction Action) { |
| assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); |
| OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
| } |
| |
| /// Indicate that the specified load with extension does not work with the |
| /// specified type and indicate what to do about it. |
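| /// |
| /// For example, a target without sign-extending i16 loads, and which promotes |
| /// all i1 extending loads, might write (illustrative): |
| ///   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Expand); |
| ///   for (MVT VT : MVT::integer_valuetypes()) |
| ///     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); |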
| void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
| LegalizeAction Action) { |
| assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
| MemVT.isValid() && "Table isn't big enough!"); |
| assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
| unsigned Shift = 4 * ExtType; |
| LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
| LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
| } |
| |
| /// Indicate that the specified truncating store does not work with the |
| /// specified type and indicate what to do about it. |
| void setTruncStoreAction(MVT ValVT, MVT MemVT, |
| LegalizeAction Action) { |
| assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); |
| TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
| } |
| |
| /// Indicate that the specified indexed load does or does not work with the |
| /// specified type and indicate what to do about it. |
| /// |
| /// NOTE: All indexed mode loads are initialized to Expand in |
| /// TargetLowering.cpp |
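| /// |
| /// For example, a target with post-increment i32 loads might write |
| /// (illustrative): |
| ///   setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); |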
| void setIndexedLoadAction(unsigned IdxMode, MVT VT, |
| LegalizeAction Action) { |
| assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
| (unsigned)Action < 0xf && "Table isn't big enough!"); |
| // Load actions are kept in the upper half. |
| IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; |
| IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; |
| } |
| |
| /// Indicate that the specified indexed store does or does not work with the |
| /// specified type and indicate what to do about it. |
| /// |
| /// NOTE: All indexed mode stores are initialized to Expand in |
| /// TargetLowering.cpp |
| void setIndexedStoreAction(unsigned IdxMode, MVT VT, |
| LegalizeAction Action) { |
| assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
| (unsigned)Action < 0xf && "Table isn't big enough!"); |
| // Store actions are kept in the lower half. |
| IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; |
| IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); |
| } |
| |
| /// Indicate that the specified condition code is or isn't supported on the |
| /// target and indicate what to do about it. |
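| /// |
| /// For example, a target whose FP unit cannot test "ordered greater than" |
| /// directly might write (condition code and type illustrative): |
| ///   setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); |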
| void setCondCodeAction(ISD::CondCode CC, MVT VT, |
| LegalizeAction Action) { |
| assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && |
| "Table isn't big enough!"); |
| assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
| // The lower 3 bits of the SimpleTy select the Nth 4-bit chunk within a |
| // 32-bit value; the remaining bits index into the second dimension of the |
| // array to select which 32-bit value to use. |
| uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
| CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
| CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
| } |
| |
| /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
| /// to trying a larger integer/fp until it can find one that works. If that |
| /// default is insufficient, this method can be used by the target to override |
| /// the default. |
| void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
| PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; |
| } |
| |
| /// Convenience method to set an operation to Promote and specify the type |
| /// in a single call. |
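| /// |
| /// For example, a target with only 32-bit bitwise operations might write |
| /// (illustrative): |
| ///   setOperationPromotedToType(ISD::AND, MVT::i16, MVT::i32); |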
| void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
| setOperationAction(Opc, OrigVT, Promote); |
| AddPromotedToType(Opc, OrigVT, DestVT); |
| } |
| |
| /// Targets should invoke this method for each target independent node that |
| /// they want to provide a custom DAG combiner for by implementing the |
| /// PerformDAGCombine virtual method. |
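| /// |
| /// Typical use in a target constructor (node choice illustrative): |
| ///   setTargetDAGCombine(ISD::SIGN_EXTEND); |
| ///   setTargetDAGCombine(ISD::STORE); |
| /// PerformDAGCombine is then invoked for every node of these kinds. |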
| void setTargetDAGCombine(ISD::NodeType NT) { |
| assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); |
| TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); |
| } |
| |
| /// Set the target's required jmp_buf buffer size (in bytes); default is 200 |
| void setJumpBufSize(unsigned Size) { |
| JumpBufSize = Size; |
| } |
| |
| /// Set the target's required jmp_buf buffer alignment (in bytes); default is |
| /// 0 |
| void setJumpBufAlignment(unsigned Align) { |
| JumpBufAlignment = Align; |
| } |
| |
| /// Set the target's minimum function alignment (in log2(bytes)) |
| void setMinFunctionAlignment(unsigned Align) { |
| MinFunctionAlignment = Align; |
| } |
| |
| /// Set the target's preferred function alignment. This should be set if |
| /// there is a performance benefit to higher-than-minimum alignment (in |
| /// log2(bytes)) |
| void setPrefFunctionAlignment(unsigned Align) { |
| PrefFunctionAlignment = Align; |
| } |
| |
| /// Set the target's preferred loop alignment. The default alignment of zero |
| /// means the target does not care about loop alignment. The alignment is |
| /// specified in log2(bytes). The target may also override |
| /// getPrefLoopAlignment to provide per-loop values. |
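| /// |
| /// Since the alignment setters take log2 values, a target wanting 16-byte |
| /// loop alignment and 4-byte minimum function alignment would write |
| /// (illustrative): |
| ///   setPrefLoopAlignment(4);    // 2^4 = 16 bytes |
| ///   setMinFunctionAlignment(2); // 2^2 = 4 bytes |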
| void setPrefLoopAlignment(unsigned Align) { |
| PrefLoopAlignment = Align; |
| } |
| |
| /// Set the minimum stack alignment of an argument (in log2(bytes)). |
| void setMinStackArgumentAlignment(unsigned Align) { |
| MinStackArgumentAlignment = Align; |
| } |
| |
| /// Set the maximum atomic operation size supported by the |
| /// backend. Atomic operations greater than this size (as well as |
| /// ones that are not naturally aligned) will be expanded by |
| /// AtomicExpandPass into an __atomic_* library call. |
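| /// |
| /// For example, a 32-bit target that also has a 64-bit cmpxchg but nothing |
| /// wider might write (sizes illustrative): |
| ///   setMaxAtomicSizeInBitsSupported(64); |
| ///   setMinCmpXchgSizeInBits(32); |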
| void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
| MaxAtomicSizeInBitsSupported = SizeInBits; |
| } |
| |
| /// Sets the minimum cmpxchg or ll/sc size supported by the backend. |
| void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
| MinCmpXchgSizeInBits = SizeInBits; |
| } |
| |
| /// Sets whether unaligned atomic operations are supported. |
| void setSupportsUnalignedAtomics(bool UnalignedSupported) { |
| SupportsUnalignedAtomics = UnalignedSupported; |
| } |
| |
| public: |
| //===--------------------------------------------------------------------===// |
| // Addressing mode description hooks (used by LSR etc). |
| // |
| |
| /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
| /// instructions reading the address. This allows as much computation as |
| /// possible to be done in the address mode for that operand. This hook lets |
| /// targets also pass back whether the same should be done for intrinsics |
| /// that load or store, and which of their operands form the address. |
| virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, |
| SmallVectorImpl<Value*> &/*Ops*/, |
| Type *&/*AccessTy*/) const { |
| return false; |
| } |
| |
| /// This represents an addressing mode of: |
| /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg |
| /// If BaseGV is null, there is no BaseGV. |
| /// If BaseOffs is zero, there is no base offset. |
| /// If HasBaseReg is false, there is no base register. |
| /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with |
| /// no scale. |
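| /// |
| /// For example, an access of the form [%base + 4*%index + 16] with no global |
| /// would be described as (illustrative): |
| ///   AddrMode AM; |
| ///   AM.BaseOffs   = 16; |
| ///   AM.HasBaseReg = true; |
| ///   AM.Scale      = 4; |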
| struct AddrMode { |
| GlobalValue *BaseGV = nullptr; |
| int64_t BaseOffs = 0; |
| bool HasBaseReg = false; |
| int64_t Scale = 0; |
| AddrMode() = default; |
| }; |
| |
| /// Return true if the addressing mode represented by AM is legal for this |
| /// target, for a load/store of the specified type. |
| /// |
| /// The type may be VoidTy, in which case only return true if the addressing |
| /// mode is legal for a load/store of any legal type. TODO: Handle |
| /// pre/postinc as well. |
| /// |
| /// If the address space cannot be determined, it will be -1. |
| /// |
| /// TODO: Remove default argument |
| virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
| Type *Ty, unsigned AddrSpace, |
| Instruction *I = nullptr) const; |
| |
| /// Return the cost of the scaling factor used in the addressing mode |
| /// represented by AM for this target, for a load/store of the specified type. |
| /// |
| /// If the AM is supported, the return value must be >= 0. |
| /// If the AM is not supported, it returns a negative value. |
| /// TODO: Handle pre/postinc as well. |
| /// TODO: Remove default argument |
| virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, |
| Type *Ty, unsigned AS = 0) const { |
| // Default: assume that any scaling factor used in a legal AM is free. |
| if (isLegalAddressingMode(DL, AM, Ty, AS)) |
| return 0; |
| return -1; |
| } |
| |
| /// Return true if the specified immediate is a legal icmp immediate, that is |
| /// the target has icmp instructions which can compare a register against the |
| /// immediate without having to materialize the immediate into a register. |
| virtual bool isLegalICmpImmediate(int64_t) const { |
| return true; |
| } |
| |
| /// Return true if the specified immediate is a legal add immediate, that is the |
| /// target has add instructions which can add a register with the immediate |
| /// without having to materialize the immediate into a register. |
| virtual bool isLegalAddImmediate(int64_t) const { |
| return true; |
| } |
| |
| /// Return true if the specified immediate is legal for the value input of a |
| /// store instruction. |
| virtual bool isLegalStoreImmediate(int64_t Value) const { |
| // Default implementation assumes that at least 0 works since it is likely |
| // that a zero register exists or a zero immediate is allowed. |
| return Value == 0; |
| } |
| |
| /// Return true if it's significantly cheaper to shift a vector by a uniform |
| /// scalar than by an amount which will vary across each lane. On x86, for |
| /// example, there is a "psllw" instruction for the former case, but no simple |
| /// instruction for a general "a << b" operation on vectors. |
| virtual bool isVectorShiftByScalarCheap(Type *Ty) const { |
| return false; |
| } |
| |
| /// Returns true if the opcode is a commutative binary operation. |
| virtual bool isCommutativeBinOp(unsigned Opcode) const { |
| // FIXME: This should get its info from the td file. |
| switch (Opcode) { |
| case ISD::ADD: |
| case ISD::SMIN: |
| case ISD::SMAX: |
| case ISD::UMIN: |
| case ISD::UMAX: |
| case ISD::MUL: |
| case ISD::MULHU: |
| case ISD::MULHS: |
| case ISD::SMUL_LOHI: |
| case ISD::UMUL_LOHI: |
| case ISD::FADD: |
| case ISD::FMUL: |
| case ISD::AND: |
| case ISD::OR: |
| case ISD::XOR: |
| case ISD::SADDO: |
| case ISD::UADDO: |
| case ISD::ADDC: |
| case ISD::ADDE: |
| case ISD::SADDSAT: |
| case ISD::UADDSAT: |
| case ISD::FMINNUM: |
| case ISD::FMAXNUM: |
| case ISD::FMINIMUM: |
| case ISD::FMAXIMUM: |
| return true; |
| default: return false; |
| } |
| } |
| |
| /// Return true if it's free to truncate a value of type FromTy to type |
| /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16 |
| /// by referencing its sub-register AX. |
| /// Targets must return false when FromTy <= ToTy. |
| virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { |
| return false; |
| } |
| |
| /// Return true if a truncation from FromTy to ToTy is permitted when deciding |
| /// whether a call is in tail position. Typically this means that both results |
| /// would be assigned to the same register or stack slot, but it could mean |
| /// the target performs adequate checks of its own before proceeding with the |
| /// tail call. Targets must return false when FromTy <= ToTy. |
| virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { |
| return false; |
| } |
| |
| virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { |
| return false; |
| } |
| |
| virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
| |
| /// Return true if the extension represented by \p I is free. |
| /// Unlike the is[Z|FP]ExtFree family, which is based on types, |
| /// this method can use the context provided by \p I to decide |
| /// whether or not \p I is free. |
| /// This method extends the behavior of the is[Z|FP]ExtFree family. |
| /// In other words, if is[Z|FP]ExtFree returns true, then this method |
| /// returns true as well. The converse is not true. |
| /// The target can perform the adequate checks by overriding isExtFreeImpl. |
| /// \pre \p I must be a sign, zero, or fp extension. |
| bool isExtFree(const Instruction *I) const { |
| switch (I->getOpcode()) { |
| case Instruction::FPExt: |
| if (isFPExtFree(EVT::getEVT(I->getType()), |
| EVT::getEVT(I->getOperand(0)->getType()))) |
| return true; |
| break; |
| case Instruction::ZExt: |
| if (isZExtFree(I->getOperand(0)->getType(), I->getType())) |
| return true; |
| break; |
| case Instruction::SExt: |
| break; |
| default: |
| llvm_unreachable("Instruction is not an extension"); |
| } |
| return isExtFreeImpl(I); |
| } |
| |
| /// Return true if \p Load and \p Ext can form an ExtLoad. |
| /// For example, in AArch64 |
| /// %L = load i8, i8* %ptr |
| /// %E = zext i8 %L to i32 |
| /// can be lowered into one load instruction |
| /// ldrb w0, [x0] |
| bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
| const DataLayout &DL) const { |
| EVT VT = getValueType(DL, Ext->getType()); |
| EVT LoadVT = getValueType(DL, Load->getType()); |
| |
| // If the load has other users and the truncate is not free, the ext |
| // probably isn't free. |
| if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && |
| !isTruncateFree(Ext->getType(), Load->getType())) |
| return false; |
| |
| // Check whether the target supports casts folded into loads. |
| unsigned LType; |
| if (isa<ZExtInst>(Ext)) |
| LType = ISD::ZEXTLOAD; |
| else { |
| assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); |
| LType = ISD::SEXTLOAD; |
| } |
| |
| return isLoadExtLegal(LType, VT, LoadVT); |
| } |
| |
| /// Return true if any actual instruction that defines a value of type FromTy |
| /// implicitly zero-extends the value to ToTy in the result register. |
| /// |
| /// The function should return true when it is likely that the zero extension |
| /// can be freely folded into an instruction defining a value of FromTy. If |
| /// the defining instruction is unknown (because you're looking at a |
| /// function argument, PHI, etc.) then the target may require an |
| /// explicit truncate, which is not necessarily free, but this function |
| /// does not deal with those cases. |
| /// Targets must return false when FromTy >= ToTy. |
| virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { |
| return false; |
| } |
| |
| virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { |
| return false; |
| } |
| |
| /// Return true if sign-extension from FromTy to ToTy is cheaper than |
| /// zero-extension. |
| virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { |
| return false; |
| } |
| |
| /// Return true if sinking I's operands to the same basic block as I is |
| /// profitable, e.g. because the operands can be folded into a target |
| /// instruction during instruction selection. After calling the function |
| /// \p Ops contains the Uses to sink ordered by dominance (dominating users |
| /// come first). |
| virtual bool shouldSinkOperands(Instruction *I, |
| SmallVectorImpl<Use *> &Ops) const { |
| return false; |
| } |
| |
| /// Return true if the target can combine two values of type LoadedType, |
| /// loaded next to each other in memory, into a single paired load. |
| /// RequiredAlignment gives the minimal alignment constraints that must be met |
| /// to be able to select this paired load. |
| /// |
| /// This information is *not* used to generate actual paired loads, but it is |
| /// used to generate a sequence of loads that is easier to combine into a |
| /// paired load. |
| /// For instance, something like this: |
| /// a = load i64* addr |
| /// b = trunc i64 a to i32 |
| /// c = lshr i64 a, 32 |
| /// d = trunc i64 c to i32 |
| /// will be optimized into: |
| /// b = load i32* addr1 |
| /// d = load i32* addr2 |
| /// Where addr1 = addr2 +/- sizeof(i32). |
| /// |
| /// In other words, unless the target performs a post-isel load combining, |
| /// this information should not be provided because it will generate more |
| /// loads. |
| virtual bool hasPairedLoad(EVT /*LoadedType*/, |
| unsigned & /*RequiredAlignment*/) const { |
| return false; |
| } |
| |
| /// Return true if the target has a vector blend instruction. |
| virtual bool hasVectorBlend() const { return false; } |
| |
| /// Get the maximum supported factor for interleaved memory accesses. |
| /// Default to be the minimum interleave factor: 2. |
| virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } |
| |
| /// Lower an interleaved load to target specific intrinsics. Return |
| /// true on success. |
| /// |
| /// \p LI is the vector load instruction. |
| /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. |
| /// \p Indices is the corresponding indices for each shufflevector. |
| /// \p Factor is the interleave factor. |
| virtual bool lowerInterleavedLoad(LoadInst *LI, |
| ArrayRef<ShuffleVectorInst *> Shuffles, |
| ArrayRef<unsigned> Indices, |
| unsigned Factor) const { |
| return false; |
| } |
| |
| /// Lower an interleaved store to target specific intrinsics. Return |
| /// true on success. |
| /// |
| /// \p SI is the vector store instruction. |
| /// \p SVI is the shufflevector to RE-interleave the stored vector. |
| /// \p Factor is the interleave factor. |
| virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
| unsigned Factor) const { |
| return false; |
| } |
| |
| /// Return true if zero-extending the specified node Val to type VT2 is free |
| /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
| /// because it's folded such as X86 zero-extending loads). |
| virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
| return isZExtFree(Val.getValueType(), VT2); |
| } |
| |
| /// Return true if an fpext operation is free (for instance, because |
| /// single-precision floating-point numbers are implicitly extended to |
| /// double-precision). |
| virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { |
| assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && |
| "invalid fpext types"); |
| return false; |
| } |
| |
| /// Return true if an fpext operation input to an \p Opcode operation is free |
| /// (for instance, because half-precision floating-point numbers are |
| /// implicitly extended to single precision) for an FMA instruction. |
| virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { |
| assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && |
| "invalid fpext types"); |
| return isFPExtFree(DestVT, SrcVT); |
| } |
| |
| /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
| /// extend node) is profitable. |
| virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } |
| |
| /// Return true if an fneg operation is free to the point where it is never |
| /// worthwhile to replace it with a bitwise operation. |
| virtual bool isFNegFree(EVT VT) const { |
| assert(VT.isFloatingPoint()); |
| return false; |
| } |
| |
| /// Return true if an fabs operation is free to the point where it is never |
| /// worthwhile to replace it with a bitwise operation. |
| virtual bool isFAbsFree(EVT VT) const { |
| assert(VT.isFloatingPoint()); |
| return false; |
| } |
| |
| /// Return true if an FMA operation is faster than a pair of fmul and fadd |
| /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
| /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
| /// |
| /// NOTE: This may be called before legalization on types for which FMAs are |
| /// not legal, but should return true if those types will eventually legalize |
| /// to types that support FMAs. After legalization, it will only be called on |
| /// types that support FMAs (via Legal or Custom actions). |
| virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { |
| return false; |
| } |
| |
| /// Return true if it's profitable to narrow operations of type VT1 to |
| /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from |
| /// i32 to i16. |
| virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { |
| return false; |
| } |
| |
| /// Return true if it is beneficial to convert a load of a constant to |
| /// just the constant itself. |
| /// On some targets it might be more efficient to use a combination of |
| /// arithmetic instructions to materialize the constant instead of loading it |
| /// from a constant pool. |
| virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
| Type *Ty) const { |
| return false; |
| } |
| |
| /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
| /// from this source type with this index. This is needed because |
| /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
| /// the first element, and only the target knows which lowering is cheap. |
| virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
| unsigned Index) const { |
| return false; |
| } |
| |
| /// Try to convert an extract element of a vector binary operation into an |
| /// extract element followed by a scalar operation. |
| virtual bool shouldScalarizeBinop(SDValue VecOp) const { |
| return false; |
| } |
| |
| // Return true if it is profitable to use a scalar input to a BUILD_VECTOR |
| // even if the vector itself has multiple uses. |
| virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { |
| return false; |
| } |
| |
| // Return true if CodeGenPrepare should consider splitting a large offset of |
| // a GEP so that the GEP fits into the addressing mode and can be sunk into |
| // the same blocks as its users. |
| virtual bool shouldConsiderGEPOffsetSplit() const { return false; } |
| |
| //===--------------------------------------------------------------------===// |
| // Runtime Library hooks |
| // |
| |
| /// Rename the default libcall routine name for the specified libcall. |
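| /// |
| /// For example, a platform whose 64-bit signed-divide helper has a |
| /// nonstandard name and calling convention might write (the symbol name is |
| /// hypothetical): |
| ///   setLibcallName(RTLIB::SDIV_I64, "__mytarget_divdi3"); |
| ///   setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::Fast); |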
| void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
| LibcallRoutineNames[Call] = Name; |
| } |
| |
| /// Get the libcall routine name for the specified libcall. |
| const char *getLibcallName(RTLIB::Libcall Call) const { |
| return LibcallRoutineNames[Call]; |
| } |
| |
| /// Override the default CondCode to be used to test the result of the |
| /// comparison libcall against zero. |
| void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
| CmpLibcallCCs[Call] = CC; |
| } |
| |
| /// Get the CondCode that's to be used to test the result of the comparison |
| /// libcall against zero. |
| ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
| return CmpLibcallCCs[Call]; |
| } |
| |
| /// Set the CallingConv that should be used for the specified libcall. |
| void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
| LibcallCallingConvs[Call] = CC; |
| } |
| |
| /// Get the CallingConv that should be used for the specified libcall. |
| CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
| return LibcallCallingConvs[Call]; |
| } |
| |
| /// Execute target specific actions to finalize target lowering. |
| /// This is used to set extra flags in MachineFrameInfo and to freeze the |
| /// set of reserved registers. |
| /// The default implementation just freezes the set of reserved registers. |
| virtual void finalizeLowering(MachineFunction &MF) const; |
| |
| private: |
| const TargetMachine &TM; |
| |
| /// Tells the code generator that the target has multiple (allocatable) |
| /// condition registers that can be used to store the results of comparisons |
| /// for use by selects and conditional branches. With multiple condition |
| /// registers, the code generator will not aggressively sink comparisons into |
| /// the blocks of their users. |
| bool HasMultipleConditionRegisters; |
| |
| /// Tells the code generator that the target has BitExtract instructions. |
| /// The code generator will aggressively sink "shift"s into the blocks of |
| /// their users if the users will generate "and" instructions which can be |
| /// combined with "shift" to BitExtract instructions. |
| bool HasExtractBitsInsn; |
| |
| /// Tells the code generator to bypass slow divide or remainder |
| /// instructions. For example, BypassSlowDivWidths[32] = 8 tells the code |
| /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
| /// div/rem when the operands are positive and less than 256. |
| DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
| |
| /// Tells the code generator that it shouldn't generate extra flow control |
| /// instructions and should attempt to combine flow control instructions via |
| /// predication. |
| bool JumpIsExpensive; |
| |
| /// Whether the target supports or cares about preserving floating point |
| /// exception behavior. |
| bool HasFloatingPointExceptions; |
| |
| /// This target prefers to use _setjmp to implement llvm.setjmp. |
| /// |
| /// Defaults to false. |
| bool UseUnderscoreSetJmp; |
| |
| /// This target prefers to use _longjmp to implement llvm.longjmp. |
| /// |
| /// Defaults to false. |
| bool UseUnderscoreLongJmp; |
| |
| /// Information about the contents of the high-bits in boolean values held in |
| /// a type wider than i1. See getBooleanContents. |
| BooleanContent BooleanContents; |
| |
| /// Information about the contents of the high-bits in boolean values held in |
| /// a type wider than i1. See getBooleanContents. |
| BooleanContent BooleanFloatContents; |
| |
| /// Information about the contents of the high-bits in boolean vector values |
| /// when the element type is wider than i1. See getBooleanContents. |
| BooleanContent BooleanVectorContents; |
| |
| /// The target scheduling preference: shortest possible total cycles or lowest |
| /// register usage. |
| Sched::Preference SchedPreferenceInfo; |
| |
| /// The size, in bytes, of the target's jmp_buf buffers |
| unsigned JumpBufSize; |
| |
| /// The alignment, in bytes, of the target's jmp_buf buffers |
| unsigned JumpBufAlignment; |
| |
| /// The minimum alignment that any argument on the stack needs to have. |
| unsigned MinStackArgumentAlignment; |
| |
| /// The minimum function alignment (used when optimizing for size, and to |
| /// prevent explicitly provided alignment from leading to incorrect code). |
| unsigned MinFunctionAlignment; |
| |
| /// The preferred function alignment (used when alignment unspecified and |
| /// optimizing for speed). |
| unsigned PrefFunctionAlignment; |
| |
| /// The preferred loop alignment. |
| unsigned PrefLoopAlignment; |
| |
| /// Size in bits of the maximum atomics size the backend supports. |
| /// Accesses larger than this will be expanded by AtomicExpandPass. |
| unsigned MaxAtomicSizeInBitsSupported; |
| |
| /// Size in bits of the minimum cmpxchg or ll/sc operation the |
| /// backend supports. |
| unsigned MinCmpXchgSizeInBits; |
| |
| /// This indicates if the target supports unaligned atomic operations. |
| bool SupportsUnalignedAtomics; |
| |
| /// If set to a physical register, this specifies the register that |
| /// llvm.stacksave/llvm.stackrestore should save and restore. |
| unsigned StackPointerRegisterToSaveRestore; |
| |
| /// This indicates the default register class to use for each ValueType the |
| /// target supports natively. |
| const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; |
| unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; |
| MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; |
| |
| /// This indicates the "representative" register class to use for each |
| /// ValueType the target supports natively. This information is used by the |
| /// scheduler to track register pressure. By default, the representative |
| /// register class is the largest legal super-reg register class of the |
| /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
| /// representative class would be GR32. |
| const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; |
| |
| /// This indicates the "cost" of the "representative" register class for each |
| /// ValueType. The cost is used by the scheduler to approximate register |
| /// pressure. |
| uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; |
| |
| /// For any value types we are promoting or expanding, this contains the value |
| /// type that we are changing to. For Expanded types, this contains one step |
| /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
| /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
| /// the same type (e.g. i32 -> i32). |
| MVT TransformToType[MVT::LAST_VALUETYPE]; |
| |
| /// For each operation and each value type, keep a LegalizeAction that |
| /// indicates how instruction selection should deal with the operation. Most |
| /// operations are Legal (aka, supported natively by the target), but |
| /// operations that are not should be described. Note that operations on |
| /// non-legal value types are not described here. |
| LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; |
| |
| /// For each load extension type and each value type, keep a LegalizeAction |
| /// that indicates how instruction selection should deal with a load of a |
| /// specific value type and extension type. Uses 4-bits to store the action |
| /// for each of the 4 load ext types. |
| uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
| |
| /// For each value type pair keep a LegalizeAction that indicates whether a |
| /// truncating store of a specific value type and truncating type is legal. |
| LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
| |
| /// For each indexed mode and each value type, keep a pair of LegalizeAction |
| /// that indicates how instruction selection should deal with the load / |
| /// store. |
| /// |
| /// The first dimension is the value_type for the reference. The second |
| /// dimension represents the various modes for load store. |
| uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; |
| |
| /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
| /// indicates how instruction selection should deal with the condition code. |
| /// |
| /// Because each CC action takes up 4 bits, we need to have the array size be |
| /// large enough to fit all of the value types. This can be done by rounding |
| /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. |
| uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; |
| |
| protected: |
| ValueTypeActionImpl ValueTypeActions; |
| |
| private: |
| LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
| |
| /// Targets can specify ISD nodes that they would like PerformDAGCombine |
| /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
| /// array. |
| unsigned char |
| TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
| |
| /// For operations that must be promoted to a specific type, this holds the |
| /// destination type. This map should be sparse, so don't hold it as an |
| /// array. |
| /// |
| /// Targets add entries to this map with AddPromotedToType(..), clients access |
| /// this with getTypeToPromoteTo(..). |
| std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
| PromoteToType; |
| |
| /// Stores the name of each libcall. |
| const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; |
| |
| /// The ISD::CondCode that should be used to test the result of each of the |
| /// comparison libcall against zero. |
| ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
| |
| /// Stores the CallingConv that should be used for each libcall. |
| CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
| |
| /// Set default libcall names and calling conventions. |
| void InitLibcalls(const Triple &TT); |
| |
| protected: |
| /// Return true if the extension represented by \p I is free. |
| /// \pre \p I is a sign, zero, or fp extension and |
| /// is[Z|FP]ExtFree of the related types is not true. |
| virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
| |
| /// Depth that GatherAllAliases should continue looking for chain |
| /// dependencies when trying to find a more preferable chain. As an |
| /// approximation, this should be more than the number of consecutive stores |
| /// expected to be merged. |
| unsigned GatherAllAliasesMaxDepth; |
| |
| /// Specify maximum number of store instructions per memset call. |
| /// |
| /// When lowering \@llvm.memset this field specifies the maximum number of |
| /// store operations that may be substituted for the call to memset. Targets |
| /// must set this value based on the cost threshold for that target. Targets |
| /// should assume that the memset will be done using as many of the largest |
| /// store operations first, followed by smaller ones, if necessary, per |
| /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
| /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
| /// store. This only applies to setting a constant array of a constant size. |
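| /// |
| /// For illustration, a target tunes this and the related thresholds below in |
| /// its constructor; the values here are hypothetical: |
| /// \code |
| ///   MaxStoresPerMemset = 16;        // allow up to 16 stores when inlining |
| ///   MaxStoresPerMemsetOptSize = 8;  // be more conservative under optsize |
| /// \endcode |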
| unsigned MaxStoresPerMemset; |
| |
| /// Maximum number of stores operations that may be substituted for the call |
| /// to memset, used for functions with OptSize attribute. |
| unsigned MaxStoresPerMemsetOptSize; |
| |
| /// Specify maximum number of store instructions per memcpy call. |
| /// |
| /// When lowering \@llvm.memcpy this field specifies the maximum number of |
| /// store operations that may be substituted for a call to memcpy. Targets |
| /// must set this value based on the cost threshold for that target. Targets |
| /// should assume that the memcpy will be done using as many of the largest |
| /// store operations first, followed by smaller ones, if necessary, per |
| /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
| /// with 32-bit alignment would result in one 4-byte store, one 2-byte store, |
| /// and one 1-byte store. This only applies to copying a constant array of |
| /// constant size. |
| unsigned MaxStoresPerMemcpy; |
| |
| /// Specify max number of store instructions to glue in inlined memcpy. |
| /// |
| /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
| /// of store instructions to keep together. This helps in pairing and |
| /// vectorization later on. |
| unsigned MaxGluedStoresPerMemcpy = 0; |
| |
| /// Maximum number of store operations that may be substituted for a call to |
| /// memcpy, used for functions with OptSize attribute. |
| unsigned MaxStoresPerMemcpyOptSize; |
| /// Maximum number of load instructions that may be substituted for a call to |
| /// memcmp; the OptSize variant applies to functions with the OptSize |
| /// attribute. |
| unsigned MaxLoadsPerMemcmp; |
| unsigned MaxLoadsPerMemcmpOptSize; |
| |
| /// Specify maximum number of store instructions per memmove call. |
| /// |
| /// When lowering \@llvm.memmove this field specifies the maximum number of |
| /// store instructions that may be substituted for a call to memmove. Targets |
| /// must set this value based on the cost threshold for that target. Targets |
| /// should assume that the memmove will be done using as many of the largest |
| /// store operations first, followed by smaller ones, if necessary, per |
| /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
| /// with 8-bit alignment would result in nine 1-byte stores. This only |
| /// applies to copying a constant array of constant size. |
| unsigned MaxStoresPerMemmove; |
| |
| /// Maximum number of store instructions that may be substituted for a call to |
| /// memmove, used for functions with OptSize attribute. |
| unsigned MaxStoresPerMemmoveOptSize; |
| |
| /// Tells the code generator that select is more expensive than a branch if |
| /// the branch is usually predicted right. |
| bool PredictableSelectIsExpensive; |
| |
| /// \see enableExtLdPromotion. |
| bool EnableExtLdPromotion; |
| |
| /// Return true if the value types that can be represented by the specified |
| /// register class are all legal. |
| bool isLegalRC(const TargetRegisterInfo &TRI, |
| const TargetRegisterClass &RC) const; |
| |
| /// Replace/modify any TargetFrameIndex operands with a target-dependent |
| /// sequence of memory operands that is recognized by PrologEpilogInserter. |
| MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
| MachineBasicBlock *MBB) const; |
| |
| /// Replace/modify the XRay custom event operands with target-dependent |
| /// details. |
| MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, |
| MachineBasicBlock *MBB) const; |
| |
| /// Replace/modify the XRay typed event operands with target-dependent |
| /// details. |
| MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, |
| MachineBasicBlock *MBB) const; |
| }; |
| |
| /// This class defines information used to lower LLVM code to legal SelectionDAG |
| /// operators that the target instruction selector can accept natively. |
| /// |
| /// This class also defines callbacks that targets must implement to lower |
| /// target-specific constructs to SelectionDAG operators. |
| class TargetLowering : public TargetLoweringBase { |
| public: |
| struct DAGCombinerInfo; |
| |
| TargetLowering(const TargetLowering &) = delete; |
| TargetLowering &operator=(const TargetLowering &) = delete; |
| |
| /// NOTE: The TargetMachine owns TLOF. |
| explicit TargetLowering(const TargetMachine &TM); |
| |
| bool isPositionIndependent() const; |
| |
| virtual bool isSDNodeSourceOfDivergence(const SDNode *N, |
| FunctionLoweringInfo *FLI, |
| LegacyDivergenceAnalysis *DA) const { |
| return false; |
| } |
| |
| virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { |
| return false; |
| } |
| |
| /// Returns true, and sets the base pointer, offset pointer and addressing |
| /// mode by reference, if the node's address can be legally represented as a |
| /// pre-indexed load / store address. |
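| /// |
| /// A minimal sketch of an override body for a target with pre-increment |
| /// loads (isLegalPreIncOffset is a hypothetical target-specific helper, and |
| /// a real implementation would handle stores and other patterns as well): |
| /// \code |
| ///   auto *LD = dyn_cast<LoadSDNode>(N); |
| ///   if (!LD || LD->getBasePtr().getOpcode() != ISD::ADD) |
| ///     return false; |
| ///   Base = LD->getBasePtr().getOperand(0); |
| ///   Offset = LD->getBasePtr().getOperand(1); |
| ///   AM = ISD::PRE_INC; |
| ///   return isLegalPreIncOffset(Offset); |
| /// \endcode |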
| virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
| SDValue &/*Offset*/, |
| ISD::MemIndexedMode &/*AM*/, |
| SelectionDAG &/*DAG*/) const { |
| return false; |
| } |
| |
| /// Returns true, and sets the base pointer, offset pointer and addressing |
| /// mode by reference, if this node can be combined with a load / store to |
| /// form a post-indexed load / store. |
| virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
| SDValue &/*Base*/, |
| SDValue &/*Offset*/, |
| ISD::MemIndexedMode &/*AM*/, |
| SelectionDAG &/*DAG*/) const { |
| return false; |
| } |
| |
| /// Return the entry encoding for a jump table in the current function. The |
| /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
| virtual unsigned getJumpTableEncoding() const; |
| |
| virtual const MCExpr * |
| LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
| const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
| MCContext &/*Ctx*/) const { |
| llvm_unreachable("Need to implement this hook if target has custom JTIs"); |
| } |
| |
| /// Returns relocation base for the given PIC jumptable. |
| virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
| SelectionDAG &DAG) const; |
| |
| /// This returns the relocation base for the given PIC jumptable, the same as |
| /// getPICJumpTableRelocBase, but as an MCExpr. |
| virtual const MCExpr * |
| getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
| unsigned JTI, MCContext &Ctx) const; |
| |
| /// Return true if folding a constant offset with the given GlobalAddress is |
| /// legal. It is frequently not legal in PIC relocation models. |
| virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
| |
| /// Check whether a given call node is in tail position within its function. |
| /// If so, it sets Chain to the input chain of the tail call. |
| bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
| SDValue &Chain) const; |
| |
| /// Soften the operands of a comparison. This code is shared among BR_CC, |
| /// SELECT_CC, and SETCC handlers. |
| void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
| SDValue &NewRHS, ISD::CondCode &CCCode, |
| const SDLoc &DL) const; |
| |
| /// Returns a pair of (return value, chain). |
| /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
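| /// |
| /// A usage sketch (the libcall and operands are illustrative only, assuming a |
| /// two-operand node N is being expanded): |
| /// \code |
| ///   SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; |
| ///   std::pair<SDValue, SDValue> CallResult = |
| ///       makeLibCall(DAG, RTLIB::REM_F64, MVT::f64, Ops, /*isSigned=*/false, |
| ///                   SDLoc(N)); |
| ///   SDValue Result = CallResult.first;  // second is the output chain |
| /// \endcode |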
| std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
| EVT RetVT, ArrayRef<SDValue> Ops, |
| bool isSigned, const SDLoc &dl, |
| bool doesNotReturn = false, |
| bool isReturnValueUsed = true) const; |
| |
| /// Check whether parameters to a call that are passed in callee saved |
| /// registers are the same as from the calling function. This needs to be |
| /// checked for tail call eligibility. |
| bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
| const uint32_t *CallerPreservedMask, |
| const SmallVectorImpl<CCValAssign> &ArgLocs, |
| const SmallVectorImpl<SDValue> &OutVals) const; |
| |
| //===--------------------------------------------------------------------===// |
| // TargetLowering Optimization Methods |
| // |
| |
| /// A convenience struct that encapsulates a DAG, and two SDValues for |
| /// returning information from TargetLowering to its clients that want to |
| /// combine. |
| struct TargetLoweringOpt { |
| SelectionDAG &DAG; |
| bool LegalTys; |
| bool LegalOps; |
| SDValue Old; |
| SDValue New; |
| |
| explicit TargetLoweringOpt(SelectionDAG &InDAG, |
| bool LT, bool LO) : |
| DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
| |
| bool LegalTypes() const { return LegalTys; } |
| bool LegalOperations() const { return LegalOps; } |
| |
| bool CombineTo(SDValue O, SDValue N) { |
| Old = O; |
| New = N; |
| return true; |
| } |
| }; |
| |
| /// Check to see if the specified operand of the specified instruction is a |
| /// constant integer. If so, check to see if there are any bits set in the |
| /// constant that are not demanded. If so, shrink the constant and return |
| /// true. |
| bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, |
| TargetLoweringOpt &TLO) const; |
| |
| /// Target hook to do target-specific constant optimization, which is called |
| /// by ShrinkDemandedConstant. This function should return true if the target |
| /// doesn't want ShrinkDemandedConstant to further optimize the constant. |
| virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, |
| TargetLoweringOpt &TLO) const { |
| return false; |
| } |
| |
| /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This |
| /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be |
| /// generalized for targets with other types of implicit widening casts. |
| bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, |
| TargetLoweringOpt &TLO) const; |
| |
| /// Look at Op. At this point, we know that only the DemandedBits bits of the |
| /// result of Op are ever used downstream. If we can use this information to |
| /// simplify Op, create a new simplified DAG node and return true, returning |
| /// the original and new nodes in Old and New. Otherwise, analyze the |
| /// expression and return a mask of KnownOne and KnownZero bits for the |
| /// expression (used to simplify the caller). The KnownZero/One bits may only |
| /// be accurate for those bits in the Demanded masks. |
| /// \p AssumeSingleUse When this parameter is true, this function will |
| /// attempt to simplify \p Op even if there are multiple uses. |
| /// Callers are responsible for correctly updating the DAG based on the |
| /// results of this function, because simply replacing TLO.Old |
| /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
| /// has multiple uses. |
| bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| const APInt &DemandedElts, KnownBits &Known, |
| TargetLoweringOpt &TLO, unsigned Depth = 0, |
| bool AssumeSingleUse = false) const; |
| |
| /// Helper wrapper around SimplifyDemandedBits, demanding all elements. |
| /// Adds Op back to the worklist upon success. |
| bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| KnownBits &Known, TargetLoweringOpt &TLO, |
| unsigned Depth = 0, |
| bool AssumeSingleUse = false) const; |
| |
| /// Helper wrapper around SimplifyDemandedBits. |
| /// Adds Op back to the worklist upon success. |
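| /// |
| /// A usage sketch from a target combine (the 16-bit demanded mask is an |
| /// arbitrary example): |
| /// \code |
| ///   SDValue Op = N->getOperand(0); |
| ///   APInt DemandedMask = APInt::getLowBitsSet(Op.getValueSizeInBits(), 16); |
| ///   if (SimplifyDemandedBits(Op, DemandedMask, DCI)) |
| ///     return SDValue(N, 0);  // Something changed; let the combiner revisit. |
| /// \endcode |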
| bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, |
| DAGCombinerInfo &DCI) const; |
| |
| /// Look at Vector Op. At this point, we know that only the DemandedElts |
| /// elements of the result of Op are ever used downstream. If we can use |
| /// this information to simplify Op, create a new simplified DAG node and |
| /// return true, storing the original and new nodes in TLO. |
| /// Otherwise, analyze the expression and return a mask of KnownUndef and |
| /// KnownZero elements for the expression (used to simplify the caller). |
| /// The KnownUndef/Zero elements may only be accurate for those elements |
| /// in the DemandedEltMask. |
| /// \p AssumeSingleUse When this parameter is true, this function will |
| /// attempt to simplify \p Op even if there are multiple uses. |
| /// Callers are responsible for correctly updating the DAG based on the |
| /// results of this function, because simply replacing TLO.Old |
| /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
| /// has multiple uses. |
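| /// |
| /// A usage sketch demanding only lane 0 of a vector (illustrative only, |
| /// assuming a TargetLoweringOpt TLO and a DAGCombinerInfo DCI are in scope): |
| /// \code |
| ///   unsigned NumElts = Op.getValueType().getVectorNumElements(); |
| ///   APInt DemandedElts = APInt::getOneBitSet(NumElts, 0); |
| ///   APInt KnownUndef, KnownZero; |
| ///   if (SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, |
| ///                                  TLO)) |
| ///     DCI.CommitTargetLoweringOpt(TLO); |
| /// \endcode |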
| bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, |
| APInt &KnownUndef, APInt &KnownZero, |
| TargetLoweringOpt &TLO, unsigned Depth = 0, |
| bool AssumeSingleUse = false) const; |
| |
| /// Helper wrapper around SimplifyDemandedVectorElts. |
| /// Adds Op back to the worklist upon success. |
| bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
| APInt &KnownUndef, APInt |