| //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that NVPTX uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H |
| #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H |
| |
| #include "NVPTX.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| |
| namespace llvm { |
| namespace NVPTXISD { |
| enum NodeType : unsigned { |
| // Start the numbering from where ISD NodeType finishes. |
| FIRST_NUMBER = ISD::BUILTIN_OP_END, |
| Wrapper, |
| CALL, |
| RET_GLUE, |
| LOAD_PARAM, |
| DeclareParam, |
| DeclareScalarParam, |
| DeclareRetParam, |
| DeclareRet, |
| DeclareScalarRet, |
| PrintCall, |
| PrintConvergentCall, |
| PrintCallUni, |
| PrintConvergentCallUni, |
| CallArgBegin, |
| CallArg, |
| LastCallArg, |
| CallArgEnd, |
| CallVoid, |
| CallVal, |
| CallSymbol, |
| Prototype, |
| MoveParam, |
| PseudoUseParam, |
| RETURN, |
| CallSeqBegin, |
| CallSeqEnd, |
| CallPrototype, |
| ProxyReg, |
| FSHL_CLAMP, |
| FSHR_CLAMP, |
| MUL_WIDE_SIGNED, |
| MUL_WIDE_UNSIGNED, |
| SETP_F16X2, |
| SETP_BF16X2, |
| BFE, |
| BFI, |
| PRMT, |
| |
| /// This node is similar to ISD::BUILD_VECTOR except that the output may be |
| /// implicitly bitcast to a scalar. This allows for the representation of |
| /// packing move instructions for vector types which are not legal i.e. v2i32 |
| BUILD_VECTOR, |
| |
| /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value |
| /// which may be a scalar and unpacks it into multiple values by implicitly |
| /// converting it to a vector. |
| UNPACK_VECTOR, |
| |
| FCOPYSIGN, |
| DYNAMIC_STACKALLOC, |
| STACKRESTORE, |
| STACKSAVE, |
| BrxStart, |
| BrxItem, |
| BrxEnd, |
| Dummy, |
| |
| FIRST_MEMORY_OPCODE, |
| LoadV2 = FIRST_MEMORY_OPCODE, |
| LoadV4, |
| LDUV2, // LDU.v2 |
| LDUV4, // LDU.v4 |
| StoreV2, |
| StoreV4, |
| LoadParam, |
| LoadParamV2, |
| LoadParamV4, |
| StoreParam, |
| StoreParamV2, |
| StoreParamV4, |
| StoreParamS32, // to sext and store a <32bit value, not used currently |
| StoreParamU32, // to zext and store a <32bit value, not used currently |
| StoreRetval, |
| StoreRetvalV2, |
| StoreRetvalV4, |
| LAST_MEMORY_OPCODE = StoreRetvalV4, |
| }; |
| } |
| |
| class NVPTXSubtarget; |
| |
| //===--------------------------------------------------------------------===// |
| // TargetLowering Implementation |
| //===--------------------------------------------------------------------===// |
| class NVPTXTargetLowering : public TargetLowering { |
| public: |
| explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, |
| const NVPTXSubtarget &STI); |
| SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
| |
| SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| |
| const char *getTargetNodeName(unsigned Opcode) const override; |
| |
| bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
| MachineFunction &MF, |
| unsigned Intrinsic) const override; |
| |
| Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, |
| const DataLayout &DL) const; |
| |
| /// getFunctionParamOptimizedAlign - since function arguments are passed via |
| /// .param space, we may want to increase their alignment in a way that |
| /// ensures that we can effectively vectorize their loads & stores. We can |
| /// increase alignment only if the function has internal or has private |
| /// linkage as for other linkage types callers may already rely on default |
| /// alignment. To allow using 128-bit vectorized loads/stores, this function |
| /// ensures that alignment is 16 or greater. |
| Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, |
| const DataLayout &DL) const; |
| |
| /// Helper for computing alignment of a device function byval parameter. |
| Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, |
| Align InitialAlign, |
| const DataLayout &DL) const; |
| |
| // Helper for getting a function parameter name. Name is composed from |
| // its index and the function name. Negative index corresponds to special |
| // parameter (unsized array) used for passing variable arguments. |
| std::string getParamName(const Function *F, int Idx) const; |
| |
| /// isLegalAddressingMode - Return true if the addressing mode represented |
| /// by AM is legal for this target, for a load/store of the specified type |
| /// Used to guide target specific optimizations, like loop strength |
| /// reduction (LoopStrengthReduce.cpp) and memory optimization for |
| /// address mode (CodeGenPrepare.cpp) |
| bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
| unsigned AS, |
| Instruction *I = nullptr) const override; |
| |
| bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { |
| // Truncating 64-bit to 32-bit is free in SASS. |
| if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
| return false; |
| return SrcTy->getPrimitiveSizeInBits() == 64 && |
| DstTy->getPrimitiveSizeInBits() == 32; |
| } |
| |
| EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, |
| EVT VT) const override { |
| if (VT.isVector()) |
| return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); |
| return MVT::i1; |
| } |
| |
| ConstraintType getConstraintType(StringRef Constraint) const override; |
| std::pair<unsigned, const TargetRegisterClass *> |
| getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| StringRef Constraint, MVT VT) const override; |
| |
| SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
| bool isVarArg, |
| const SmallVectorImpl<ISD::InputArg> &Ins, |
| const SDLoc &dl, SelectionDAG &DAG, |
| SmallVectorImpl<SDValue> &InVals) const override; |
| |
| SDValue LowerCall(CallLoweringInfo &CLI, |
| SmallVectorImpl<SDValue> &InVals) const override; |
| |
| SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; |
| |
| std::string |
| getPrototype(const DataLayout &DL, Type *, const ArgListTy &, |
| const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment, |
| std::optional<std::pair<unsigned, const APInt &>> VAInfo, |
| const CallBase &CB, unsigned UniqueCallSite) const; |
| |
| SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| const SmallVectorImpl<ISD::OutputArg> &Outs, |
| const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, |
| SelectionDAG &DAG) const override; |
| |
| void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
| std::vector<SDValue> &Ops, |
| SelectionDAG &DAG) const override; |
| |
| const NVPTXTargetMachine *nvTM; |
| |
| // PTX always uses 32-bit shift amounts |
| MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { |
| return MVT::i32; |
| } |
| |
| TargetLoweringBase::LegalizeTypeAction |
| getPreferredVectorAction(MVT VT) const override; |
| |
| // Get the degree of precision we want from 32-bit floating point division |
| // operations. |
| // |
| // 0 - Use ptx div.approx |
| // 1 - Use ptx.div.full (approximate, but less so than div.approx) |
| // 2 - Use IEEE-compliant div instructions, if available. |
| int getDivF32Level() const; |
| |
| // Get whether we should use a precise or approximate 32-bit floating point |
| // sqrt instruction. |
| bool usePrecSqrtF32() const; |
| |
| // Get whether we should use instructions that flush floating-point denormals |
| // to sign-preserving zero. |
| bool useF32FTZ(const MachineFunction &MF) const; |
| |
| SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
| int &ExtraSteps, bool &UseOneConst, |
| bool Reciprocal) const override; |
| |
| unsigned combineRepeatedFPDivisors() const override { return 2; } |
| |
| bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const; |
| bool allowUnsafeFPMath(MachineFunction &MF) const; |
| |
| bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
| EVT) const override { |
| return true; |
| } |
| |
| // The default is the same as pointer type, but brx.idx only accepts i32 |
| MVT getJumpTableRegTy(const DataLayout &) const override { return MVT::i32; } |
| |
| unsigned getJumpTableEncoding() const override; |
| |
| bool enableAggressiveFMAFusion(EVT VT) const override { return true; } |
| |
| // The default is to transform llvm.ctlz(x, false) (where false indicates that |
| // x == 0 is not undefined behavior) into a branch that checks whether x is 0 |
| // and avoids calling ctlz in that case. We have a dedicated ctlz |
| // instruction, so we say that ctlz is cheap to speculate. |
| bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } |
| |
| AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { |
| return AtomicExpansionKind::None; |
| } |
| |
| AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { |
| return AtomicExpansionKind::None; |
| } |
| |
| AtomicExpansionKind |
| shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
| |
| bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { |
| // There's rarely any point of packing something into a vector type if we |
| // already have the source data. |
| return true; |
| } |
| |
| bool shouldInsertFencesForAtomic(const Instruction *) const override; |
| |
| AtomicOrdering |
| atomicOperationOrderAfterFenceSplit(const Instruction *I) const override; |
| |
| Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, |
| AtomicOrdering Ord) const override; |
| Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, |
| AtomicOrdering Ord) const override; |
| |
| unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, |
| EVT ToVT) const override; |
| |
| private: |
| const NVPTXSubtarget &STI; // cache the subtarget here |
| mutable unsigned GlobalUniqueCallSite; |
| |
| SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; |
| |
| SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue PromoteBinOpIfF32FTZ(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
| SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
| |
| SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const; |
| unsigned getNumRegisters(LLVMContext &Context, EVT VT, |
| std::optional<MVT> RegisterVT) const override; |
| bool |
| splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, |
| SDValue *Parts, unsigned NumParts, MVT PartVT, |
| std::optional<CallingConv::ID> CC) const override; |
| |
| void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
| SelectionDAG &DAG) const override; |
| SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
| |
| Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx, |
| const DataLayout &DL) const; |
| }; |
| |
| } // namespace llvm |
| |
| #endif |