| //===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H |
| #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H |
| |
| #include "NVPTXTargetMachine.h" |
| #include "MCTargetDesc/NVPTXBaseInfo.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/CodeGen/BasicTTIImpl.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| |
| namespace llvm { |
| |
| class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { |
| typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT; |
| typedef TargetTransformInfo TTI; |
| friend BaseT; |
| |
| const NVPTXSubtarget *ST; |
| const NVPTXTargetLowering *TLI; |
| |
| const NVPTXSubtarget *getST() const { return ST; }; |
| const NVPTXTargetLowering *getTLI() const { return TLI; }; |
| |
| public: |
| explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F) |
| : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), |
| TLI(ST->getTargetLowering()) {} |
| |
| bool hasBranchDivergence() { return true; } |
| |
| bool isSourceOfDivergence(const Value *V); |
| |
| unsigned getFlatAddressSpace() const { |
| return AddressSpace::ADDRESS_SPACE_GENERIC; |
| } |
| |
| bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { |
| return AS != AddressSpace::ADDRESS_SPACE_SHARED && |
| AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM; |
| } |
| |
| Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
| IntrinsicInst &II) const; |
| |
| // Loads and stores can be vectorized if the alignment is at least as big as |
| // the load/store we want to vectorize. |
| bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
| unsigned AddrSpace) const { |
| return Alignment >= ChainSizeInBytes; |
| } |
| bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
| unsigned AddrSpace) const { |
| return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); |
| } |
| |
| // NVPTX has infinite registers of all kinds, but the actual machine doesn't. |
| // We conservatively return 1 here which is just enough to enable the |
| // vectorizers but disables heuristics based on the number of registers. |
| // FIXME: Return a more reasonable number, while keeping an eye on |
| // LoopVectorizer's unrolling heuristics. |
| unsigned getNumberOfRegisters(bool Vector) const { return 1; } |
| |
| // Only <2 x half> should be vectorized, so always return 32 for the vector |
| // register size. |
| TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
| return TypeSize::getFixed(32); |
| } |
| unsigned getMinVectorRegisterBitWidth() const { return 32; } |
| |
| // We don't want to prevent inlining because of target-cpu and -features |
| // attributes that were added to newer versions of LLVM/Clang: There are |
| // no incompatible functions in PTX, ptxas will throw errors in such cases. |
| bool areInlineCompatible(const Function *Caller, |
| const Function *Callee) const { |
| return true; |
| } |
| |
| // Increase the inlining cost threshold by a factor of 5, reflecting that |
| // calls are particularly expensive in NVPTX. |
| unsigned getInliningThresholdMultiplier() { return 5; } |
| |
| InstructionCost getArithmeticInstrCost( |
| unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
| TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
| TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
| TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
| TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
| ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
| const Instruction *CxtI = nullptr); |
| |
| void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
| TTI::UnrollingPreferences &UP, |
| OptimizationRemarkEmitter *ORE); |
| |
| void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
| TTI::PeelingPreferences &PP); |
| |
| bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { |
| // Volatile loads/stores are only supported for shared and global address |
| // spaces, or for generic AS that maps to them. |
| if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC || |
| AddrSpace == llvm::ADDRESS_SPACE_GLOBAL || |
| AddrSpace == llvm::ADDRESS_SPACE_SHARED)) |
| return false; |
| |
| switch(I->getOpcode()){ |
| default: |
| return false; |
| case Instruction::Load: |
| case Instruction::Store: |
| return true; |
| } |
| } |
| }; |
| |
| } // end namespace llvm |
| |
| #endif |