//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the NVPTX target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
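  // A sketch of the typical construction site (in NVPTXTargetMachine's
  // getTargetTransformInfo, not part of this header):
  //   return TargetTransformInfo(NVPTXTTIImpl(this, F));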
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

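  // NVPTX programs execute in SIMT fashion, so control flow can diverge
  // between the threads of a warp; always report branch divergence.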
  bool hasBranchDivergence(const Function *F = nullptr) { return true; }

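  // Thread/lane-ID reads, atomic results, and similar per-thread values are
  // typical sources of divergence; the .cpp implementation has the
  // authoritative list.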
  bool isSourceOfDivergence(const Value *V);

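  // The generic address space acts as NVPTX's flat address space: pointers in
  // it may refer to global, shared, or local memory.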
  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

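  // PTX does not allow initializers on variables in the shared, local, or
  // param state spaces, so only the remaining address spaces can hold
  // non-undef global initializers.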
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL &&
           AS != AddressSpace::ADDRESS_SPACE_PARAM;
  }

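  // Attempt target-specific folds of NVVM intrinsics, e.g. mapping some
  // nvvm.* math intrinsics onto their generic LLVM equivalents (a summary;
  // see the .cpp implementation for the exact set of folds).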
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
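  // For example, an 8-byte chain of two i32 loads is only treated as
  // vectorizable when it is at least 8-byte aligned.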
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // PTX exposes an effectively unbounded number of virtual registers of every
  // kind, but the physical machine does not. We conservatively return 1 here,
  // which is just enough to enable the vectorizers but disables heuristics
  // based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are no
  // incompatible functions in PTX; ptxas will report errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const { return 11; }

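  // Arithmetic cost model; the implementation charges extra for operations
  // such as 64-bit integer ALU ops that the hardware emulates with 32-bit
  // instructions (see the .cpp file for details).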
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

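  // The implementation enables partial and runtime unrolling; small loops are
  // frequently unrolled again by the PTX-to-SASS compiler anyway.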
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

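  // Peeling currently just defers to the generic BasicTTIImpl preferences
  // (see the .cpp file).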
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

#endif