//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object
/// specific to the
/// AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
#include <optional>
namespace llvm {
class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;
class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
using TTI = TargetTransformInfo;
friend BaseT;
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
static const FeatureBitset InlineInverseFeatures;
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
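// Classifies the AArch64 structured load/store intrinsics (e.g. the
// aarch64.neon.ld2/st2, ld3/st3 and ld4/st4 intrinsics) by the number of
// interleaved elements they access.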
enum MemIntrinsicType {
VECTOR_LDST_TWO_ELEMENTS,
VECTOR_LDST_THREE_ELEMENTS,
VECTOR_LDST_FOUR_ELEMENTS
};
bool isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args,
Type *SrcOverrideTy = nullptr) const;
/// A helper function called by 'getVectorInstrCost'.
///
/// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'.
/// \param ScalarUserAndIdx encodes the information about extracts from a
/// vector, with 'Scalar' being the value being extracted, 'User' being the
/// user of the extract (nullptr if the user is not known before
/// vectorization), and 'Idx' being the extract lane.
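/// For example, a tuple {V, U, 2} records that the scalar value V is
/// extracted from lane 2 and consumed by user U.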
InstructionCost getVectorInstrCostHelper(
unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
const Instruction *I = nullptr, Value *Scalar = nullptr,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
public:
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
const ArrayRef<Type *> &Types) const override;
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const override;
APInt getFeatureMask(const Function &F) const override;
bool isMultiversionedFunction(const Function &F) const override;
/// \name Scalar TTI Implementations
/// @{
using BaseT::getIntImmCost;
InstructionCost getIntImmCost(int64_t Val) const;
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) const override;
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst = nullptr) const override;
InstructionCost
getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind) const override;
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
bool enableInterleavedAccessVectorization() const override { return true; }
bool enableMaskedInterleavedAccessVectorization() const override {
return ST->hasSVE();
}
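// ClassID 1 is the vector register class: 32 vector registers when NEON is
// available, none otherwise. Any other class is treated as the 31
// general-purpose registers X0-X30.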
unsigned getNumberOfRegisters(unsigned ClassID) const override {
bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 32;
return 0;
}
return 31;
}
InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const override;
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const override;
TypeSize
getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;
unsigned getMinVectorRegisterBitWidth() const override {
return ST->getMinVectorRegisterBitWidth();
}
std::optional<unsigned> getVScaleForTuning() const override {
return ST->getVScaleForTuning();
}
bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
bool shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const override;
/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based
/// upon the maximum possible value for vscale.
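/// For example (an illustrative sketch): if the subtarget's vscale-for-tuning
/// is 2, a scalable VF of vscale x 4 is treated as 4 * 2 = 8 elements, while
/// a fixed VF of 4 is simply 4.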
unsigned getMaxNumElements(ElementCount VF) const {
if (!VF.isScalable())
return VF.getFixedValue();
return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
unsigned getMaxInterleaveFactor(ElementCount VF) const override;
bool prefersVectorizedAddressing() const override;
InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind) const override;
InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
Type *Src) const;
InstructionCost
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
InstructionCost
getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index,
TTI::TargetCostKind CostKind) const override;
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, const Value *Op0,
const Value *Op1) const override;
/// \param ScalarUserAndIdx encodes the information about extracts from a
/// vector, with 'Scalar' being the value being extracted, 'User' being the
/// user of the extract (nullptr if the user is not known before
/// vectorization), and 'Idx' being the extract lane.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>>
ScalarUserAndIdx) const override;
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) const override;
InstructionCost
getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) const override;
InstructionCost
getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
TTI::TargetCostKind CostKind) const;
InstructionCost getSpliceCost(VectorType *Tp, int Index,
TTI::TargetCostKind CostKind) const;
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const override;
InstructionCost
getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
TTI::TargetCostKind CostKind) const override;
InstructionCost getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
const Instruction *I = nullptr) const override;
TTI::MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
bool useNeonVector(const Type *Ty) const;
InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
const Instruction *I = nullptr) const override;
InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const override;
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) const override;
Value *
getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
bool CanCreate = true) const override;
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const override;
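// Element types that can be used natively in SVE scalable vectors: pointers,
// bf16 (with +bf16), f16/f32/f64, and the i1/i8/i16/i32/i64 integer types.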
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
if (Ty->isPointerTy())
return true;
if (Ty->isBFloatTy() && ST->hasBF16())
return true;
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
return false;
}
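// Masked loads/stores require SVE. A fixed-length vector type is only
// accepted if it is 128 bits wide or if SVE is used for fixed-length
// vectors; otherwise the masked operation is scalarized.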
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
if (!ST->hasSVE())
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
DataType->getPrimitiveSizeInBits() != 128)
return false; // Fall back to scalarization of masked operations.
return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
unsigned /*AddressSpace*/) const override {
return isLegalMaskedLoadStore(DataType, Alignment);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment,
unsigned /*AddressSpace*/) const override {
return isLegalMaskedLoadStore(DataType, Alignment);
}
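// Gathers/scatters require full (non-streaming) SVE; when legal they map to
// SVE gather/scatter forms such as LD1W/ST1W with a vector of offsets.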
bool isLegalMaskedGatherScatter(Type *DataType) const {
if (!ST->isSVEAvailable())
return false;
// For fixed vectors, scalarize if not using SVE for them.
auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
DataTypeFVTy->getNumElements() < 2))
return false;
return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
return isLegalMaskedGatherScatter(DataType);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
return isLegalMaskedGatherScatter(DataType);
}
bool isLegalBroadcastLoad(Type *ElementTy,
ElementCount NumElements) const override {
// Return true if we can generate a `ld1r` splat load instruction.
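// For example, <4 x i32> (128 bits) or <2 x float> (64 bits) can use LD1R,
// while <2 x i8> (16 bits) is too narrow.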
if (!ST->hasNEON() || NumElements.isScalable())
return false;
switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
case 8:
case 16:
case 32:
case 64: {
// We accept vector bit-widths >= 64 bits with element widths of 8, 16, 32,
// or 64 bits.
unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
return VectorBits >= 64;
}
}
return false;
}
bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const {
// NOTE: The logic below is mostly geared towards the LoopVectorizer (LV),
// which calls this with vectors of 2 elements. We might want to improve
// that if other users show up.
// Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
// the vector can be halved so that each half fits into a register. That's
// the case if the element type fits into a register and the number of
// elements is a power of 2 > 1.
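// For example, a <4 x i32> access can be split into two 64-bit halves and
// lowered to LDNP/STNP, while a <3 x i32> access is rejected because its
// element count is not a power of 2.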
if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
unsigned NumElements = DataTypeTy->getNumElements();
unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
EltSize <= 128 && isPowerOf2_64(EltSize);
}
return BaseT::isLegalNTStore(DataType, Alignment);
}
bool isLegalNTStore(Type *DataType, Align Alignment) const override {
return isLegalNTStoreLoad(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, Align Alignment) const override {
// Only supports little-endian targets.
if (ST->isLittleEndian())
return isLegalNTStoreLoad(DataType, Alignment);
return BaseT::isLegalNTLoad(DataType, Alignment);
}
InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const override;
bool enableOrderedReductions() const override { return true; }
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
bool shouldConsiderAddressTypePromotion(
const Instruction &I,
bool &AllowPromotionWithoutCommonHeader) const override;
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return false;
}
unsigned getGISelRematGlobalCost() const override { return 2; }
unsigned getMinTripCountTailFoldingThreshold() const override {
return ST->hasSVE() ? 5 : 0;
}
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
if (ST->hasSVE())
return IVUpdateMayOverflow
? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
: TailFoldingStyle::DataAndControlFlow;
return TailFoldingStyle::DataWithoutLaneMask;
}
bool preferFixedOverScalableIfEqualCost() const override;
unsigned getEpilogueVectorizationMinVF() const override;
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
bool supportsScalableVectors() const override {
return ST->isSVEorStreamingSVEAvailable();
}
bool enableScalableVectorization() const override;
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const override;
bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }
/// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the
/// required architecture features are not present.
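/// For example (illustrative), without the +fullfp16 feature an fadd on
/// <4 x half> is costed roughly as the fpext of its operands to <4 x float>,
/// the cost of the <4 x float> operation (via \p InstCost), and, when
/// \p IncludeTrunc is set, the fptrunc back to <4 x half>.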
std::optional<InstructionCost>
getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info,
TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
std::function<InstructionCost(Type *)> InstCost) const;
InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const override;
InstructionCost
getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
VectorType *ValTy, std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const override;
InstructionCost getMulAccReductionCost(
bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;
InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp, ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const override;
InstructionCost getScalarizationOverhead(
VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
ArrayRef<Value *> VL = {}) const override;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns an invalid cost.
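/// For example, a scaled-register addressing mode such as [x0, x1, lsl #2]
/// for a 32-bit access corresponds to Scale == 4.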
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
StackOffset BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) const override;
bool enableSelectOptimize() const override {
return ST->enableSelectOptimize();
}
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override;
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const override {
// We can vectorize a store of v4i8, so allow a minimum VF of 4 for i8 stores.
if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
return 4;
return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
}
std::optional<unsigned> getMinPageSize() const override { return 4096; }
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) const override;
bool isProfitableToSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
/// @}
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H