[TTI] Return a more sensible cost for histogram intrinsic. (#97397)
This is just an initial cost, making it invalid for any target which
doesn't specifically return a cost for now. Also adds an AArch64
specific cost check.
We will need to improve that later, e.g. by returning a scalarization
cost for generic targets and possibly introducing a new TTI method, at
least once LoopVectorize has changed its cost model. The reason is
that the histogram intrinsic also effectively contains a gather and
scatter, and we will need details of the addressing to determine an
appropriate cost for that.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index eb60b96..0ee8136 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -61,6 +61,11 @@
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
cl::init(true), cl::Hidden);
+// The default value is a complete guess as to a reasonable cost.
+static cl::opt<unsigned>
+ BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
+ cl::desc("The cost of a histcnt instruction"));
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -508,11 +513,39 @@
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
}
+static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
+ Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Arg 0: type of vector of pointers
+ Type *EltTy = ICA.getArgTypes()[1]; // Arg 1: type of bucket elements
+
+ // Only allow (32b and 64b) integers or pointers for now; else invalid cost.
+ if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
+ (EltTy->getScalarSizeInBits() != 32 &&
+ EltTy->getScalarSizeInBits() != 64))
+ return InstructionCost::getInvalid();
+
+ // FIXME: Hacky check for legal vector types (scalable, known-min 2 or 4
+ // elements, <= 128 bits). We can promote but cannot split for histcnt.
+ // FIXME: We should be able to generate histcnt for fixed-length vectors
+ // using ptrue with a specific VL.
+ if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
+ if ((VTy->getElementCount().getKnownMinValue() != 2 &&
+ VTy->getElementCount().getKnownMinValue() != 4) ||
+ VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
+ !VTy->isScalableTy())
+ return InstructionCost::getInvalid();
+
+ return InstructionCost(BaseHistCntCost); // Flat cost from the option above.
+}
+
InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
+ case Intrinsic::experimental_vector_histogram_add:
+ if (!ST->hasSVE2())
+ return InstructionCost::getInvalid();
+ return getHistogramCost(ICA);
case Intrinsic::umin:
case Intrinsic::umax:
case Intrinsic::smin: