blob: f1ebcbc6fc513470ee789eba2d9decbfeba74193 [file] [log] [blame]
//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//
#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "wasmtti"
TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TargetTransformInfo::PSK_FastHardware;
}
unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
unsigned Result = BaseT::getNumberOfRegisters(ClassID);
// For SIMD, use at least 16 registers, as a rough guess.
bool Vector = (ClassID == 1);
if (Vector)
Result = std::max(Result, 16u);
return Result;
}
TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
TargetTransformInfo::RegisterKind K) const {
switch (K) {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(0);
}
llvm_unreachable("Unsupported register kind");
}
InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
InstructionCost Cost =
BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
switch (Opcode) {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Shl:
// SIMD128's shifts currently only accept a scalar shift count. For each
// element, we'll need to extract, op, insert. The following is a rough
// approxmation.
if (Opd2Info != TTI::OK_UniformValue &&
Opd2Info != TTI::OK_UniformConstantValue)
Cost =
cast<FixedVectorType>(VTy)->getNumElements() *
(TargetTransformInfo::TCC_Basic +
getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
TargetTransformInfo::TCC_Basic);
break;
}
}
return Cost;
}
InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
Type *Val,
unsigned Index) {
InstructionCost Cost =
BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
// SIMD128's insert/extract currently only take constant indices.
if (Index == -1u)
return Cost + 25 * TargetTransformInfo::TCC_Expensive;
return Cost;
}
bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
// Allow inlining only when the Callee has a subset of the Caller's
// features. In principle, we should be able to inline regardless of any
// features because WebAssembly supports features at module granularity, not
// function granularity, but without this restriction it would be possible for
// a module to "forget" about features if all the functions that used them
// were inlined.
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
return (CallerBits & CalleeBits) == CalleeBits;
}
void WebAssemblyTTIImpl::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
// Scan the loop: don't unroll loops with calls. This is a standard approach
// for most (all?) targets.
for (BasicBlock *BB : L->blocks())
for (Instruction &I : *BB)
if (isa<CallInst>(I) || isa<InvokeInst>(I))
if (const Function *F = cast<CallBase>(I).getCalledFunction())
if (isLoweredToCall(F))
return;
// The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
// the various microarchitectures that use the BasicTTI implementation and
// has been selected through heuristics across multiple cores and runtimes.
UP.Partial = UP.Runtime = UP.UpperBound = true;
UP.PartialThreshold = 30;
// Avoid unrolling when optimizing for size.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
// Set number of instructions optimized when "back edge"
// becomes "fall through" to default value of 2.
UP.BEInsns = 2;
}