//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
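  // WebAssembly provides dedicated i32.popcnt and i64.popcnt instructions, so
  // population count can always be treated as fast.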
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
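  // ClassID 1 is the vector register class in the generic TTI numbering.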
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
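    // Scalar values are at most 64 bits wide (i64 / f64).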
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
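    // 128-bit v128 registers are available with the SIMD128 feature;
    // otherwise fall back to the 64-bit scalar width.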
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
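    // WebAssembly has no scalable vector registers.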
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
    TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
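      // Per element: one basic-cost extract, the scalar shift itself, and one
      // basic-cost insert.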
      if (Opd2Info != TTI::OK_UniformValue &&
          Opd2Info != TTI::OK_UniformConstantValue)
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
                                                       Type *Val,
                                                       unsigned Index) {
  InstructionCost Cost =
      BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);

  // SIMD128's insert/extract currently only take constant indices.
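  // An Index of -1 signals an unknown, non-constant index, so charge a large
  // extra cost.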
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
                                             const Function *Callee) const {
  // Allow inlining only when the Callee has a subset of the Caller's
  // features. In principle, we should be able to inline regardless of any
  // features because WebAssembly supports features at module granularity, not
  // function granularity, but without this restriction it would be possible for
  // a module to "forget" about features if all the functions that used them
  // were inlined.
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

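  // Inlining is allowed only if every feature the callee needs is also enabled
  // in the caller (CalleeBits is a subset of CallerBits).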
  return (CallerBits & CalleeBits) == CalleeBits;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set the number of instructions optimized away when a "back edge"
  // becomes a "fall through" to the default value of 2.
  UP.BEInsns = 2;
}