| //===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "VPlanAnalysis.h" |
| #include "VPlan.h" |
| #include "VPlanCFG.h" |
| #include "VPlanDominatorTree.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/TypeSwitch.h" |
| #include "llvm/Analysis/ScalarEvolution.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/Support/GenericDomTreeConstruction.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "vplan" |
| |
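// Compute the type of the canonical IV. It serves as the fallback type for
// live-in VPValues without an underlying IR value, such as the vector trip
// count or the backedge-taken count.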
| VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) |
| : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) { |
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
| if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>( |
| &LoopRegion->getEntryBasicBlock()->front())) { |
| CanonicalIVTy = CanIV->getScalarType(); |
| return; |
| } |
| } |
| |
| // If there's no canonical IV, retrieve the type from the trip count |
| // expression. |
| auto *TC = Plan.getTripCount(); |
| if (TC->isLiveIn()) { |
| CanonicalIVTy = TC->getLiveInIRValue()->getType(); |
| return; |
| } |
| CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType(); |
| } |
| |
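// All incoming values of a blend must have the same type; infer the type from
// the first incoming value and cache it for the remaining ones.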
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) { |
| Type *ResTy = inferScalarType(R->getIncomingValue(0)); |
| for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) { |
| VPValue *Inc = R->getIncomingValue(I); |
| assert(inferScalarType(Inc) == ResTy && |
| "different types inferred for different incoming values"); |
| CachedTypes[Inc] = ResTy; |
| } |
| return ResTy; |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { |
  // Set the result type from the first operand; assert that the types of all
  // other operands match, and cache them.
| auto SetResultTyFromOp = [this, R]() { |
| Type *ResTy = inferScalarType(R->getOperand(0)); |
| for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) { |
| VPValue *OtherV = R->getOperand(Op); |
| assert(inferScalarType(OtherV) == ResTy && |
| "different types inferred for different operands"); |
| CachedTypes[OtherV] = ResTy; |
| } |
| return ResTy; |
| }; |
| |
| unsigned Opcode = R->getOpcode(); |
| if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) |
| return SetResultTyFromOp(); |
| |
| switch (Opcode) { |
| case Instruction::ExtractElement: |
| case Instruction::Freeze: |
| case VPInstruction::ReductionStartVector: |
| return inferScalarType(R->getOperand(0)); |
| case Instruction::Select: { |
| Type *ResTy = inferScalarType(R->getOperand(1)); |
| VPValue *OtherV = R->getOperand(2); |
| assert(inferScalarType(OtherV) == ResTy && |
| "different types inferred for different operands"); |
| CachedTypes[OtherV] = ResTy; |
| return ResTy; |
| } |
| case Instruction::ICmp: |
| case VPInstruction::ActiveLaneMask: |
| assert(inferScalarType(R->getOperand(0)) == |
| inferScalarType(R->getOperand(1)) && |
| "different types inferred for different operands"); |
| return IntegerType::get(Ctx, 1); |
| case VPInstruction::ComputeAnyOfResult: |
| case VPInstruction::ComputeFindLastIVResult: |
| case VPInstruction::ComputeReductionResult: { |
| auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0)); |
| auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); |
| return OrigPhi->getType(); |
| } |
| case VPInstruction::ExplicitVectorLength: |
| return Type::getIntNTy(Ctx, 32); |
| case Instruction::PHI: |
    // Infer the type from the first operand only, as other operands of header
    // phis may lead to infinite recursion.
| return inferScalarType(R->getOperand(0)); |
| case VPInstruction::FirstOrderRecurrenceSplice: |
| case VPInstruction::Not: |
| case VPInstruction::CalculateTripCountMinusVF: |
| case VPInstruction::CanonicalIVIncrementForPart: |
| case VPInstruction::AnyOf: |
| return SetResultTyFromOp(); |
| case VPInstruction::FirstActiveLane: |
| return Type::getIntNTy(Ctx, 64); |
| case VPInstruction::ExtractLastElement: |
| case VPInstruction::ExtractPenultimateElement: { |
| Type *BaseTy = inferScalarType(R->getOperand(0)); |
| if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) |
| return VecTy->getElementType(); |
| return BaseTy; |
| } |
| case VPInstruction::LogicalAnd: |
| assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) && |
| inferScalarType(R->getOperand(1))->isIntegerTy(1) && |
| "LogicalAnd operands should be bool"); |
| return IntegerType::get(Ctx, 1); |
| case VPInstruction::Broadcast: |
| case VPInstruction::PtrAdd: |
    // Return the type based on the first operand.
| return inferScalarType(R->getOperand(0)); |
| case VPInstruction::BranchOnCond: |
| case VPInstruction::BranchOnCount: |
| return Type::getVoidTy(Ctx); |
| default: |
| break; |
| } |
| // Type inference not implemented for opcode. |
| LLVM_DEBUG({ |
| dbgs() << "LV: Found unhandled opcode for: "; |
| R->getVPSingleValue()->dump(); |
| }); |
| llvm_unreachable("Unhandled opcode!"); |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) { |
| unsigned Opcode = R->getOpcode(); |
| if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || |
| Instruction::isBitwiseLogicOp(Opcode)) { |
| Type *ResTy = inferScalarType(R->getOperand(0)); |
| assert(ResTy == inferScalarType(R->getOperand(1)) && |
| "types for both operands must match for binary op"); |
| CachedTypes[R->getOperand(1)] = ResTy; |
| return ResTy; |
| } |
| |
| switch (Opcode) { |
| case Instruction::ICmp: |
| case Instruction::FCmp: |
| return IntegerType::get(Ctx, 1); |
| case Instruction::FNeg: |
| case Instruction::Freeze: |
| return inferScalarType(R->getOperand(0)); |
| case Instruction::ExtractValue: { |
| assert(R->getNumOperands() == 2 && "expected single level extractvalue"); |
| auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0))); |
| auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue()); |
| return StructTy->getTypeAtIndex(CI->getZExtValue()); |
| } |
| default: |
| break; |
| } |
| |
| // Type inference not implemented for opcode. |
| LLVM_DEBUG({ |
| dbgs() << "LV: Found unhandled opcode for: "; |
| R->getVPSingleValue()->dump(); |
| }); |
| llvm_unreachable("Unhandled opcode!"); |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) { |
| auto &CI = *cast<CallInst>(R->getUnderlyingInstr()); |
| return CI.getType(); |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) { |
| assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) && |
| "Store recipes should not define any values"); |
| return cast<LoadInst>(&R->getIngredient())->getType(); |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) { |
| Type *ResTy = inferScalarType(R->getOperand(1)); |
| VPValue *OtherV = R->getOperand(2); |
| assert(inferScalarType(OtherV) == ResTy && |
| "different types inferred for different operands"); |
| CachedTypes[OtherV] = ResTy; |
| return ResTy; |
| } |
| |
| Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { |
| unsigned Opcode = R->getUnderlyingInstr()->getOpcode(); |
| |
| if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || |
| Instruction::isBitwiseLogicOp(Opcode)) { |
| Type *ResTy = inferScalarType(R->getOperand(0)); |
| assert(ResTy == inferScalarType(R->getOperand(1)) && |
| "inferred types for operands of binary op don't match"); |
| CachedTypes[R->getOperand(1)] = ResTy; |
| return ResTy; |
| } |
| |
| if (Instruction::isCast(Opcode)) |
| return R->getUnderlyingInstr()->getType(); |
| |
| switch (Opcode) { |
| case Instruction::Call: { |
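    // The callee is the last operand, unless the call is predicated, in which
    // case the mask is last and the callee is the second-to-last operand.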
| unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1); |
| return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue()) |
| ->getReturnType(); |
| } |
| case Instruction::Select: { |
| Type *ResTy = inferScalarType(R->getOperand(1)); |
| assert(ResTy == inferScalarType(R->getOperand(2)) && |
| "inferred types for operands of select op don't match"); |
| CachedTypes[R->getOperand(2)] = ResTy; |
| return ResTy; |
| } |
| case Instruction::ICmp: |
| case Instruction::FCmp: |
| return IntegerType::get(Ctx, 1); |
| case Instruction::Alloca: |
| case Instruction::ExtractValue: |
| return R->getUnderlyingInstr()->getType(); |
| case Instruction::Freeze: |
| case Instruction::FNeg: |
| case Instruction::GetElementPtr: |
| return inferScalarType(R->getOperand(0)); |
| case Instruction::Load: |
| return cast<LoadInst>(R->getUnderlyingInstr())->getType(); |
| case Instruction::Store: |
| // FIXME: VPReplicateRecipes with store opcodes still define a result |
| // VPValue, so we need to handle them here. Remove the code here once this |
| // is modeled accurately in VPlan. |
| return Type::getVoidTy(Ctx); |
| default: |
| break; |
| } |
| // Type inference not implemented for opcode. |
| LLVM_DEBUG({ |
| dbgs() << "LV: Found unhandled opcode for: "; |
| R->getVPSingleValue()->dump(); |
| }); |
| llvm_unreachable("Unhandled opcode"); |
| } |
| |
| Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { |
| if (Type *CachedTy = CachedTypes.lookup(V)) |
| return CachedTy; |
| |
| if (V->isLiveIn()) { |
| if (auto *IRValue = V->getLiveInIRValue()) |
| return IRValue->getType(); |
| // All VPValues without any underlying IR value (like the vector trip count |
| // or the backedge-taken count) have the same type as the canonical IV. |
| return CanonicalIVTy; |
| } |
| |
| Type *ResultTy = |
| TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe()) |
| .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe, |
| VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe, |
| VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>( |
| [this](const auto *R) { |
                // Handle header phi recipes, except VPWidenIntOrFpInduction,
                // which needs special handling due to it possibly being
                // truncated.
| // TODO: consider inferring/caching type of siblings, e.g., |
| // backedge value, here and in cases below. |
| return inferScalarType(R->getStartValue()); |
| }) |
| .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>( |
| [](const auto *R) { return R->getScalarType(); }) |
| .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe, |
| VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe, |
| VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe, |
| VPPartialReductionRecipe>([this](const VPRecipeBase *R) { |
| return inferScalarType(R->getOperand(0)); |
| }) |
| // VPInstructionWithType must be handled before VPInstruction. |
| .Case<VPInstructionWithType, VPWidenIntrinsicRecipe, |
| VPWidenCastRecipe>( |
| [](const auto *R) { return R->getResultType(); }) |
| .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe, |
| VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>( |
| [this](const auto *R) { return inferScalarTypeForRecipe(R); }) |
| .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) { |
| // TODO: Use info from interleave group. |
| return V->getUnderlyingValue()->getType(); |
| }) |
| .Case<VPExtendedReductionRecipe, VPMulAccumulateReductionRecipe>( |
| [](const auto *R) { return R->getResultType(); }) |
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          });
| |
| assert(ResultTy && "could not infer type for the given VPValue"); |
| CachedTypes[V] = ResultTy; |
| return ResultTy; |
| } |
| |
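// Ephemeral recipes only contribute to llvm.assume calls: starting from the
// assume intrinsics, walk operands backwards and collect recipes whose only
// transitive users are other ephemeral recipes.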
| void llvm::collectEphemeralRecipesForVPlan( |
| VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) { |
| // First, collect seed recipes which are operands of assumes. |
| SmallVector<VPRecipeBase *> Worklist; |
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( |
| vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) { |
| for (VPRecipeBase &R : *VPBB) { |
| auto *RepR = dyn_cast<VPReplicateRecipe>(&R); |
| if (!RepR || !match(RepR->getUnderlyingInstr(), |
| PatternMatch::m_Intrinsic<Intrinsic::assume>())) |
| continue; |
| Worklist.push_back(RepR); |
| EphRecipes.insert(RepR); |
| } |
| } |
| |
| // Process operands of candidates in worklist and add them to the set of |
| // ephemeral recipes, if they don't have side-effects and are only used by |
| // other ephemeral recipes. |
| while (!Worklist.empty()) { |
| VPRecipeBase *Cur = Worklist.pop_back_val(); |
| for (VPValue *Op : Cur->operands()) { |
| auto *OpR = Op->getDefiningRecipe(); |
| if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR)) |
| continue; |
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
| auto *UR = dyn_cast<VPRecipeBase>(U); |
| return !UR || !EphRecipes.contains(UR); |
| })) |
| continue; |
| EphRecipes.insert(OpR); |
| Worklist.push_back(OpR); |
| } |
| } |
| } |
| |
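// Explicitly instantiate the generic dominator tree construction for VPlan's
// CFG, so a VPDominatorTree can be computed over VPBlockBases.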
| template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>( |
| DominatorTreeBase<VPBlockBase, false> &DT); |
| |
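// A properly dominates B if they are in different blocks and A's block
// properly dominates B's block, or if they are in the same block and A comes
// before B.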
| bool VPDominatorTree::properlyDominates(const VPRecipeBase *A, |
| const VPRecipeBase *B) { |
| if (A == B) |
| return false; |
| |
| auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { |
| for (auto &R : *A->getParent()) { |
| if (&R == A) |
| return true; |
| if (&R == B) |
| return false; |
| } |
| llvm_unreachable("recipe not found"); |
| }; |
| const VPBlockBase *ParentA = A->getParent(); |
| const VPBlockBase *ParentB = B->getParent(); |
| if (ParentA == ParentB) |
| return LocalComesBefore(A, B); |
| |
| #ifndef NDEBUG |
| auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { |
| auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); |
| if (Region && Region->isReplicator()) { |
| assert(Region->getNumSuccessors() == 1 && |
| Region->getNumPredecessors() == 1 && "Expected SESE region!"); |
| assert(R->getParent()->size() == 1 && |
| "A recipe in an original replicator region must be the only " |
| "recipe in its block"); |
| return Region; |
| } |
| return nullptr; |
| }; |
| assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && |
| "No replicate regions expected at this point"); |
| assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && |
| "No replicate regions expected at this point"); |
| #endif |
| return Base::properlyDominates(ParentA, ParentB); |
| } |
| |
| /// Get the VF scaling factor applied to the recipe's output, if the recipe has |
| /// one. |
| static unsigned getVFScaleFactor(VPRecipeBase *R) { |
| if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R)) |
| return RR->getVFScaleFactor(); |
| if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) |
| return RR->getVFScaleFactor(); |
| assert( |
| (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != |
| VPInstruction::ReductionStartVector) && |
| "getting scaling factor of reduction-start-vector not implemented yet"); |
| return 1; |
| } |
| |
| bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const { |
| return any_of(MaxLocalUsers, [&TTI](auto &LU) { |
| return LU.second > TTI.getNumberOfRegisters(LU.first); |
| }); |
| } |
| |
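// Register usage is estimated with a linear-scan style interval analysis:
// each recipe gets an index in reverse post-order, the last use of each
// defining recipe closes its interval, and the maximum number of intervals
// open at any index approximates the peak register usage per register class.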
| SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan( |
| VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI, |
| const SmallPtrSetImpl<const Value *> &ValuesToIgnore) { |
  // Each 'key' in the map opens a new interval. The value mapped to each key
  // is the index of the 'last seen' usage of the recipe that is the key.
| using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>; |
| |
| // Maps indices to recipes. |
| SmallVector<VPRecipeBase *, 64> Idx2Recipe; |
| // Marks the end of each interval. |
| IntervalMap EndPoint; |
| // Saves the list of recipe indices that are used in the loop. |
| SmallPtrSet<VPRecipeBase *, 8> Ends; |
| // Saves the list of values that are used in the loop but are defined outside |
| // the loop (not including non-recipe values such as arguments and |
| // constants). |
| SmallSetVector<VPValue *, 8> LoopInvariants; |
| LoopInvariants.insert(&Plan.getVectorTripCount()); |
| |
  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are visited before their users. We
| // assume that each recipe that has in-loop users starts an interval. We |
| // record every time that an in-loop value is used, so we have a list of the |
| // first and last occurrences of each recipe. |
| ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( |
| Plan.getVectorLoopRegion()); |
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { |
| if (!VPBB->getParent()) |
| break; |
| for (VPRecipeBase &R : *VPBB) { |
| Idx2Recipe.push_back(&R); |
| |
| // Save the end location of each USE. |
| for (VPValue *U : R.operands()) { |
| auto *DefR = U->getDefiningRecipe(); |
| |
| // Ignore non-recipe values such as arguments, constants, etc. |
        // FIXME: Might need some motivation why these values are ignored. If,
        // for example, an argument is used inside the loop, it will increase
        // register pressure (so shouldn't we add it to LoopInvariants?).
| if (!DefR && (!U->getLiveInIRValue() || |
| !isa<Instruction>(U->getLiveInIRValue()))) |
| continue; |
| |
| // If this recipe is outside the loop then record it and continue. |
| if (!DefR) { |
| LoopInvariants.insert(U); |
| continue; |
| } |
| |
| // Overwrite previous end points. |
| EndPoint[DefR] = Idx2Recipe.size(); |
| Ends.insert(DefR); |
| } |
| } |
| if (VPBB == Plan.getVectorLoopRegion()->getExiting()) { |
| // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the |
| // exiting block, where their increment will get materialized eventually. |
| for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { |
| if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { |
| EndPoint[&R] = Idx2Recipe.size(); |
| Ends.insert(&R); |
| } |
| } |
| } |
| } |
| |
| // Saves the list of intervals that end with the index in 'key'. |
| using RecipeList = SmallVector<VPRecipeBase *, 2>; |
| SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds; |
| |
  // Next, we transpose the EndPoint map into a multi-map that holds the list
  // of intervals that *end* at a specific location.
| for (auto &Interval : EndPoint) |
| TransposeEnds[Interval.second].push_back(Interval.first); |
| |
| SmallPtrSet<VPRecipeBase *, 8> OpenIntervals; |
| SmallVector<VPRegisterUsage, 8> RUs(VFs.size()); |
| SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size()); |
| |
| LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); |
| |
  VPTypeAnalysis TypeInfo(Plan);
| |
| const auto &TTICapture = TTI; |
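  // Returns the number of registers needed to hold a vector of Ty with VF
  // lanes, or 0 if such a vector type is not valid for the target.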
| auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { |
| if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) || |
| (VF.isScalable() && |
| !TTICapture.isElementTypeLegalForScalableVector(Ty))) |
| return 0; |
| return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); |
| }; |
| |
  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
| for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) { |
| VPRecipeBase *R = Idx2Recipe[Idx]; |
| |
| // Remove all of the recipes that end at this location. |
| RecipeList &List = TransposeEnds[Idx]; |
| for (VPRecipeBase *ToRemove : List) |
| OpenIntervals.erase(ToRemove); |
| |
| // Ignore recipes that are never used within the loop and do not have side |
| // effects. |
| if (!Ends.count(R) && !R->mayHaveSideEffects()) |
| continue; |
| |
| // Skip recipes for ignored values. |
| // TODO: Should mark recipes for ephemeral values that cannot be removed |
    // explicitly in VPlan.
| if (isa<VPSingleDefRecipe>(R) && |
| ValuesToIgnore.contains( |
| cast<VPSingleDefRecipe>(R)->getUnderlyingValue())) |
| continue; |
| |
| // For each VF find the maximum usage of registers. |
| for (unsigned J = 0, E = VFs.size(); J < E; ++J) { |
| // Count the number of registers used, per register class, given all open |
| // intervals. |
| // Note that elements in this SmallMapVector will be default constructed |
| // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if |
| // there is no previous entry for ClassID. |
| SmallMapVector<unsigned, unsigned, 4> RegUsage; |
| |
| for (auto *R : OpenIntervals) { |
| // Skip recipes that weren't present in the original loop. |
| // TODO: Remove after removing the legacy |
| // LoopVectorizationCostModel::calculateRegisterUsage |
| if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, |
| VPBranchOnMaskRecipe>(R)) |
| continue; |
| |
| if (VFs[J].isScalar() || |
| isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe, |
| VPScalarIVStepsRecipe>(R) || |
| (isa<VPInstruction>(R) && |
| all_of(cast<VPSingleDefRecipe>(R)->users(), |
| [&](VPUser *U) { |
| return cast<VPRecipeBase>(U)->usesScalars( |
| R->getVPSingleValue()); |
| })) || |
| (isa<VPReductionPHIRecipe>(R) && |
| (cast<VPReductionPHIRecipe>(R))->isInLoop())) { |
| unsigned ClassID = TTI.getRegisterClassForType( |
| false, TypeInfo.inferScalarType(R->getVPSingleValue())); |
| // FIXME: The target might use more than one register for the type |
| // even in the scalar case. |
| RegUsage[ClassID] += 1; |
| } else { |
| // The output from scaled phis and scaled reductions actually has |
| // fewer lanes than the VF. |
| unsigned ScaleFactor = getVFScaleFactor(R); |
| ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor); |
| LLVM_DEBUG(if (VF != VFs[J]) { |
| dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF |
| << " for " << *R << "\n"; |
| }); |
| |
| for (VPValue *DefV : R->definedValues()) { |
| Type *ScalarTy = TypeInfo.inferScalarType(DefV); |
| unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy); |
| RegUsage[ClassID] += GetRegUsage(ScalarTy, VF); |
| } |
| } |
| } |
| |
| for (const auto &Pair : RegUsage) { |
| auto &Entry = MaxUsages[J][Pair.first]; |
| Entry = std::max(Entry, Pair.second); |
| } |
| } |
| |
| LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # " |
| << OpenIntervals.size() << '\n'); |
| |
| // Add the current recipe to the list of open intervals. |
| OpenIntervals.insert(R); |
| } |
| |
| // We also search for instructions that are defined outside the loop, but are |
| // used inside the loop. We need this number separately from the max-interval |
| // usage number because when we unroll, loop-invariant values do not take |
  // more registers.
| VPRegisterUsage RU; |
| for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) { |
| // Note that elements in this SmallMapVector will be default constructed |
| // as 0. So we can use "Invariant[ClassID] += n" in the code below even if |
| // there is no previous entry for ClassID. |
| SmallMapVector<unsigned, unsigned, 4> Invariant; |
| |
| for (auto *In : LoopInvariants) { |
| // FIXME: The target might use more than one register for the type |
| // even in the scalar case. |
| bool IsScalar = all_of(In->users(), [&](VPUser *U) { |
| return cast<VPRecipeBase>(U)->usesScalars(In); |
| }); |
| |
| ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx]; |
| unsigned ClassID = TTI.getRegisterClassForType( |
| VF.isVector(), TypeInfo.inferScalarType(In)); |
| Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF); |
| } |
| |
| LLVM_DEBUG({ |
| dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n'; |
| dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size() |
| << " item\n"; |
    for (const auto &Pair : MaxUsages[Idx]) {
      dbgs() << "LV(REG): RegisterClass: "
             << TTI.getRegisterClassName(Pair.first) << ", " << Pair.second
             << " registers\n";
    }
| dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() |
| << " item\n"; |
    for (const auto &Pair : Invariant) {
      dbgs() << "LV(REG): RegisterClass: "
             << TTI.getRegisterClassName(Pair.first) << ", " << Pair.second
             << " registers\n";
    }
| }); |
| |
| RU.LoopInvariantRegs = Invariant; |
| RU.MaxLocalUsers = MaxUsages[Idx]; |
| RUs[Idx] = RU; |
| } |
| |
| return RUs; |
| } |