//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
using namespace llvm::SCEVPatternMatch;
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}

bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}

bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}

VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    return Plan.getOrAddLiveIn(E->getValue());
  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
  // value. Otherwise the value may be defined in a loop and using it directly
  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
  // form.
  auto *U = dyn_cast<SCEVUnknown>(Expr);
  if (U && !isa<Instruction>(U->getValue()))
    return Plan.getOrAddLiveIn(U->getValue());
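  // Otherwise create a recipe that expands Expr and append it to the plan's
  // entry block.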
  auto *Expanded = new VPExpandSCEVRecipe(Expr);
  Plan.getEntry()->appendRecipe(Expanded);
  return Expanded;
}

bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

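  // Besides an active-lane-mask phi (handled above), a header mask is either
  // an active-lane-mask of the canonical IV (widened or as scalar steps)
  // against the trip count, or an icmp of the canonical IV against the
  // backedge-taken count, matched below. For example (illustrative dump,
  // names hypothetical):
  //   EMIT vp<%mask> = active lane mask vp<%iv.steps>, ir<%trip.count>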
  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  auto m_CanonicalScalarIVSteps =
      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()));

  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}

/// Returns true if \p R propagates poison from any operand to its result.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
          [](const VPRecipeBase *) { return true; })
      .Case([](const VPReplicateRecipe *Rep) {
        // GEP and casts propagate poison from all operands.
        unsigned Opcode = Rep->getOpcode();
        return Opcode == Instruction::GetElementPtr ||
               Instruction::isCast(Opcode);
      })
      .Default([](const VPRecipeBase *) { return false; });
}

/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *, 16> Worklist;

  Worklist.push_back(V);

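  // Worklist-based traversal of V's transitive users, following only recipes
  // that propagate poison from the visited operand to their result.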
  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
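      // For replicated memory instructions, the address is operand 0 of a
      // load and operand 1 of a store (operand 0 is the stored value).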
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }

      // If poison propagates from Current through this recipe, continue the
      // walk from the recipe's result.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }

  return false;
}

const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // Helper to create SCEVs for binary and unary operations.
  auto CreateSCEV =
      [&](ArrayRef<VPValue *> Ops,
          function_ref<const SCEV *(ArrayRef<const SCEV *>)> CreateFn)
      -> const SCEV * {
    SmallVector<const SCEV *, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

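  // Translate the defining recipe's arithmetic into an equivalent SCEV
  // expression, recursing into operands via CreateSCEV.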
  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V,
            m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction.
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V,
            m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
            if (!L)
              return SE.getCouldNotCompute();
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
                                    SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
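          // A derived IV computes Start + IV * Scale, with Start and Scale
          // truncated or sign-extended to the IV's type.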
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}

bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
                                   const Loop *L) {
  // If the address is an SCEVAddExpr, require that all of its operands are
  // either loop-invariant or a (possibly sign-extended) affine AddRec.
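  // For example, an address like (%invariant.base + {0,+,4}) qualifies, while
  // an address loaded from memory inside the loop does not.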
  if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
    return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
      return SE.isLoopInvariant(Op, L) ||
             match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
             match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
    });
  }

  // Otherwise, check if the address is loop-invariant or an affine add
  // recurrence.
  return SE.isLoopInvariant(Addr, L) ||
         match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
}

/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
/// uniform, the result will also be uniform.
static bool preservesUniformity(unsigned Opcode) {
  if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
    return true;
  switch (Opcode) {
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case VPInstruction::Not:
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    return true;
  default:
    return false;
  }
}

bool vputils::isSingleScalar(const VPValue *VPV) {
  // A live-in must be uniform across the scope of the VPlan.
  if (isa<VPIRValue, VPSymbolicValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
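  // The following recipes produce a single scalar if all of their operands do
  // (and, where applicable, the opcode preserves uniformity).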
  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
          VPVectorEndPointerRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}

bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (isa<VPIRValue, VPSymbolicValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
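  // Values defined outside the loop regions are uniform if their operands
  // are, except for the per-part canonical IV increment, which differs
  // between unrolled parts.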
  if (R && V->isDefinedOutsideLoopRegions()) {
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  auto *CanonicalIV =
      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
  // The canonical IV chain is uniform.
  if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
    return true;

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return (VPI->isScalarCast() &&
                isUniformAcrossVFsAndUFs(VPI->getOperand(0))) ||
               (preservesUniformity(VPI->getOpcode()) &&
                all_of(VPI->operands(), isUniformAcrossVFsAndUFs));
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform if its operand is.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      // A value is considered non-uniform unless proven otherwise.
      .Default([](const VPRecipeBase *) { return false; });
}

VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}

unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPReductionRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
    return ER->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

std::optional<VPValue *>
vputils::getRecipesForUncountableExit(VPlan &Plan,
                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
  // Given a VPlan like the following (just including the recipes contributing
  // to loop control exiting here, not the actual work), we're looking to match
  // the recipes contributing to the uncountable exit condition comparison
  // (here, vp<%4>) back to either live-ins or the address nodes for the load
  // used as part of the uncountable exit comparison. This lets us copy them to
  // a preheader and rotate the address in the loop to the next vector
  // iteration.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF
  // Live-in ir<64> = original trip-count
  //
  // entry:
  // Successor(s): preheader, vector.ph
  //
  // vector.ph:
  // Successor(s): vector loop
  //
  // <x1> vector loop: {
  // vector.body:
  //   EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
  //   vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
  //   CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
  //   WIDEN ir<%ee.load> = load ir<%ee.addr>
  //   WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
  //   EMIT vp<%5> = any-of vp<%4>
  //   EMIT vp<%6> = add vp<%2>, vp<%0>
  //   EMIT vp<%7> = icmp eq vp<%6>, ir<64>
  //   EMIT branch-on-two-conds vp<%5>, vp<%7>
  //   No successors
  // }
  // Successor(s): early.exit, middle.block
  //
  // middle.block:
  // Successor(s): preheader
  //
  // preheader:
  // No successors
  // }

  // Find the uncountable loop exit condition.
  auto *Region = Plan.getVectorLoopRegion();
  VPValue *UncountableCondition = nullptr;
  if (!match(Region->getExitingBasicBlock()->getTerminator(),
             m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
                                m_VPValue())))
    return std::nullopt;

  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(V->getDefiningRecipe());
    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
      // Reject masked loads for the time being; they make the exit condition
      // more complex.
      if (Load->isMasked())
        return std::nullopt;

      VPValue *GEP = Load->getAddr();
      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(Load);
      Recipes.push_back(GEP->getDefiningRecipe());
      GEPs.push_back(GEP->getDefiningRecipe());
    } else
      return std::nullopt;
  }

  return UncountableCondition;
}

bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}

bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its second successor, with its other successor
  // leaving the loop. A preheader, on the other hand, has a header as its
  // first (and only) successor.
  return VPB->getNumSuccessors() == 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}

std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
  auto *M = dyn_cast<VPIRMetadata>(&R);
  if (!M)
    return std::nullopt;
  MemoryLocation Loc;
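  // Only the AA metadata is populated below; the location's pointer and size
  // keep their default, unknown values.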
  // Populate the noalias and alias.scope metadata from VPIRMetadata.
  if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
    Loc.AATags.NoAlias = NoAliasMD;
  if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
    Loc.AATags.Scope = AliasScopeMD;
  return Loc;
}

/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}