//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
using namespace llvm::SCEVPatternMatch;
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}

bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}

bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}

VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    return Plan.getOrAddLiveIn(E->getValue());
  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
  // value. Otherwise the value may be defined in a loop and using it directly
  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
  // form.
  auto *U = dyn_cast<SCEVUnknown>(Expr);
  if (U && !isa<Instruction>(U->getValue()))
    return Plan.getOrAddLiveIn(U->getValue());
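  // Otherwise create a recipe that expands Expr and append it to the plan's
  // entry block.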
  auto *Expanded = new VPExpandSCEVRecipe(Expr);
  Plan.getEntry()->appendRecipe(Expanded);
  return Expanded;
}

bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

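  // Besides an active-lane-mask phi (handled above), a header mask is either
  // an active-lane-mask of the canonical IV (widened or as scalar steps)
  // against the trip count, or an icmp of the canonical IV against the
  // backedge-taken count, matched below. For example (illustrative dump,
  // names hypothetical):
  //   EMIT vp<%mask> = active lane mask vp<%iv.steps>, ir<%trip.count>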
  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  auto m_CanonicalScalarIVSteps =
      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()));

  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}

/// Returns true if \p R propagates poison from any operand to its result.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
          [](const VPRecipeBase *) { return true; })
      .Case([](const VPReplicateRecipe *Rep) {
        // GEP and casts propagate poison from all operands.
        unsigned Opcode = Rep->getOpcode();
        return Opcode == Instruction::GetElementPtr ||
               Instruction::isCast(Opcode);
      })
      .Default([](const VPRecipeBase *) { return false; });
}

/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *, 16> Worklist;

  Worklist.push_back(V);

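  // Worklist-based traversal of V's transitive users, following only recipes
  // that propagate poison from the visited operand to their result.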
  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
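      // For replicated memory instructions, the address is operand 0 of a
      // load and operand 1 of a store (operand 0 is the stored value).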
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }

      // If poison propagates from Current through this recipe, continue the
      // walk from the recipe's result.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }

  return false;
}

const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // Helper to create SCEVs for binary and unary operations.
  auto CreateSCEV =
      [&](ArrayRef<VPValue *> Ops,
          function_ref<const SCEV *(ArrayRef<const SCEV *>)> CreateFn)
      -> const SCEV * {
    SmallVector<const SCEV *, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

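  // Translate the defining recipe's arithmetic into an equivalent SCEV
  // expression, recursing into operands via CreateSCEV.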
  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V,
            m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction.
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V,
            m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
            if (!L)
              return SE.getCouldNotCompute();
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
                                    SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
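          // A derived IV computes Start + IV * Scale, with Start and Scale
          // truncated or sign-extended to the IV's type.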
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}

bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
                                   const Loop *L) {
  // If the address is an SCEVAddExpr, require that all of its operands are
  // either loop-invariant or a (possibly sign-extended) affine AddRec.
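  // For example, an address like (%invariant.base + {0,+,4}) qualifies, while
  // an address loaded from memory inside the loop does not.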
  if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
    return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
      return SE.isLoopInvariant(Op, L) ||
             match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
             match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
    });
  }

  // Otherwise, check if the address is loop-invariant or an affine add
  // recurrence.
  return SE.isLoopInvariant(Addr, L) ||
         match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
}

/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
/// uniform, the result will also be uniform.
static bool preservesUniformity(unsigned Opcode) {
  if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
    return true;
  switch (Opcode) {
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case VPInstruction::Not:
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    return true;
  default:
    return false;
  }
}

bool vputils::isSingleScalar(const VPValue *VPV) {
  // A live-in must be uniform across the scope of the VPlan.
  if (isa<VPIRValue, VPSymbolicValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
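  // The following recipes produce a single scalar if all of their operands do
  // (and, where applicable, the opcode preserves uniformity).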
  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
          VPVectorEndPointerRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}

bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (isa<VPIRValue, VPSymbolicValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
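  // Values defined outside the loop regions are uniform if their operands
  // are, except for the per-part canonical IV increment, which differs
  // between unrolled parts.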
  if (R && V->isDefinedOutsideLoopRegions()) {
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  auto *CanonicalIV =
      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
  // The canonical IV chain is uniform.
  if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
    return true;

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return (VPI->isScalarCast() &&
                isUniformAcrossVFsAndUFs(VPI->getOperand(0))) ||
               (preservesUniformity(VPI->getOpcode()) &&
                all_of(VPI->operands(), isUniformAcrossVFsAndUFs));
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform if its operand is.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      // A value is considered non-uniform unless proven otherwise.
      .Default([](const VPRecipeBase *) { return false; });
}

VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}

unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPReductionRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
    return ER->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

std::optional<VPValue *>
vputils::getRecipesForUncountableExit(VPlan &Plan,
                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
  // Given a VPlan like the following (just including the recipes contributing
  // to loop control exiting here, not the actual work), we're looking to match
  // the recipes contributing to the uncountable exit condition comparison
  // (here, vp<%4>) back to either live-ins or the address nodes for the load
  // used as part of the uncountable exit comparison. This lets us copy them to
  // a preheader and rotate the address in the loop to the next vector
  // iteration.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF
  // Live-in ir<64> = original trip-count
  //
  // entry:
  // Successor(s): preheader, vector.ph
  //
  // vector.ph:
  // Successor(s): vector loop
  //
  // <x1> vector loop: {
  // vector.body:
  //   EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
  //   vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
  //   CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
  //   WIDEN ir<%ee.load> = load ir<%ee.addr>
  //   WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
  //   EMIT vp<%5> = any-of vp<%4>
  //   EMIT vp<%6> = add vp<%2>, vp<%0>
  //   EMIT vp<%7> = icmp eq vp<%6>, ir<64>
  //   EMIT branch-on-two-conds vp<%5>, vp<%7>
  //   No successors
  // }
  // Successor(s): early.exit, middle.block
  //
  // middle.block:
  // Successor(s): preheader
  //
  // preheader:
  // No successors
  // }

  // Find the uncountable loop exit condition.
  auto *Region = Plan.getVectorLoopRegion();
  VPValue *UncountableCondition = nullptr;
  if (!match(Region->getExitingBasicBlock()->getTerminator(),
             m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
                                m_VPValue())))
    return std::nullopt;

  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(V->getDefiningRecipe());
    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
      // Reject masked loads for the time being; they make the exit condition
      // more complex.
      if (Load->isMasked())
        return std::nullopt;

      VPValue *GEP = Load->getAddr();
      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(Load);
      Recipes.push_back(GEP->getDefiningRecipe());
      GEPs.push_back(GEP->getDefiningRecipe());
    } else
      return std::nullopt;
  }

  return UncountableCondition;
}

bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}

bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its second successor, with its other successor
  // leaving the loop. A preheader, on the other hand, has a header as its
  // first (and only) successor.
  return VPB->getNumSuccessors() == 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}

std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
  auto *M = dyn_cast<VPIRMetadata>(&R);
  if (!M)
    return std::nullopt;
  MemoryLocation Loc;
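  // Only the AA metadata is populated below; the location's pointer and size
  // keep their default, unknown values.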
  // Populate the noalias and alias.scope metadata from VPIRMetadata.
  if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
    Loc.AATags.NoAlias = NoAliasMD;
  if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
    Loc.AATags.Scope = AliasScopeMD;
  return Loc;
}

/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}