//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
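
/// Returns true if all users of \p Def only use its first lane.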
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); });
}
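
/// Returns true if all users of \p Def only use its first part.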
bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); });
}
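
/// Returns true if all users of \p Def only use scalar values of it.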
bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}
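
/// Get or create a VPValue corresponding to the expansion of \p Expr.
/// SCEVConstants and SCEVUnknowns wrapping non-instruction values become
/// live-ins; anything else is expanded via a VPExpandSCEVRecipe in the plan's
/// entry block. Expansions are cached on the plan, so repeated queries for the
/// same expression return the same VPValue.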
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *Expanded = Plan.getSCEVExpansion(Expr))
    return Expanded;
  VPValue *Expanded = nullptr;
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    Expanded = Plan.getOrAddLiveIn(E->getValue());
  else {
    auto *U = dyn_cast<SCEVUnknown>(Expr);
    // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
    // value. Otherwise the value may be defined in a loop and using it
    // directly will break LCSSA form. The SCEV expansion takes care of
    // preserving LCSSA form.
    if (U && !isa<Instruction>(U->getValue())) {
      Expanded = Plan.getOrAddLiveIn(U->getValue());
    } else {
      Expanded = new VPExpandSCEVRecipe(Expr);
      Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
    }
  }
  Plan.addSCEVExpansion(Expr, Expanded);
  return Expanded;
}
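
/// Returns true if \p V is a header mask, i.e. either an active-lane-mask
/// based on the canonical IV and the trip count, or a wide canonical-IV
/// compare against the backedge-taken count.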
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;
  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A,
                  m_ScalarIVSteps(
                      m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()))) ||
            IsWideCanonicalIV(A));

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) &&
         IsWideCanonicalIV(A) && B == Plan.getBackedgeTakenCount();
}
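
/// Return the SCEV expression for \p V, or SCEVCouldNotCompute if one cannot
/// be derived. Currently only live-ins and VPExpandSCEVRecipes are handled.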
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
  if (V->isLiveIn()) {
    if (Value *LiveIn = V->getLiveInIRValue())
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
      .Case<VPExpandSCEVRecipe>(
          [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
      .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
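
/// Returns true if \p V computes the same value for all vectorization factors
/// and unroll factors.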
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (V->isLiveIn())
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
  if (R && V->isDefinedOutsideLoopRegions()) {
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  auto *CanonicalIV =
      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
  // The canonical IV chain is uniform.
  if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
    return true;

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPDerivedIVRecipe>([](const auto *R) { return true; })
      .Case<VPReplicateRecipe>([](const auto *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case<VPInstruction>([](const auto *VPI) {
        return VPI->isScalarCast() &&
               isUniformAcrossVFsAndUFs(VPI->getOperand(0));
      })
      .Case<VPWidenCastRecipe>([](const auto *R) {
        // A cast is uniform if its operand is.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) {
        // A value is considered non-uniform unless proven otherwise.
        return false;
      });
}
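
/// Returns the header of the first loop in \p Plan, found via a shallow
/// depth-first search from the entry block, or nullptr if there is none.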
VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}
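
/// Returns the VF scaling factor applied by \p R (e.g. by reduction PHIs or
/// partial reductions), or 1 if \p R is null or does not scale the VF.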
unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}
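
/// Collect the recipes contributing to \p Plan's uncountable exit condition.
/// On success, returns the condition value and populates \p Recipes with the
/// chain of recipes feeding it and \p GEPs with the address computations of
/// the loads involved; returns std::nullopt if the exit does not match the
/// supported shape.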
std::optional<VPValue *>
vputils::getRecipesForUncountableExit(VPlan &Plan,
                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
  // Given a VPlan like the following (just including the recipes contributing
  // to loop control exiting here, not the actual work), we're looking to match
  // the recipes contributing to the uncountable exit condition comparison
  // (here, vp<%4>) back to either live-ins or the address nodes for the load
  // used as part of the uncountable exit comparison so that we can copy them
  // to a preheader and rotate the address in the loop to the next vector
  // iteration.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF
  // Live-in ir<64> = original trip-count
  //
  // entry:
  // Successor(s): preheader, vector.ph
  //
  // vector.ph:
  // Successor(s): vector loop
  //
  // <x1> vector loop: {
  //   vector.body:
  //     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
  //     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
  //     CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
  //     WIDEN ir<%ee.load> = load ir<%ee.addr>
  //     WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
  //     EMIT vp<%5> = any-of vp<%4>
  //     EMIT vp<%6> = add vp<%2>, vp<%0>
  //     EMIT vp<%7> = icmp eq vp<%6>, ir<64>
  //     EMIT vp<%8> = or vp<%5>, vp<%7>
  //     EMIT branch-on-cond vp<%8>
  //   No successors
  // }
  // Successor(s): middle.block
  //
  // middle.block:
  // Successor(s): preheader
  //
  // preheader:
  // No successors
  // }

  // Find the uncountable loop exit condition.
  auto *Region = Plan.getVectorLoopRegion();
  VPValue *UncountableCondition = nullptr;
  if (!match(Region->getExitingBasicBlock()->getTerminator(),
             m_BranchOnCond(m_OneUse(m_c_BinaryOr(
                 m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue())))))
    return std::nullopt;

  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(V->getDefiningRecipe());
    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
      // Reject masked loads for the time being; they make the exit condition
      // more complex.
      if (Load->isMasked())
        return std::nullopt;

      VPValue *GEP = Load->getAddr();
      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(Load);
      Recipes.push_back(GEP->getDefiningRecipe());
      GEPs.push_back(GEP->getDefiningRecipe());
    } else
      return std::nullopt;
  }

  return UncountableCondition;
}
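
/// Returns true if \p VPB is a loop header: either the entry block of a
/// non-replicating region, or (in a plain CFG) a block that dominates its
/// second predecessor.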
bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}
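
/// Returns true if \p VPB is a loop latch, i.e. a block whose second successor
/// is a loop header.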
bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its second successor, with its other successor
  // leaving the loop. A preheader, on the other hand, has a header as its
  // first (and only) successor.
  return VPB->getNumSuccessors() == 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}