Reapply "[VPlan] Add canonical IV during construction (NFC)."

This reverts commit d431921677ae923d189ff2d6f188f676a2964ed8.

The gtests that the original commit missed have now been updated.

Original message:

This addresses an existing TODO and simply moves the current code to add
canonical IV recipes to the initial skeleton construction, at the same
place where the corresponding region will be introduced.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5a7b18f..4a809ba 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9159,31 +9159,6 @@
   }
 }
 
-// Add the necessary canonical IV and branch recipes required to control the
-// loop.
-static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
-                                  DebugLoc DL) {
-  Value *StartIdx = ConstantInt::get(IdxTy, 0);
-  auto *StartV = Plan.getOrAddLiveIn(StartIdx);
-
-  // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
-  auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
-  VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
-  VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
-  Header->insert(CanonicalIVPHI, Header->begin());
-
-  VPBuilder Builder(TopRegion->getExitingBasicBlock());
-  // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
-  auto *CanonicalIVIncrement = Builder.createOverflowingOp(
-      Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
-      "index.next");
-  CanonicalIVPHI->addOperand(CanonicalIVIncrement);
-
-  // Add the BranchOnCount VPInstruction to the latch.
-  Builder.createNaryOp(VPInstruction::BranchOnCount,
-                       {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
-}
-
 /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
 /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
 /// the end value of the induction.
@@ -9455,7 +9430,8 @@
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
-      CM.foldTailByMasking(), OrigLoop);
+      CM.foldTailByMasking(), OrigLoop,
+      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
   VPlanTransforms::createLoopRegions(*Plan);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9466,14 +9442,22 @@
   for (ElementCount VF : Range)
     IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
 
-  DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
   TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
   // Use NUW for the induction increment if we proved that it won't overflow in
   // the vector loop or when not folding the tail. In the later case, we know
   // that the canonical induction increment will not overflow as the vector trip
   // count is >= increment and a multiple of the increment.
   bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
-  addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
+  if (!HasNUW) {
+    auto *IVInc = Plan->getVectorLoopRegion()
+                      ->getExitingBasicBlock()
+                      ->getTerminator()
+                      ->getOperand(0);
+    assert(match(IVInc, m_VPInstruction<Instruction::Add>(
+                            m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+           "Did not find the canonical IV increment");
+    cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+  }
 
   VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
                                 Builder);
@@ -9747,19 +9731,13 @@
   DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
   VPlanTransforms::prepareForVectorization(
-      *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
+      *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
+      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
   VPlanTransforms::createLoopRegions(*Plan);
 
   for (ElementCount VF : Range)
     Plan->addVF(VF);
 
-  // Tail folding is not supported for outer loops, so the induction increment
-  // is guaranteed to not wrap.
-  bool HasNUW = true;
-  addCanonicalIVRecipes(
-      *Plan, Legal->getWidestInductionType(), HasNUW,
-      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
-
   if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
           Plan,
           [this](PHINode *P) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 239ee22..89c77f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -722,6 +722,11 @@
     return R && classof(R);
   }
 
+  static inline bool classof(const VPValue *V) {
+    auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
+    return R && classof(R);
+  }
+
   /// Drop all poison-generating flags.
   void dropPoisonGeneratingFlags() {
     // NOTE: This needs to be kept in-sync with
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 58d6393..c7132e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -15,6 +15,7 @@
 #include "VPlan.h"
 #include "VPlanCFG.h"
 #include "VPlanDominatorTree.h"
+#include "VPlanPatternMatch.h"
 #include "VPlanTransforms.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -461,10 +462,44 @@
     VPBlockUtils::connectBlocks(R, Succ);
 }
 
+// Add the necessary canonical IV and branch recipes required to control the
+// loop.
+static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
+                                  VPBasicBlock *LatchVPBB, Type *IdxTy,
+                                  DebugLoc DL) {
+  using namespace VPlanPatternMatch;
+  Value *StartIdx = ConstantInt::get(IdxTy, 0);
+  auto *StartV = Plan.getOrAddLiveIn(StartIdx);
+
+  // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
+  auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
+  HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
+
+  // We are about to replace the branch to exit the region. Remove the original
+  // BranchOnCond, if there is any.
+  if (!LatchVPBB->empty() &&
+      match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
+    LatchVPBB->getTerminator()->eraseFromParent();
+
+  VPBuilder Builder(LatchVPBB);
+  // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+  // Initially the induction increment is guaranteed to not wrap, but that may
+  // change later, e.g. when tail-folding; the flags then need to be dropped.
+  auto *CanonicalIVIncrement = Builder.createOverflowingOp(
+      Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
+      "index.next");
+  CanonicalIVPHI->addOperand(CanonicalIVIncrement);
+
+  // Add the BranchOnCount VPInstruction to the latch.
+  Builder.createNaryOp(VPInstruction::BranchOnCount,
+                       {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+}
+
 void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
                                               PredicatedScalarEvolution &PSE,
                                               bool RequiresScalarEpilogueCheck,
-                                              bool TailFolded, Loop *TheLoop) {
+                                              bool TailFolded, Loop *TheLoop,
+                                              DebugLoc IVDL) {
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
 
@@ -479,6 +514,9 @@
   VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
   LatchVPB->swapSuccessors();
 
+  addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
+                        cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
+
   // Create SCEV and VPValue for the trip count.
   // We use the symbolic max backedge-taken-count, which works also when
   // vectorizing loops with uncountable early exits.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index c4d7952..bab7a9e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -58,7 +58,9 @@
     for (VPRecipeBase &VPI : *VPBB) {
       if (isa<VPWidenPHIRecipe>(&VPI))
         continue;
-      auto *VPInst = cast<VPInstruction>(&VPI);
+      auto *VPInst = dyn_cast<VPInstruction>(&VPI);
+      if (!VPInst)
+        continue;
       auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
       if (!Inst)
         continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 64e28c2..9e8b518 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -68,7 +68,8 @@
   static void prepareForVectorization(VPlan &Plan, Type *InductionTy,
                                       PredicatedScalarEvolution &PSE,
                                       bool RequiresScalarEpilogueCheck,
-                                      bool TailFolded, Loop *TheLoop);
+                                      bool TailFolded, Loop *TheLoop,
+                                      DebugLoc IVDL);
 
   /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
   /// flat CFG into a hierarchical CFG.
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index 11abdf7..dbed67a 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -51,13 +51,15 @@
   // Check that the region following the preheader consists of a block for the
   // original header and a separate latch.
   VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
-  EXPECT_EQ(7u, VecBB->size());
+  EXPECT_EQ(10u, VecBB->size());
   EXPECT_EQ(0u, VecBB->getNumPredecessors());
   EXPECT_EQ(0u, VecBB->getNumSuccessors());
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
   EXPECT_EQ(&*Plan, VecBB->getPlan());
 
   auto Iter = VecBB->begin();
+  auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++);
+  EXPECT_NE(nullptr, CanIV);
   VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
   EXPECT_NE(nullptr, Phi);
 
@@ -100,7 +102,7 @@
   raw_string_ostream OS(FullDump);
   Plan->printDOT(OS);
   const char *ExpectedStr = R"(digraph VPlan {
-graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
+graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = VF * UF\nLive-in vp\<%1\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
 node [shape=rect, fontname=Courier, fontsize=30]
 edge [fontname=Courier, fontsize=30]
 compound=true
@@ -119,6 +121,7 @@
     label="\<x1\> vector loop"
     N2 [label =
       "vector.body:\l" +
+      "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, ir\<%indvars.iv.next\>\l" +
       "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
@@ -126,13 +129,15 @@
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
       "  EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
+      "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
+      "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
       "No successors\l"
     ]
   }
   N2 -> N4 [ label="" ltail=cluster_N3]
   N4 [label =
     "middle.block:\l" +
-    "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
+    "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
     "  EMIT branch-on-cond vp\<%cmp.n\>\l" +
     "Successor(s): ir-bb\<for.end\>, scalar.ph\l"
   ]
@@ -207,12 +212,13 @@
   // Check that the region following the preheader consists of a block for the
   // original header and a separate latch.
   VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
-  EXPECT_EQ(8u, VecBB->size());
+  EXPECT_EQ(11u, VecBB->size());
   EXPECT_EQ(0u, VecBB->getNumPredecessors());
   EXPECT_EQ(0u, VecBB->getNumSuccessors());
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
 
   auto Iter = VecBB->begin();
+  EXPECT_NE(nullptr, dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
@@ -221,6 +227,8 @@
   EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
+  EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
+  EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
   EXPECT_EQ(VecBB->end(), Iter);
 }
 
@@ -261,7 +269,7 @@
   raw_string_ostream OS(FullDump);
   Plan->printDOT(OS);
   const char *ExpectedStr = R"(digraph VPlan {
-graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
+graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = VF * UF\nLive-in vp\<%1\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
 node [shape=rect, fontname=Courier, fontsize=30]
 edge [fontname=Courier, fontsize=30]
 compound=true
@@ -280,6 +288,7 @@
     label="\<x1\> vector loop"
     N2 [label =
       "vector.body:\l" +
+      "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  WIDEN-PHI ir\<%iv\> = phi ir\<0\>, ir\<%iv.next\>\l" +
       "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
@@ -293,13 +302,15 @@
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
       "  EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
+      "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
+      "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
       "No successors\l"
     ]
   }
   N4 -> N5 [ label="" ltail=cluster_N3]
   N5 [label =
     "middle.block:\l" +
-    "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
+    "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
     "  EMIT branch-on-cond vp\<%cmp.n\>\l" +
     "Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
   ]
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
index 1ffd1a6..555b472 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -101,8 +101,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 13));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 15));
 
   VPlanSlp Slp(VPIAI, *Body);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -173,8 +173,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 13));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 15));
 
   auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
 
@@ -192,8 +192,8 @@
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
 
-  VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 1));
-  VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3));
+  VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 2));
+  VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 4));
   EXPECT_EQ(GetA, CombinedLoadA->getOperand(0));
   EXPECT_EQ(GetB, CombinedLoadB->getOperand(0));
 
@@ -245,8 +245,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 8));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 9));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 11));
 
   VPlanSlp Slp(VPIAI, *Body);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -309,8 +309,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 5));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 6));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 11));
 
   VPlanSlp Slp(VPIAI, *Body);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -341,16 +341,16 @@
   VPInstruction *CombinedLoadA =
       cast<VPInstruction>(CombinedMulAB->getOperand(0));
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
-  VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 2));
-  VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+  VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 3));
+  VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 13));
   EXPECT_EQ(LoadvA0->getOperand(0), CombinedLoadA->getOperand(0));
   EXPECT_EQ(LoadvA1->getOperand(0), CombinedLoadA->getOperand(1));
 
   VPInstruction *CombinedLoadB =
       cast<VPInstruction>(CombinedMulAB->getOperand(1));
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
-  VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 4));
-  VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+  VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 5));
+  VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 15));
   EXPECT_EQ(LoadvB0->getOperand(0), CombinedLoadB->getOperand(0));
   EXPECT_EQ(LoadvB1->getOperand(0), CombinedLoadB->getOperand(1));
 
@@ -359,16 +359,16 @@
   VPInstruction *CombinedLoadC =
       cast<VPInstruction>(CombinedMulCD->getOperand(0));
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadC->getOpcode());
-  VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 7));
-  VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 17));
+  VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 8));
+  VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 18));
   EXPECT_EQ(LoadvC0->getOperand(0), CombinedLoadC->getOperand(0));
   EXPECT_EQ(LoadvC1->getOperand(0), CombinedLoadC->getOperand(1));
 
   VPInstruction *CombinedLoadD =
       cast<VPInstruction>(CombinedMulCD->getOperand(1));
   EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadD->getOpcode());
-  VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 9));
-  VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19));
+  VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
+  VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 20));
   EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0));
   EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1));
 
@@ -446,8 +446,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 25));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 27));
 
   checkReorderExample(
       Store1, Store2, Body,
@@ -518,8 +518,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 25));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 27));
 
   checkReorderExample(
       Store1, Store2, Body,
@@ -590,8 +590,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 25));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 27));
 
   auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
   VPlanSlp Slp(VPIAI, *Body);
@@ -666,8 +666,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 25));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 27));
 
   checkReorderExample(
       Store1, Store2, Body,
@@ -728,8 +728,8 @@
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
   VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 5));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 4));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 6));
 
   VPlanSlp Slp(VPIAI, *BB2);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -791,8 +791,8 @@
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
   VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 1));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 2));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 4));
 
   VPlanSlp Slp(VPIAI, *BB2);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -850,8 +850,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 13));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 15));
 
   VPlanSlp Slp(VPIAI, *Body);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
@@ -909,8 +909,8 @@
   EXPECT_NE(nullptr, Entry->getSingleSuccessor());
   VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
 
-  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
-  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 13));
+  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 15));
 
   VPlanSlp Slp(VPIAI, *Body);
   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 2f47d9c..bf67a55 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@
     DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
     auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
     VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
-                                             PSE, true, false, L);
+                                             PSE, true, false, L, {});
     VPlanTransforms::createLoopRegions(*Plan);
     return Plan;
   }