Use MemorySSA in LICM to do sinking and hoisting.

Summary:
Step 2 in using MemorySSA in LICM:
Use MemorySSA in LICM to do sinking and hoisting, all under "EnableMSSALoopDependency" flag.
Promotion is disabled.

Enable flag in LICM sink/hoist tests to test correctness of this change. Moved one test which
relied on promotion, in order to test all sinking tests.

Reviewers: sanjoy, davide, gberry, george.burgess.iv

Subscribers: llvm-commits, Prazek

Differential Revision: https://reviews.llvm.org/D40375

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350879 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index 098876e..169d5bd 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -35,6 +35,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/BasicBlock.h"
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 7b93f29..1740e9e 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -41,6 +41,7 @@
 class DataLayout;
 class Loop;
 class LoopInfo;
+class MemorySSAUpdater;
 class OptimizationRemarkEmitter;
 class PredicatedScalarEvolution;
 class PredIteratorCache;
@@ -109,7 +110,7 @@
 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
-                AliasSetTracker *, ICFLoopSafetyInfo *,
+                AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
                 OptimizationRemarkEmitter *ORE);
 
 /// Walk the specified region of the CFG (defined by all blocks
@@ -122,7 +123,8 @@
 /// ORE. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                 ICFLoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
+                 MemorySSAUpdater *, ICFLoopSafetyInfo *,
+                 OptimizationRemarkEmitter *ORE);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
@@ -274,7 +276,7 @@
 /// If \p ORE is set use it to emit optimization remarks.
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                         Loop *CurLoop, AliasSetTracker *CurAST,
-                        bool TargetExecutesOncePerLoop,
+                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
                         OptimizationRemarkEmitter *ORE = nullptr);
 
 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 3b44ac5..44ef6ea 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -46,11 +46,11 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -69,6 +69,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
@@ -106,16 +107,29 @@
 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
                cl::desc("How many instruction to cross product using AA"));
 
+// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
+// pathological cases, in exchange for faster compile. This is to be removed
+// if MemorySSA starts to address the same issue. This flag applies only when
+// LICM uses MemorySSA instead on AliasSetTracker. When the flag is disabled
+// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
+// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's
+// getDefiningAccess, which may not be precise, since optimizeUses is capped.
+static cl::opt<bool> EnableLicmCap(
+    "enable-licm-cap", cl::init(false), cl::Hidden,
+    cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
+             "pathological cases, in exchange for faster compile"));
+
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
                                   TargetTransformInfo *TTI, bool &FreeInLoop);
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
-                  OptimizationRemarkEmitter *ORE);
+                  MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
-                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
+                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+                 bool FreeInLoop);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                            const DominatorTree *DT,
                                            const Loop *CurLoop,
@@ -125,14 +139,14 @@
 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                      AliasSetTracker *CurAST, Loop *CurLoop,
                                      AliasAnalysis *AA);
-
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
-                            const LoopInfo *LI,
-                            const LoopSafetyInfo *SafetyInfo);
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+                                             Loop *CurLoop);
+static Instruction *CloneInstructionInExitBlock(
+    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
 
 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
-                             AliasSetTracker *AST);
+                             AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
 
 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                   ICFLoopSafetyInfo &SafetyInfo);
@@ -194,8 +208,10 @@
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    if (EnableMSSALoopDependency)
+    if (EnableMSSALoopDependency) {
       AU.addRequired<MemorySSAWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    }
     AU.addRequired<TargetTransformInfoWrapperPass>();
     getLoopAnalysisUsage(AU);
   }
@@ -275,7 +291,15 @@
 
   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
 
-  std::unique_ptr<AliasSetTracker> CurAST = collectAliasInfoForLoop(L, LI, AA);
+  std::unique_ptr<AliasSetTracker> CurAST;
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (!MSSA) {
+    LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
+    CurAST = collectAliasInfoForLoop(L, LI, AA);
+  } else {
+    LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+    MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+  }
 
   // Get the preheader block to move instructions into...
   BasicBlock *Preheader = L->getLoopPreheader();
@@ -296,10 +320,10 @@
   //
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), &SafetyInfo, ORE);
+                          CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), &SafetyInfo, ORE);
+                           CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
 
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -328,27 +352,30 @@
 
       bool Promoted = false;
 
-      // Loop over all of the alias sets in the tracker object.
-      for (AliasSet &AS : *CurAST) {
-        // We can promote this alias set if it has a store, if it is a "Must"
-        // alias set, if the pointer is loop invariant, and if we are not
-        // eliminating any volatile loads or stores.
-        if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
-            !L->isLoopInvariant(AS.begin()->getValue()))
-          continue;
+      if (CurAST.get()) {
+        // Loop over all of the alias sets in the tracker object.
+        for (AliasSet &AS : *CurAST) {
+          // We can promote this alias set if it has a store, if it is a "Must"
+          // alias set, if the pointer is loop invariant, and if we are not
+          // eliminating any volatile loads or stores.
+          if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+              !L->isLoopInvariant(AS.begin()->getValue()))
+            continue;
 
-        assert(
-            !AS.empty() &&
-            "Must alias set should have at least one pointer element in it!");
+          assert(
+              !AS.empty() &&
+              "Must alias set should have at least one pointer element in it!");
 
-        SmallSetVector<Value *, 8> PointerMustAliases;
-        for (const auto &ASI : AS)
-          PointerMustAliases.insert(ASI.getValue());
+          SmallSetVector<Value *, 8> PointerMustAliases;
+          for (const auto &ASI : AS)
+            PointerMustAliases.insert(ASI.getValue());
 
-        Promoted |= promoteLoopAccessesToScalars(
-            PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
-            CurAST.get(), &SafetyInfo, ORE);
+          Promoted |= promoteLoopAccessesToScalars(
+              PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
+              CurAST.get(), &SafetyInfo, ORE);
+        }
       }
+      // FIXME: Promotion initially disabled when using MemorySSA.
 
       // Once we have promoted values across the loop body we have to
       // recursively reform LCSSA as any nested loop may now have values defined
@@ -372,9 +399,12 @@
 
   // If this loop is nested inside of another one, save the alias information
   // for when we process the outer loop.
-  if (L->getParentLoop() && !DeleteAST)
+  if (CurAST.get() && L->getParentLoop() && !DeleteAST)
     LoopToAliasSetMap[L] = std::move(CurAST);
 
+  if (MSSAU.get() && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
   if (Changed && SE)
     SE->forgetLoopDispositions(L);
   return Changed;
@@ -388,13 +418,16 @@
 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI,
                       TargetTransformInfo *TTI, Loop *CurLoop,
-                      AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
+                      AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+                      ICFLoopSafetyInfo *SafetyInfo,
                       OptimizationRemarkEmitter *ORE) {
 
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && CurAST && SafetyInfo != nullptr &&
-         "Unexpected input to sinkRegion");
+         CurLoop != nullptr && SafetyInfo != nullptr &&
+         "Unexpected input to sinkRegion.");
+  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+         "Either AliasSetTracker or MemorySSA should be initialized.");
 
   // We want to visit children before parents. We will enque all the parents
   // before their children in the worklist and process the worklist in reverse
@@ -418,7 +451,7 @@
         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
         salvageDebugInfo(I);
         ++II;
-        eraseInstruction(I, *SafetyInfo, CurAST);
+        eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
         Changed = true;
         continue;
       }
@@ -430,18 +463,20 @@
       //
       bool FreeInLoop = false;
       if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
           !I.mayHaveSideEffects()) {
-        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
+        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
           if (!FreeInLoop) {
             ++II;
-            eraseInstruction(I, *SafetyInfo, CurAST);
+            eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
           }
           Changed = true;
         }
       }
     }
   }
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
   return Changed;
 }
 
@@ -458,6 +493,7 @@
   LoopInfo *LI;
   DominatorTree *DT;
   Loop *CurLoop;
+  MemorySSAUpdater *MSSAU;
 
   // A map of blocks in the loop to the block their instructions will be hoisted
   // to.
@@ -468,8 +504,9 @@
   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
 
 public:
-  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop)
-      : LI(LI), DT(DT), CurLoop(CurLoop) {}
+  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
+                     MemorySSAUpdater *MSSAU)
+      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
 
   void registerPossiblyHoistableBranch(BranchInst *BI) {
     // We can only hoist conditional branches with loop invariant operands.
@@ -644,6 +681,9 @@
     if (HoistTarget == InitialPreheader) {
       // Phis in the loop header now need to use the new preheader.
       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
+      if (MSSAU)
+        MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+            HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
       // The new preheader dominates the loop header.
       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
@@ -674,14 +714,17 @@
 ///
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
-                       AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
+                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+                       ICFLoopSafetyInfo *SafetyInfo,
                        OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
-         "Unexpected input to hoistRegion");
+         CurLoop != nullptr && SafetyInfo != nullptr &&
+         "Unexpected input to hoistRegion.");
+  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+         "Either AliasSetTracker or MemorySSA should be initialized.");
 
-  ControlFlowHoister CFH(LI, DT, CurLoop);
+  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
 
   // Keep track of instructions that have been hoisted, as they may need to be
   // re-hoisted if they end up not dominating all of their uses.
@@ -708,10 +751,12 @@
               &I, I.getModule()->getDataLayout(), TLI)) {
         LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
                           << '\n');
-        CurAST->copyValue(&I, C);
+        if (CurAST)
+          CurAST->copyValue(&I, C);
+        // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
         I.replaceAllUsesWith(C);
         if (isInstructionTriviallyDead(&I, TLI))
-          eraseInstruction(I, *SafetyInfo, CurAST);
+          eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
         Changed = true;
         continue;
       }
@@ -723,11 +768,12 @@
       // and we have accurately duplicated the control flow from the loop header
       // to that block.
       if (CurLoop->hasLoopInvariantOperands(&I) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
           isSafeToExecuteUnconditionally(
               I, DT, CurLoop, SafetyInfo, ORE,
               CurLoop->getLoopPreheader()->getTerminator())) {
-        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
         continue;
@@ -751,10 +797,10 @@
         SafetyInfo->insertInstructionTo(Product, I.getParent());
         Product->insertAfter(&I);
         I.replaceAllUsesWith(Product);
-        eraseInstruction(I, *SafetyInfo, CurAST);
+        eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
 
         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
-              SafetyInfo, ORE);
+              SafetyInfo, MSSAU, ORE);
         HoistedInstructions.push_back(ReciprocalDivisor);
         Changed = true;
         continue;
@@ -767,7 +813,8 @@
           CurLoop->hasLoopInvariantOperands(&I) &&
           SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
           SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
-        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
         continue;
@@ -781,7 +828,7 @@
             PN->setIncomingBlock(
                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
-                ORE);
+                MSSAU, ORE);
           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
           Changed = true;
           continue;
@@ -824,8 +871,11 @@
       }
     }
   }
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
-  // Now that we've finished hoisting make sure that LI and DT are still valid.
+    // Now that we've finished hoisting make sure that LI and DT are still
+    // valid.
 #ifndef NDEBUG
   if (Changed) {
     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
@@ -905,25 +955,53 @@
           isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
 }
 /// Return true if all of the alias sets within this AST are known not to
-/// contain a Mod.
-bool isReadOnly(AliasSetTracker *CurAST) {
-  for (AliasSet &AS : *CurAST) {
-    if (!AS.isForwardingAliasSet() && AS.isMod()) {
-      return false;
+/// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
+bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
+                const Loop *L) {
+  if (CurAST) {
+    for (AliasSet &AS : *CurAST) {
+      if (!AS.isForwardingAliasSet() && AS.isMod()) {
+        return false;
+      }
     }
+    return true;
+  } else { /*MSSAU*/
+    for (auto *BB : L->getBlocks())
+      if (MSSAU->getMemorySSA()->getBlockDefs(BB))
+        return false;
+    return true;
   }
+}
+
+/// Return true if I is the only Instruction with a MemoryAccess in L.
+bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+                        const MemorySSAUpdater *MSSAU) {
+  for (auto *BB : L->getBlocks())
+    if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
+      int NotAPhi = 0;
+      for (const auto &Acc : *Accs) {
+        if (isa<MemoryPhi>(&Acc))
+          continue;
+        const auto *MUD = cast<MemoryUseOrDef>(&Acc);
+        if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
+          return false;
+      }
+    }
   return true;
 }
 }
 
 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                               Loop *CurLoop, AliasSetTracker *CurAST,
+                              MemorySSAUpdater *MSSAU,
                               bool TargetExecutesOncePerLoop,
                               OptimizationRemarkEmitter *ORE) {
   // If we don't understand the instruction, bail early.
   if (!isHoistableAndSinkableInst(I))
     return false;
-  
+
+  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     if (!LI->isUnordered())
@@ -943,8 +1021,13 @@
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
 
-    bool Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI),
-                                                CurAST, CurLoop, AA);
+    bool Invalidated;
+    if (CurAST)
+      Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
+                                             CurLoop, AA);
+    else
+      Invalidated = pointerInvalidatedByLoopWithMSSA(
+          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
     // Check loop-invariant address because this may also be a sinkable load
     // whose address is not necessarily loop-invariant.
     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -969,7 +1052,7 @@
     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
       // Assumes don't actually alias anything or throw
       return true;
-    
+
     // Handle simple cases by querying alias analysis.
     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == FMRB_DoesNotAccessMemory)
@@ -981,17 +1064,24 @@
       if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
         // TODO: expand to writeable arguments
         for (Value *Op : CI->arg_operands())
-          if (Op->getType()->isPointerTy() &&
-              pointerInvalidatedByLoop(
+          if (Op->getType()->isPointerTy()) {
+            bool Invalidated;
+            if (CurAST)
+              Invalidated = pointerInvalidatedByLoop(
                   MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
-                  CurAST, CurLoop, AA))
-            return false;
+                  CurAST, CurLoop, AA);
+            else
+              Invalidated = pointerInvalidatedByLoopWithMSSA(
+                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+            if (Invalidated)
+              return false;
+          }
         return true;
       }
 
       // If this call only reads from memory and there are no writes to memory
       // in the loop, we can hoist or sink the call as appropriate.
-      if (isReadOnly(CurAST))
+      if (isReadOnly(CurAST, MSSAU, CurLoop))
         return true;
     }
 
@@ -1002,18 +1092,21 @@
   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
     // Fences alias (most) everything to provide ordering.  For the moment,
     // just give up if there are any other memory operations in the loop.
-    auto Begin = CurAST->begin();
-    assert(Begin != CurAST->end() && "must contain FI");
-    if (std::next(Begin) != CurAST->end())
-      // constant memory for instance, TODO: handle better
-      return false;
-    auto *UniqueI = Begin->getUniqueInstruction();
-    if (!UniqueI)
-      // other memory op, give up
-      return false;
-    (void)FI; //suppress unused variable warning
-    assert(UniqueI == FI && "AS must contain FI");
-    return true;
+    if (CurAST) {
+      auto Begin = CurAST->begin();
+      assert(Begin != CurAST->end() && "must contain FI");
+      if (std::next(Begin) != CurAST->end())
+        // constant memory for instance, TODO: handle better
+        return false;
+      auto *UniqueI = Begin->getUniqueInstruction();
+      if (!UniqueI)
+        // other memory op, give up
+        return false;
+      (void)FI; // suppress unused variable warning
+      assert(UniqueI == FI && "AS must contain FI");
+      return true;
+    } else // MSSAU
+      return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
     if (!SI->isUnordered())
       return false; // Don't sink/hoist volatile or ordered atomic store!
@@ -1023,17 +1116,29 @@
     // load store promotion instead.  TODO: We can extend this to cases where
     // there is exactly one write to the location and that write dominates an
     // arbitrary number of reads in the loop.
-    auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
+    if (CurAST) {
+      auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
 
-    if (AS.isRef() || !AS.isMustAlias())
-      // Quick exit test, handled by the full path below as well.
+      if (AS.isRef() || !AS.isMustAlias())
+        // Quick exit test, handled by the full path below as well.
+        return false;
+      auto *UniqueI = AS.getUniqueInstruction();
+      if (!UniqueI)
+        // other memory op, give up
+        return false;
+      assert(UniqueI == SI && "AS must contain SI");
+      return true;
+    } else { // MSSAU
+      if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
+        return true;
+      if (!EnableLicmCap) {
+        auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+        if (MSSA->isLiveOnEntryDef(Source) ||
+            !CurLoop->contains(Source->getBlock()))
+          return true;
+      }
       return false;
-    auto *UniqueI = AS.getUniqueInstruction();
-    if (!UniqueI)
-      // other memory op, give up
-      return false;
-    assert(UniqueI == SI && "AS must contain SI");
-    return true;
+    }
   }
 
   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -1117,10 +1222,9 @@
   return true;
 }
 
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
-                            const LoopInfo *LI,
-                            const LoopSafetyInfo *SafetyInfo) {
+static Instruction *CloneInstructionInExitBlock(
+    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
   Instruction *New;
   if (auto *CI = dyn_cast<CallInst>(&I)) {
     const auto &BlockColors = SafetyInfo->getBlockColors();
@@ -1156,6 +1260,21 @@
   if (!I.getName().empty())
     New->setName(I.getName() + ".le");
 
+  MemoryAccess *OldMemAcc;
+  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
+    // Create a new MemoryAccess and let MemorySSA set its defining access.
+    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+        New, nullptr, New->getParent(), MemorySSA::Beginning);
+    if (NewMemAcc) {
+      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+        MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+      else {
+        auto *MemUse = cast<MemoryUse>(NewMemAcc);
+        MSSAU->insertUse(MemUse);
+      }
+    }
+  }
+
   // Build LCSSA PHI nodes for any in-loop operands. Note that this is
   // particularly cheap because we can rip off the PHI node that we're
   // replacing for the number and blocks of the predecessors.
@@ -1179,9 +1298,11 @@
 }
 
 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
-                             AliasSetTracker *AST) {
+                             AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
   if (AST)
     AST->deleteValue(&I);
+  if (MSSAU)
+    MSSAU->removeMemoryAccess(&I);
   SafetyInfo.removeInstruction(&I);
   I.eraseFromParent();
 }
@@ -1196,7 +1317,8 @@
 static Instruction *sinkThroughTriviallyReplaceablePHI(
     PHINode *TPN, Instruction *I, LoopInfo *LI,
     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
-    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) {
+    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
+    MemorySSAUpdater *MSSAU) {
   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
          "Expect only trivially replaceable PHI");
   BasicBlock *ExitBlock = TPN->getParent();
@@ -1205,8 +1327,8 @@
   if (It != SunkCopies.end())
     New = It->second;
   else
-    New = SunkCopies[ExitBlock] =
-        CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo);
+    New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
+        *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
   return New;
 }
 
@@ -1230,7 +1352,8 @@
 
 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
                                         LoopInfo *LI, const Loop *CurLoop,
-                                        LoopSafetyInfo *SafetyInfo) {
+                                        LoopSafetyInfo *SafetyInfo,
+                                        MemorySSAUpdater *MSSAU) {
 #ifndef NDEBUG
   SmallVector<BasicBlock *, 32> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -1280,7 +1403,7 @@
            "Expect all predecessors are in the loop");
     if (PN->getBasicBlockIndex(PredBB) >= 0) {
       BasicBlock *NewPred = SplitBlockPredecessors(
-          ExitBB, PredBB, ".split.loop.exit", DT, LI, nullptr, true);
+          ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
       // Since we do not allow splitting EH-block with BlockColors in
       // canSplitPredecessors(), we can simply assign predecessor's color to
       // the new block.
@@ -1301,7 +1424,8 @@
 ///
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
-                 OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
+                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+                 bool FreeInLoop) {
   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1353,7 +1477,7 @@
 
     // Split predecessors of the PHI so that we can make users trivially
     // replaceable.
-    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);
+    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
 
     // Should rebuild the iterators, as they may be invalidated by
     // splitPredecessorsOfLoopExit().
@@ -1388,10 +1512,10 @@
     assert(ExitBlockSet.count(PN->getParent()) &&
            "The LCSSA PHI is not in an exit block!");
     // The PHI must be trivially replaceable.
-    Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies,
-                                                          SafetyInfo, CurLoop);
+    Instruction *New = sinkThroughTriviallyReplaceablePHI(
+        PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
     PN->replaceAllUsesWith(New);
-    eraseInstruction(*PN, *SafetyInfo, nullptr);
+    eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
     Changed = true;
   }
   return Changed;
@@ -1402,7 +1526,7 @@
 ///
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
-                  OptimizationRemarkEmitter *ORE) {
+                  MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
                     << "\n");
   ORE->emit([&]() {
@@ -1427,6 +1551,13 @@
   else
     // Move the new node to the destination block, before its terminator.
     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
+  if (MSSAU) {
+    // If moving, I just moved a load or store, so update MemorySSA.
+    MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+        MSSAU->getMemorySSA()->getMemoryAccess(&I));
+    if (OldMemAcc)
+      MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
+  }
 
   // Do not retain debug locations when we are moving instructions to different
   // basic blocks, because we want to avoid jumpy line tables. Calls, however,
@@ -1831,7 +1962,7 @@
 
   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
   if (PreheaderLoad->use_empty())
-    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST);
+    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
 
   return true;
 }
@@ -1961,6 +2092,18 @@
   return false;
 }
 
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+                                             Loop *CurLoop) {
+  MemoryAccess *Source;
+  // See declaration of EnableLicmCap for usage details.
+  if (EnableLicmCap)
+    Source = MU->getDefiningAccess();
+  else
+    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+  return !MSSA->isLiveOnEntryDef(Source) &&
+         CurLoop->contains(Source->getBlock());
+}
+
 /// Little predicate that returns true if the specified basic block is in
 /// a subloop of the current one, not the current one itself.
 ///
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 540d19f..2f7ad21 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -304,7 +304,7 @@
     // No need to check for instruction's operands are loop invariant.
     assert(L.hasLoopInvariantOperands(I) &&
            "Insts in a loop's preheader should have loop invariant operands!");
-    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, false))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
       continue;
     if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
       Changed = true;
diff --git a/test/Transforms/LICM/argmemonly-call.ll b/test/Transforms/LICM/argmemonly-call.ll
index b9e0818..86c92a5 100644
--- a/test/Transforms/LICM/argmemonly-call.ll
+++ b/test/Transforms/LICM/argmemonly-call.ll
@@ -2,6 +2,7 @@
 ; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2
 ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa %s | FileCheck %s --check-prefix=ALIAS-N2
 
 declare i32 @foo() readonly argmemonly nounwind
 declare i32 @foo2() readonly nounwind
@@ -11,6 +12,9 @@
 ; CHECK-LABEL: @test
 ; CHECK: @foo
 ; CHECK-LABEL: loop:
+; ALIAS-N2-LABEL: @test
+; ALIAS-N2: @foo
+; ALIAS-N2-LABEL: loop:
   br label %loop
 
 loop:
@@ -24,6 +28,9 @@
 ; CHECK-LABEL: @test_neg
 ; CHECK-LABEL: loop:
 ; CHECK: @foo
+; ALIAS-N2-LABEL: @test_neg
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: @foo
   br label %loop
 
 loop:
@@ -36,6 +43,9 @@
 ; CHECK-LABEL: @test2
 ; CHECK: @bar
 ; CHECK-LABEL: loop:
+; ALIAS-N2-LABEL: @test2
+; ALIAS-N2: @bar
+; ALIAS-N2-LABEL: loop:
   br label %loop
 
 loop:
@@ -49,6 +59,9 @@
 ; CHECK-LABEL: @test3
 ; CHECK-LABEL: loop:
 ; CHECK: @bar
+; ALIAS-N2-LABEL: @test3
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: @bar
   br label %loop
 
 loop:
@@ -64,6 +77,9 @@
 ; CHECK-LABEL: @test4
 ; CHECK-LABEL: loop:
 ; CHECK: @bar
+; ALIAS-N2-LABEL: @test4
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: @bar
   br label %loop
 
 loop:
@@ -77,6 +93,7 @@
 ; we clump foo_new with bar.
 ; With the N2 Alias analysis diagnostic tool, we are able to hoist the
 ; argmemonly bar call out of the loop.
+; Using MemorySSA we can also hoist bar.
 
 define void @test5(i32* %loc2, i32* noalias %loc) {
 ; ALIAS-N2-LABEL: @test5
@@ -103,6 +120,10 @@
 ; CHECK: %val = load i32, i32* %loc2
 ; CHECK-LABEL: loop:
 ; CHECK: @llvm.memcpy
+; ALIAS-N2-LABEL: @test6
+; ALIAS-N2: %val = load i32, i32* %loc2
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: @llvm.memcpy
   br label %loop
 
 loop:
@@ -119,6 +140,10 @@
 ; CHECK: %val = load i32, i32* %loc2
 ; CHECK-LABEL: loop:
 ; CHECK: @custom_memcpy
+; ALIAS-N2-LABEL: @test7
+; ALIAS-N2: %val = load i32, i32* %loc2
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: @custom_memcpy
   br label %loop
 
 loop:
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
index 956c728..fcd6d08 100644
--- a/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-debuginvariant.ll b/test/Transforms/LICM/hoist-debuginvariant.ll
index 95c4018..71aeb75 100644
--- a/test/Transforms/LICM/hoist-debuginvariant.ll
+++ b/test/Transforms/LICM/hoist-debuginvariant.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -licm -S | FileCheck %s
 ; RUN: opt < %s -strip-debug -licm -S | FileCheck %s
+; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s --check-prefixes=CHECK,MSSA
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -16,7 +17,9 @@
 ; CHECK-NEXT: [[_TMP2:%.*]] = load i32, i32* @a, align 4
 ; CHECK-NEXT: [[_TMP3:%.*]] = load i32, i32* @b, align 4
 ; CHECK-NEXT: [[_TMP4:%.*]] = sdiv i32 [[_TMP2]], [[_TMP3]]
+; MSSA-NEXT: store i32 [[_TMP4:%.*]], i32* @c, align 4
 ; CHECK-NEXT: br label [[BB3:%.*]]
+
   br label %bb3
 
 bb3:                                              ; preds = %bb3, %0
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index 6c9a817..aacff88 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-fast-fdiv.ll b/test/Transforms/LICM/hoist-fast-fdiv.ll
index 57df241..bdefcf9 100644
--- a/test/Transforms/LICM/hoist-fast-fdiv.ll
+++ b/test/Transforms/LICM/hoist-fast-fdiv.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -licm -S < %s | FileCheck %s
+; RUN: opt -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
 
 ; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
 define zeroext i1 @invariant_denom(double %v) #0 {
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index ed669f3..909652b 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm"
+; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -disable-basicaa -stats -S 2>&1 | grep "1 licm"
 
 @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
 @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll
index d53e404..a582173 100644
--- a/test/Transforms/LICM/hoist-nounwind.ll
+++ b/test/Transforms/LICM/hoist-nounwind.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Transforms/LICM/hoist-phi.ll b/test/Transforms/LICM/hoist-phi.ll
index a711156..f3a137d 100644
--- a/test/Transforms/LICM/hoist-phi.ll
+++ b/test/Transforms/LICM/hoist-phi.ll
@@ -5,6 +5,11 @@
 ; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=1 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-ENABLED
 ; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=0 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-DISABLED
 
+; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=1 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-ENABLED
+; Enable run below when adding promotion. e.g. "store i32 %phi, i32* %p" is promoted to phi.lcssa.
+; opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=0 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-DISABLED
+
+
 ; CHECK-LABEL: @triangle_phi
 define void @triangle_phi(i32 %x, i32* %p) {
 ; CHECK-LABEL: entry:
diff --git a/test/Transforms/LICM/hoist-round.ll b/test/Transforms/LICM/hoist-round.ll
index 35851f3..10f75be 100644
--- a/test/Transforms/LICM/hoist-round.ll
+++ b/test/Transforms/LICM/hoist-round.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -licm < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
 
 target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"
 
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 4572ad3..f65b965 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -licm -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
+; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
 
 @X = global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/Transforms/LICM/sink-promote.ll b/test/Transforms/LICM/sink-promote.ll
new file mode 100644
index 0000000..45024c4
--- /dev/null
+++ b/test/Transforms/LICM/sink-promote.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+
+; Test moved from sinking.ll, as it tests sinking of a store who alone touches
+; a memory location in a loop.
+; Store can be sunk out of exit block containing indirectbr instructions after
+; D50925. Updated to use an argument instead of undef, due to PR38989.
+define void @test12(i32* %ptr) {
+; CHECK-LABEL: @test12
+; CHECK: store
+; CHECK-NEXT: br label %lab4
+  br label %lab4
+
+lab4:
+  br label %lab20
+
+lab5:
+  br label %lab20
+
+lab6:
+  br label %lab4
+
+lab7:
+  br i1 undef, label %lab8, label %lab13
+
+lab8:
+  br i1 undef, label %lab13, label %lab10
+
+lab10:
+  br label %lab7
+
+lab13:
+  ret void
+
+lab20:
+  br label %lab21
+
+lab21:
+; CHECK: lab21:
+; CHECK-NOT: store
+; CHECK: br i1 false, label %lab21, label %lab22
+  store i32 36127957, i32* %ptr, align 4
+  br i1 undef, label %lab21, label %lab22
+
+lab22:
+; CHECK: lab22:
+; CHECK-NOT: store
+; CHECK-NEXT: indirectbr i8* undef
+  indirectbr i8* undef, [label %lab5, label %lab6, label %lab7]
+}
+
diff --git a/test/Transforms/LICM/sink.ll b/test/Transforms/LICM/sink.ll
index 70fa6fa..17170f5 100644
--- a/test/Transforms/LICM/sink.ll
+++ b/test/Transforms/LICM/sink.ll
@@ -2,6 +2,7 @@
 ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
 ; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-SINK
+; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
 
 ; Original source code:
 ; int g;
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index 3bcbc5d..cc30494 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -basicaa -licm -S | FileCheck %s
 ; RUN: opt < %s -debugify -basicaa -licm -S | FileCheck %s -check-prefix=DEBUGIFY
+; RUN: opt < %s -basicaa -licm -S -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
+
 
 declare i32 @strlen(i8*) readonly nounwind
 
@@ -358,50 +360,7 @@
   ret i32 %lcssa
 }
 
-; Can't sink stores out of exit blocks containing indirectbr instructions
-; because loop simplify does not create dedicated exits for such blocks. Test
-; that by sinking the store from lab21 to lab22, but not further.
-define void @test12() {
-; CHECK-LABEL: @test12
-  br label %lab4
-
-lab4:
-  br label %lab20
-
-lab5:
-  br label %lab20
-
-lab6:
-  br label %lab4
-
-lab7:
-  br i1 undef, label %lab8, label %lab13
-
-lab8:
-  br i1 undef, label %lab13, label %lab10
-
-lab10:
-  br label %lab7
-
-lab13:
-  ret void
-
-lab20:
-  br label %lab21
-
-lab21:
-; CHECK: lab21:
-; CHECK-NOT: store
-; CHECK: br i1 false, label %lab21, label %lab22
-  store i32 36127957, i32* undef, align 4
-  br i1 undef, label %lab21, label %lab22
-
-lab22:
-; CHECK: lab22:
-; CHECK: store
-; CHECK-NEXT: indirectbr i8* undef
-  indirectbr i8* undef, [label %lab5, label %lab6, label %lab7]
-}
+; @test12 moved to sink-promote.ll, as it tests sinking and promotion.
 
 ; Test that we don't crash when trying to sink stores and there's no preheader
 ; available (which is used for creating loads that may be used by the SSA
diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll
index f387012..c1f727c 100644
--- a/test/Transforms/LICM/volatile-alias.ll
+++ b/test/Transforms/LICM/volatile-alias.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
 ; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
+; RUN: opt -basicaa -sroa -loop-rotate -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
 ; The objects *p and *q are aliased to each other, but even though *q is
 ; volatile, *p can be considered invariant in the loop. Check if it is moved
 ; out of the loop.