[NPM][LTO] Update buildLTODefaultPipeline to be more in line with the old pass manager

The NPM LTO pipeline has a lot of FIXMEs and missing passes, causing a
lot of regressions after the switch in c70737b. Notably, unrolling and
vectorization were both disabled, but many other passes are also missing
compared to the old pass manager. This patch attempts to enable the most
obvious missing passes, such as the unroller, vectorization and other
loop passes, resolving the existing FIXME comments.

Differential Revision: https://reviews.llvm.org/D96780

GitOrigin-RevId: 908ac47ef4c21d87c90e47859f81c747228e460e
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 6f35e56..1581af9 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -1727,14 +1727,15 @@
   // Run a few AA driver optimizations here and now to cleanup the code.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
-              PostOrderFunctionAttrsPass()));
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
   // FIXME: here we run IP alias analysis in the legacy PM.
 
   FunctionPassManager MainFPM;
 
-  // FIXME: once we fix LoopPass Manager, add LICM here.
-  // FIXME: once we provide support for enabling MLSM, add it here.
+  MainFPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)));
+
   if (RunNewGVN)
     MainFPM.addPass(NewGVNPass());
   else
@@ -1745,11 +1746,37 @@
 
   // Nuke dead stores.
   MainFPM.addPass(DSEPass());
+  MainFPM.addPass(MergedLoadStoreMotionPass());
 
-  // FIXME: at this point, we run a bunch of loop passes:
-  // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
-  // loopVectorize. Enable them once the remaining issue with LPM
-  // are sorted out.
+  // More loops are countable; try to optimize them.
+  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
+    MainFPM.addPass(LoopFlattenPass());
+
+  if (EnableConstraintElimination)
+    MainFPM.addPass(ConstraintEliminationPass());
+
+  LoopPassManager LPM(DebugLogging);
+  LPM.addPass(IndVarSimplifyPass());
+  LPM.addPass(LoopDeletionPass());
+  // FIXME: Add loop interchange.
+
+  // Unroll small loops and perform peeling.
+  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+                                 PTO.ForgetAllSCEVInLoopUnroll));
+  MainFPM.addPass(createFunctionToLoopPassAdaptor(
+      std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+      DebugLogging));
+
+  MainFPM.addPass(LoopDistributePass());
+  MainFPM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+  // The vectorizer may have significantly shortened a loop body; unroll again.
+  MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+      PTO.ForgetAllSCEVInLoopUnroll)));
+
+  MainFPM.addPass(WarnMissedTransformationsPass());
 
   MainFPM.addPass(InstCombinePass());
   MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
@@ -1757,12 +1784,19 @@
   MainFPM.addPass(InstCombinePass());
   MainFPM.addPass(BDCEPass());
 
-  // FIXME: We may want to run SLPVectorizer here.
-  // After vectorization, assume intrinsics may tell us more
-  // about pointer alignments.
-#if 0
-  MainFPM.add(AlignmentFromAssumptionsPass());
-#endif
+  // More scalar chains could be vectorized due to more alias information.
+  if (PTO.SLPVectorization) {
+    MainFPM.addPass(SLPVectorizerPass());
+    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+      MainFPM.addPass(EarlyCSEPass());
+    }
+  }
+
+  MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization.
+
+  // After vectorization, assume intrinsics may tell us more about pointer
+  // alignments.
+  MainFPM.addPass(AlignmentFromAssumptionsPass());
 
   // FIXME: Conditionally run LoadCombine here, after it's ported
   // (in case we still have this pass, given its questionable usefulness).
diff --git a/test/Other/new-pm-lto-defaults.ll b/test/Other/new-pm-lto-defaults.ll
index bf27ca9..50592ff 100644
--- a/test/Other/new-pm-lto-defaults.ll
+++ b/test/Other/new-pm-lto-defaults.ll
@@ -7,20 +7,22 @@
 ; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1
 ; RUN: opt -disable-verify -debug-pass-manager \
 ; RUN:     -passes='lto<O2>' -S  %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
+; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
+; RUN:     --check-prefix=CHECK-O2
 ; RUN: opt -disable-verify -debug-pass-manager \
 ; RUN:     -passes='lto<O3>' -S  %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
 ; RUN:     --check-prefix=CHECK-O3
 ; RUN: opt -disable-verify -debug-pass-manager \
 ; RUN:     -passes='lto<Os>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
+; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
+; RUN:     --check-prefix=CHECK-OS
 ; RUN: opt -disable-verify -debug-pass-manager \
 ; RUN:     -passes='lto<Oz>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
+; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ
 ; RUN: opt -disable-verify -debug-pass-manager \
 ; RUN:     -passes='lto<O3>' -S  %s -passes-ep-peephole='no-op-function' 2>&1 \
-; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
 ; RUN:     --check-prefix=CHECK-O3 --check-prefix=CHECK-EP-Peephole
 
 ; CHECK-O: Starting llvm::Module pass manager run.
@@ -30,18 +32,18 @@
 ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Module
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
-; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
-; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis on foo
-; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
-; CHECK-O2-NEXT: PGOIndirectCallPromotion
-; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
-; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-; CHECK-O2-NEXT: Running pass: IPSCCPPass
-; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo
-; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: CallSiteSplittingPass on foo
+; CHECK-O23SZ-NEXT: Running analysis: TargetLibraryAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: TargetIRAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: DominatorTreeAnalysis on foo
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion
+; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
+; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
@@ -60,56 +62,83 @@
 ; CHECK-O-NEXT: Running pass: GlobalSplitPass
 ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
 ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
-; CHECK-O2-NEXT: Running pass: GlobalOptPass
-; CHECK-O2-NEXT: Running pass: PromotePass
-; CHECK-O2-NEXT: Running pass: ConstantMergePass
-; CHECK-O2-NEXT: Running pass: DeadArgumentEliminationPass
-; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass
+; CHECK-O23SZ-NEXT: Running pass: PromotePass
+; CHECK-O23SZ-NEXT: Running pass: ConstantMergePass
+; CHECK-O23SZ-NEXT: Running pass: DeadArgumentEliminationPass
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
-; CHECK-O2-NEXT: Running pass: InstCombinePass
+; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: ModuleInlinerWrapperPass
-; CHECK-O2-NEXT: Running analysis: InlineAdvisorAnalysis
-; CHECK-O2-NEXT: Starting llvm::Module pass manager run.
-; CHECK-O2-NEXT: Starting CGSCC pass manager run.
-; CHECK-O2-NEXT: Running pass: InlinerPass
-; CHECK-O2-NEXT: Running pass: InlinerPass
-; CHECK-O2-NEXT: Finished CGSCC pass manager run.
-; CHECK-O2-NEXT: Finished llvm::Module pass manager run.
-; CHECK-O2-NEXT: Running pass: GlobalOptPass
-; CHECK-O2-NEXT: Running pass: GlobalDCEPass
-; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: InstCombinePass
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O23SZ-NEXT: Running analysis: InlineAdvisorAnalysis
+; CHECK-O23SZ-NEXT: Starting llvm::Module pass manager run.
+; CHECK-O23SZ-NEXT: Starting CGSCC pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: InlinerPass
+; CHECK-O23SZ-NEXT: Running pass: InlinerPass
+; CHECK-O23SZ-NEXT: Finished CGSCC pass manager run.
+; CHECK-O23SZ-NEXT: Finished llvm::Module pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass
+; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O2-NEXT: Running pass: JumpThreadingPass
-; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis
-; CHECK-O2-NEXT: Running pass: SROA on foo
-; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo
-; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: PostOrderFunctionAttrsPass
-; CHECK-O2-NEXT: Running pass: GVN on foo
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
-; CHECK-O2-NEXT: Running pass: MemCpyOptPass on foo
-; CHECK-O2-NEXT: Running pass: DSEPass on foo
-; CHECK-O2-NEXT: Running analysis: MemorySSAAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
-; CHECK-O2-NEXT: Running pass: InstCombinePass on foo
-; CHECK-O2-NEXT: Running pass: SimplifyCFGPass on foo
-; CHECK-O2-NEXT: Running pass: SCCPPass on foo
-; CHECK-O2-NEXT: Running pass: InstCombinePass on foo
-; CHECK-O2-NEXT: Running pass: BDCEPass on foo
-; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
-; CHECK-O2-NEXT: Running pass: InstCombinePass
-; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O2-NEXT: Running pass: JumpThreadingPass
-; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
-; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
+; CHECK-O23SZ-NEXT: Running pass: SROA on foo
+; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass on foo
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass on (foo)
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
+; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
+; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy
+; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop
+; CHECK-O23SZ-NEXT: Running pass: GVN on foo
+; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
+; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
+; CHECK-O23SZ-NEXT: Running analysis: MemorySSAAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Starting Loop pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: IndVarSimplifyPass on Loop
+; CHECK-O23SZ-NEXT: Running pass: LoopDeletionPass on Loop
+; CHECK-O23SZ-NEXT: Running pass: LoopFullUnrollPass on Loop
+; CHECK-O23SZ-NEXT: Finished Loop pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: LoopDistributePass on foo
+; CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo
+; CHECK-O23SZ-NEXT: Running analysis: BlockFrequencyAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: BranchProbabilityAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo
+; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo
+; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
+; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass on foo
+; CHECK-O23SZ-NEXT: Running pass: SCCPPass on foo
+; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
+; CHECK-O23SZ-NEXT: Running pass: BDCEPass on foo
+; CHECK-O2-NEXT: Running pass: SLPVectorizerPass on foo
+; CHECK-O3-NEXT: Running pass: SLPVectorizerPass on foo
+; CHECK-OS-NEXT: Running pass: SLPVectorizerPass on foo
+; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo
+; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo
+; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
+; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo
+; CHECK-O23SZ-NEXT: Running pass: CrossDSOCFIPass
+; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
-; CHECK-O2-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass
-; CHECK-O2-NEXT: Running pass: GlobalDCEPass
+; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O23SZ-NEXT: Running pass: EliminateAvailableExternallyPass
+; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
 ; CHECK-O-NEXT: Running pass: PrintModulePass