| //===- ScopBuilder.cpp ----------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Create a polyhedral description for a static control flow region. |
| // |
| // The pass creates a polyhedral description of the Scops detected by the SCoP |
| // detection derived from their LLVM-IR code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "polly/ScopBuilder.h" |
| #include "polly/Options.h" |
| #include "polly/ScopDetection.h" |
| #include "polly/ScopInfo.h" |
| #include "polly/Support/GICHelper.h" |
| #include "polly/Support/ISLTools.h" |
| #include "polly/Support/SCEVValidator.h" |
| #include "polly/Support/ScopHelper.h" |
| #include "polly/Support/VirtualInstruction.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/EquivalenceClasses.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/Sequence.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/AliasAnalysis.h" |
| #include "llvm/Analysis/AssumptionCache.h" |
| #include "llvm/Analysis/Delinearization.h" |
| #include "llvm/Analysis/Loads.h" |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/Analysis/RegionInfo.h" |
| #include "llvm/Analysis/RegionIterator.h" |
| #include "llvm/Analysis/ScalarEvolution.h" |
| #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <cassert> |
| |
| using namespace llvm; |
| using namespace polly; |
| |
| #define DEBUG_TYPE "polly-scops" |
| |
| STATISTIC(ScopFound, "Number of valid Scops"); |
| STATISTIC(RichScopFound, "Number of Scops containing a loop"); |
| STATISTIC(InfeasibleScops, |
| "Number of SCoPs with statically infeasible context."); |
| |
| bool polly::ModelReadOnlyScalars; |
| |
| // The maximal number of dimensions we allow during invariant load construction. |
| // More complex access ranges will result in very high compile time and are also |
| // unlikely to result in good code. This value is very high and should only |
| // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). |
| static unsigned const MaxDimensionsInAccessRange = 9; |
| |
| static cl::opt<bool, true> XModelReadOnlyScalars( |
| "polly-analyze-read-only-scalars", |
| cl::desc("Model read-only scalar values in the scop description"), |
| cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true), |
| cl::cat(PollyCategory)); |
| |
| static cl::opt<int> |
| OptComputeOut("polly-analysis-computeout", |
| cl::desc("Bound the scop analysis by a maximal amount of " |
| "computational steps (0 means no bound)"), |
| cl::Hidden, cl::init(800000), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams( |
| "polly-allow-dereference-of-all-function-parameters", |
| cl::desc( |
| "Treat all parameters to functions that are pointers as dereferencible." |
| " This is useful for invariant load hoisting, since we can generate" |
| " less runtime checks. This is only valid if all pointers to functions" |
| " are always initialized, so that Polly can choose to hoist" |
| " their loads. "), |
| cl::Hidden, cl::init(false), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> |
| PollyIgnoreInbounds("polly-ignore-inbounds", |
| cl::desc("Do not take inbounds assumptions at all"), |
| cl::Hidden, cl::init(false), cl::cat(PollyCategory)); |
| |
| static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup( |
| "polly-rtc-max-arrays-per-group", |
| cl::desc("The maximal number of arrays to compare in each alias group."), |
| cl::Hidden, cl::init(20), cl::cat(PollyCategory)); |
| |
| static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts( |
| "polly-rtc-max-array-disjuncts", |
| cl::desc("The maximal number of disjunts allowed in memory accesses to " |
| "to build RTCs."), |
| cl::Hidden, cl::init(8), cl::cat(PollyCategory)); |
| |
| static cl::opt<unsigned> RunTimeChecksMaxParameters( |
| "polly-rtc-max-parameters", |
| cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, |
| cl::init(8), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> UnprofitableScalarAccs( |
| "polly-unprofitable-scalar-accs", |
| cl::desc("Count statements with scalar accesses as not optimizable"), |
| cl::Hidden, cl::init(false), cl::cat(PollyCategory)); |
| |
| static cl::opt<std::string> UserContextStr( |
| "polly-context", cl::value_desc("isl parameter set"), |
| cl::desc("Provide additional constraints on the context parameters"), |
| cl::init(""), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> DetectReductions("polly-detect-reductions", |
| cl::desc("Detect and exploit reductions"), |
| cl::Hidden, cl::init(true), |
| cl::cat(PollyCategory)); |
| |
| // Multiplicative reductions can be disabled separately as these kinds of |
| // operations can overflow easily. Additive reductions and bit operations |
| // are in contrast pretty stable. |
| static cl::opt<bool> DisableMultiplicativeReductions( |
| "polly-disable-multiplicative-reductions", |
| cl::desc("Disable multiplicative reductions"), cl::Hidden, |
| cl::cat(PollyCategory)); |
| |
| enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores }; |
| |
| static cl::opt<GranularityChoice> StmtGranularity( |
| "polly-stmt-granularity", |
| cl::desc( |
| "Algorithm to use for splitting basic blocks into multiple statements"), |
| cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", |
| "One statement per basic block"), |
| clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", |
| "Scalar independence heuristic"), |
| clEnumValN(GranularityChoice::Stores, "store", |
| "Store-level granularity")), |
| cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory)); |
| |
| /// Helper to treat non-affine regions and basic blocks the same. |
| /// |
| ///{ |
| |
| /// Return the block that is the representing block for @p RN. |
| static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) { |
| return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry() |
| : RN->getNodeAs<BasicBlock>(); |
| } |
| |
| /// Return the @p idx'th block that is executed after @p RN. |
| static inline BasicBlock * |
| getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) { |
| if (RN->isSubRegion()) { |
| assert(idx == 0); |
| return RN->getNodeAs<Region>()->getExit(); |
| } |
| return TI->getSuccessor(idx); |
| } |
| |
| static bool containsErrorBlock(RegionNode *RN, const Region &R, |
| ScopDetection *SD) { |
| if (!RN->isSubRegion()) |
| return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R); |
| for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks()) |
| if (SD->isErrorBlock(*BB, R)) |
| return true; |
| return false; |
| } |
| |
| ///} |
| |
| /// Create a map to map from a given iteration to a subsequent iteration. |
| /// |
| /// This map maps from SetSpace -> SetSpace where the dimension @p Dim |
| /// is incremented by one and all other dimensions are equal, e.g., |
| /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] |
| /// |
| /// if @p Dim is 2 and @p SetSpace has 4 dimensions. |
| static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) { |
| isl::space MapSpace = SetSpace.map_from_set(); |
| isl::map NextIterationMap = isl::map::universe(MapSpace); |
| for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim())) |
| if (u != Dim) |
| NextIterationMap = |
| NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u); |
| isl::constraint C = |
| isl::constraint::alloc_equality(isl::local_space(MapSpace)); |
| C = C.set_constant_si(1); |
| C = C.set_coefficient_si(isl::dim::in, Dim, 1); |
| C = C.set_coefficient_si(isl::dim::out, Dim, -1); |
| NextIterationMap = NextIterationMap.add_constraint(C); |
| return NextIterationMap; |
| } |
| |
| /// Return the union of all bounded parts (basic sets) of @p S. |
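| /// For illustration (hypothetical set): for S = { [i] : 0 <= i <= 7 or i >= N } |
| /// only the first disjunct is bounded, so the result would be |
| /// { [i] : 0 <= i <= 7 }. |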
| static isl::set collectBoundedParts(isl::set S) { |
| isl::set BoundedParts = isl::set::empty(S.get_space()); |
| for (isl::basic_set BSet : S.get_basic_set_list()) |
| if (BSet.is_bounded()) |
| BoundedParts = BoundedParts.unite(isl::set(BSet)); |
| return BoundedParts; |
| } |
| |
| /// Compute the (un)bounded parts of @p S with respect to dimension @p Dim. |
| /// |
| /// @returns A separation of @p S into first an unbounded then a bounded subset, |
| /// both with regards to the dimension @p Dim. |
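| /// For illustration (hypothetical domain): for S = { [i] : 0 <= i < N } and |
| /// Dim = 0, S is bounded for every value of the parameter N, so the bounded |
| /// part would be S itself and the unbounded part empty; for S = { [i] : 0 <= i } |
| /// the bounded part would be empty instead. |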
| static std::pair<isl::set, isl::set> partitionSetParts(isl::set S, |
| unsigned Dim) { |
| for (unsigned u : rangeIslSize(0, S.tuple_dim())) |
| S = S.lower_bound_si(isl::dim::set, u, 0); |
| |
| unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim()); |
| isl::set OnlyDimS = S; |
| |
| // Remove dimensions that are greater than Dim as they are not interesting. |
| assert(NumDimsS >= Dim + 1); |
| OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1); |
| |
| // Create artificial parametric upper bounds for dimensions smaller than Dim |
| // as we are not interested in them. |
| OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim); |
| |
| for (unsigned u = 0; u < Dim; u++) { |
| isl::constraint C = isl::constraint::alloc_inequality( |
| isl::local_space(OnlyDimS.get_space())); |
| C = C.set_coefficient_si(isl::dim::param, u, 1); |
| C = C.set_coefficient_si(isl::dim::set, u, -1); |
| OnlyDimS = OnlyDimS.add_constraint(C); |
| } |
| |
| // Collect all bounded parts of OnlyDimS. |
| isl::set BoundedParts = collectBoundedParts(OnlyDimS); |
| |
| // Create the dimensions greater than Dim again. |
| BoundedParts = |
| BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1); |
| |
| // Remove the artificial upper bound parameters again. |
| BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim); |
| |
| isl::set UnboundedParts = S.subtract(BoundedParts); |
| return std::make_pair(UnboundedParts, BoundedParts); |
| } |
| |
| /// Create the conditions under which @p L @p Pred @p R is true. |
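| /// For example, for Pred == ICMP_SLE, L = { [i] -> [(i)] } and |
| /// R = [N] -> { [i] -> [(N)] }, the result would be { [i] : i <= N } |
| /// (illustrative values; unsigned predicates are mapped to their signed |
| /// counterparts, the callers are responsible for the required assumptions). |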
| static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L, |
| isl::pw_aff R) { |
| switch (Pred) { |
| case ICmpInst::ICMP_EQ: |
| return L.eq_set(R); |
| case ICmpInst::ICMP_NE: |
| return L.ne_set(R); |
| case ICmpInst::ICMP_SLT: |
| return L.lt_set(R); |
| case ICmpInst::ICMP_SLE: |
| return L.le_set(R); |
| case ICmpInst::ICMP_SGT: |
| return L.gt_set(R); |
| case ICmpInst::ICMP_SGE: |
| return L.ge_set(R); |
| case ICmpInst::ICMP_ULT: |
| return L.lt_set(R); |
| case ICmpInst::ICMP_UGT: |
| return L.gt_set(R); |
| case ICmpInst::ICMP_ULE: |
| return L.le_set(R); |
| case ICmpInst::ICMP_UGE: |
| return L.ge_set(R); |
| default: |
| llvm_unreachable("Non integer predicate not supported"); |
| } |
| } |
| |
| isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL, |
| Loop *NewL) { |
| // If the loops are the same there is nothing to do. |
| if (NewL == OldL) |
| return Dom; |
| |
| int OldDepth = scop->getRelativeLoopDepth(OldL); |
| int NewDepth = scop->getRelativeLoopDepth(NewL); |
| // If both loops are non-affine loops there is nothing to do. |
| if (OldDepth == -1 && NewDepth == -1) |
| return Dom; |
| |
| // Distinguish three cases: |
| //   1) The depth is the same but the loops are not. |
| //      => One loop was left, one was entered. |
| //   2) The depth increased from OldL to NewL. |
| //      => One loop was entered, none was left. |
| //   3) The depth decreased from OldL to NewL. |
| //      => Loops were left; the difference of the depths defines how many. |
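| // For illustration: switching between two loops at the same depth (case 1) |
| // drops the dimension of the loop that was left and appends a fresh, |
| // unconstrained dimension for the loop that was entered; leaving loops |
| // (case 3) drops as many innermost dimensions as loops were left. |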
| if (OldDepth == NewDepth) { |
| assert(OldL->getParentLoop() == NewL->getParentLoop()); |
| Dom = Dom.project_out(isl::dim::set, NewDepth, 1); |
| Dom = Dom.add_dims(isl::dim::set, 1); |
| } else if (OldDepth < NewDepth) { |
| assert(OldDepth + 1 == NewDepth); |
| auto &R = scop->getRegion(); |
| (void)R; |
| assert(NewL->getParentLoop() == OldL || |
| ((!OldL || !R.contains(OldL)) && R.contains(NewL))); |
| Dom = Dom.add_dims(isl::dim::set, 1); |
| } else { |
| assert(OldDepth > NewDepth); |
| unsigned Diff = OldDepth - NewDepth; |
| unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim()); |
| assert(NumDim >= Diff); |
| Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff); |
| } |
| |
| return Dom; |
| } |
| |
| /// Compute the isl representation for the SCEV @p E in this BB. |
| /// |
| /// @param BB The BB for which isl representation is to be |
| /// computed. |
| /// @param InvalidDomainMap A map of BB to their invalid domains. |
| /// @param E The SCEV that should be translated. |
| /// @param NonNegative Flag to indicate that @p E has to be non-negative. |
| /// |
| /// Note that this function will also adjust the invalid context accordingly. |
| |
| __isl_give isl_pw_aff * |
| ScopBuilder::getPwAff(BasicBlock *BB, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, |
| const SCEV *E, bool NonNegative) { |
| PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions); |
| InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second); |
| return PWAC.first.release(); |
| } |
| |
| /// Build condition sets for unsigned ICmpInst(s). |
| /// Special handling is required for unsigned operands to ensure that if |
| /// the MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst |
| /// it should wrap around. |
| /// |
| /// @param IsStrictUpperBound holds information on the predicate relation |
| /// between TestVal and UpperBound, i.e., |
| /// TestVal < UpperBound OR TestVal <= UpperBound |
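| /// |
| /// For illustration: an unsigned condition such as 'i <u N' would be modeled |
| /// as the two signed constraints 0 <= i and i < N, i.e., the returned set |
| /// would be { [i] : 0 <= i and i < N } (names are illustrative). |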
| __isl_give isl_set *ScopBuilder::buildUnsignedConditionSets( |
| BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain, |
| const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, |
| bool IsStrictUpperBound) { |
| // Do not take NonNeg assumption on TestVal |
| // as it might have MSB (Sign bit) set. |
| isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false); |
| // Take NonNeg assumption on UpperBound. |
| isl_pw_aff *UpperBound = |
| getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true); |
| |
| // 0 <= TestVal |
| isl_set *First = |
| isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space( |
| isl_pw_aff_get_domain_space(TestVal))), |
| isl_pw_aff_copy(TestVal)); |
| |
| isl_set *Second; |
| if (IsStrictUpperBound) |
| // TestVal < UpperBound |
| Second = isl_pw_aff_lt_set(TestVal, UpperBound); |
| else |
| // TestVal <= UpperBound |
| Second = isl_pw_aff_le_set(TestVal, UpperBound); |
| |
| isl_set *ConsequenceCondSet = isl_set_intersect(First, Second); |
| return ConsequenceCondSet; |
| } |
| |
| bool ScopBuilder::buildConditionSets( |
| BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, |
| SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { |
| Value *Condition = getConditionFromTerminator(SI); |
| assert(Condition && "No condition for switch"); |
| |
| isl_pw_aff *LHS, *RHS; |
| LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L)); |
| |
| unsigned NumSuccessors = SI->getNumSuccessors(); |
| ConditionSets.resize(NumSuccessors); |
| for (auto &Case : SI->cases()) { |
| unsigned Idx = Case.getSuccessorIndex(); |
| ConstantInt *CaseValue = Case.getCaseValue(); |
| |
| RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue)); |
| isl_set *CaseConditionSet = |
| buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS), |
| isl::manage(RHS)) |
| .release(); |
| ConditionSets[Idx] = isl_set_coalesce( |
| isl_set_intersect(CaseConditionSet, isl_set_copy(Domain))); |
| } |
| |
| assert(ConditionSets[0] == nullptr && "Default condition set was set"); |
| isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]); |
| for (unsigned u = 2; u < NumSuccessors; u++) |
| ConditionSetUnion = |
| isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u])); |
| ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion); |
| |
| isl_pw_aff_free(LHS); |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildConditionSets( |
| BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L, |
| __isl_keep isl_set *Domain, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, |
| SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { |
| isl_set *ConsequenceCondSet = nullptr; |
| |
| if (auto Load = dyn_cast<LoadInst>(Condition)) { |
| const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L); |
| const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType()); |
| bool NonNeg = false; |
| isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg); |
| isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg); |
| ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS), |
| isl::manage(RHS)) |
| .release(); |
| } else if (auto *PHI = dyn_cast<PHINode>(Condition)) { |
| auto *Unique = dyn_cast<ConstantInt>( |
| getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD)); |
| assert(Unique && |
| "A PHINode condition should only be accepted by ScopDetection if " |
| "getUniqueNonErrorValue returns non-NULL"); |
| |
| if (Unique->isZero()) |
| ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain)); |
| else |
| ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain)); |
| } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) { |
| if (CCond->isZero()) |
| ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain)); |
| else |
| ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain)); |
| } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { |
| auto Opcode = BinOp->getOpcode(); |
| assert(Opcode == Instruction::And || Opcode == Instruction::Or); |
| |
| bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain, |
| InvalidDomainMap, ConditionSets) && |
| buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain, |
| InvalidDomainMap, ConditionSets); |
| if (!Valid) { |
| while (!ConditionSets.empty()) |
| isl_set_free(ConditionSets.pop_back_val()); |
| return false; |
| } |
| |
| isl_set_free(ConditionSets.pop_back_val()); |
| isl_set *ConsCondPart0 = ConditionSets.pop_back_val(); |
| isl_set_free(ConditionSets.pop_back_val()); |
| isl_set *ConsCondPart1 = ConditionSets.pop_back_val(); |
| |
| if (Opcode == Instruction::And) |
| ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1); |
| else |
| ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1); |
| } else { |
| auto *ICond = dyn_cast<ICmpInst>(Condition); |
| assert(ICond && |
| "Condition of exiting branch was neither constant nor ICmp!"); |
| |
| Region &R = scop->getRegion(); |
| |
| isl_pw_aff *LHS, *RHS; |
| // For unsigned comparisons we assume the sign bit of neither operand |
| // to be set. The comparison is equal to a signed comparison under this |
| // assumption. |
| bool NonNeg = ICond->isUnsigned(); |
| const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L), |
| *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L); |
| |
| LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD); |
| RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD); |
| |
| switch (ICond->getPredicate()) { |
| case ICmpInst::ICMP_ULT: |
| ConsequenceCondSet = |
| buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand, |
| RightOperand, InvalidDomainMap, true); |
| break; |
| case ICmpInst::ICMP_ULE: |
| ConsequenceCondSet = |
| buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand, |
| RightOperand, InvalidDomainMap, false); |
| break; |
| case ICmpInst::ICMP_UGT: |
| ConsequenceCondSet = |
| buildUnsignedConditionSets(BB, Condition, Domain, RightOperand, |
| LeftOperand, InvalidDomainMap, true); |
| break; |
| case ICmpInst::ICMP_UGE: |
| ConsequenceCondSet = |
| buildUnsignedConditionSets(BB, Condition, Domain, RightOperand, |
| LeftOperand, InvalidDomainMap, false); |
| break; |
| default: |
| LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg); |
| RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg); |
| ConsequenceCondSet = buildConditionSet(ICond->getPredicate(), |
| isl::manage(LHS), isl::manage(RHS)) |
| .release(); |
| break; |
| } |
| } |
| |
| // If no terminator was given we are only looking for parameter constraints |
| // under which @p Condition is true/false. |
| if (!TI) |
| ConsequenceCondSet = isl_set_params(ConsequenceCondSet); |
| assert(ConsequenceCondSet); |
| ConsequenceCondSet = isl_set_coalesce( |
| isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain))); |
| |
| isl_set *AlternativeCondSet = nullptr; |
| bool TooComplex = |
| isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain; |
| |
| if (!TooComplex) { |
| AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain), |
| isl_set_copy(ConsequenceCondSet)); |
| TooComplex = |
| isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain; |
| } |
| |
| if (TooComplex) { |
| scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(), |
| TI ? TI->getParent() : nullptr /* BasicBlock */); |
| isl_set_free(AlternativeCondSet); |
| isl_set_free(ConsequenceCondSet); |
| return false; |
| } |
| |
| ConditionSets.push_back(ConsequenceCondSet); |
| ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet)); |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildConditionSets( |
| BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, |
| SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { |
| if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) |
| return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap, |
| ConditionSets); |
| |
| assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch."); |
| |
| if (TI->getNumSuccessors() == 1) { |
| ConditionSets.push_back(isl_set_copy(Domain)); |
| return true; |
| } |
| |
| Value *Condition = getConditionFromTerminator(TI); |
| assert(Condition && "No condition for Terminator"); |
| |
| return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap, |
| ConditionSets); |
| } |
| |
| bool ScopBuilder::propagateDomainConstraints( |
| Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| // Iterate over the region R and propagate the domain constraints from the |
| // predecessors to the current node. In contrast to the |
| // buildDomainsWithBranchConstraints function, this one will pull the domain |
| // information from the predecessors instead of pushing it to the successors. |
| // Additionally, we assume the domains to be already present in the domain |
| // map here. However, we iterate again in reverse post order so we know all |
| // predecessors have been visited before a block or non-affine subregion is |
| // visited. |
| |
| ReversePostOrderTraversal<Region *> RTraversal(R); |
| for (auto *RN : RTraversal) { |
| // Recurse for affine subregions but go on for basic blocks and non-affine |
| // subregions. |
| if (RN->isSubRegion()) { |
| Region *SubRegion = RN->getNodeAs<Region>(); |
| if (!scop->isNonAffineSubRegion(SubRegion)) { |
| if (!propagateDomainConstraints(SubRegion, InvalidDomainMap)) |
| return false; |
| continue; |
| } |
| } |
| |
| BasicBlock *BB = getRegionNodeBasicBlock(RN); |
| isl::set &Domain = scop->getOrInitEmptyDomain(BB); |
| assert(!Domain.is_null()); |
| |
| // Under the union of all predecessor conditions we can reach this block. |
| isl::set PredDom = getPredecessorDomainConstraints(BB, Domain); |
| Domain = Domain.intersect(PredDom).coalesce(); |
| Domain = Domain.align_params(scop->getParamSpace()); |
| |
| Loop *BBLoop = getRegionNodeLoop(RN, LI); |
| if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop)) |
| if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void ScopBuilder::propagateDomainConstraintsToRegionExit( |
| BasicBlock *BB, Loop *BBLoop, |
| SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, |
| DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| // Check if the block @p BB is the entry of a region. If so we propagate its |
| // domain to the exit block of the region. Otherwise we are done. |
| auto *RI = scop->getRegion().getRegionInfo(); |
| auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr; |
| auto *ExitBB = BBReg ? BBReg->getExit() : nullptr; |
| if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB)) |
| return; |
| |
| // Do not propagate the domain if there is a loop backedge inside the region |
| // that would prevent the exit block from being executed. |
| auto *L = BBLoop; |
| while (L && scop->contains(L)) { |
| SmallVector<BasicBlock *, 4> LatchBBs; |
| BBLoop->getLoopLatches(LatchBBs); |
| for (auto *LatchBB : LatchBBs) |
| if (BB != LatchBB && BBReg->contains(LatchBB)) |
| return; |
| L = L->getParentLoop(); |
| } |
| |
| isl::set Domain = scop->getOrInitEmptyDomain(BB); |
| assert(!Domain.is_null() && "Cannot propagate a nullptr"); |
| |
| Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops()); |
| |
| // Since the dimensions of @p BB and @p ExitBB might be different we have to |
| // adjust the domain before we can propagate it. |
| isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop); |
| isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB); |
| |
| // If the exit domain is not yet created we set it otherwise we "add" the |
| // current domain. |
| ExitDomain = |
| !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain; |
| |
| // Initialize the invalid domain. |
| InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space()); |
| |
| FinishedExitBlocks.insert(ExitBB); |
| } |
| |
| isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB, |
| isl::set Domain) { |
| // If @p BB is the ScopEntry we are done |
| if (scop->getRegion().getEntry() == BB) |
| return isl::set::universe(Domain.get_space()); |
| |
| // The region info of this function. |
| auto &RI = *scop->getRegion().getRegionInfo(); |
| |
| Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops()); |
| |
| // A domain to collect all predecessor domains, thus all conditions under |
| // which the block is executed. To this end we start with the empty domain. |
| isl::set PredDom = isl::set::empty(Domain.get_space()); |
| |
| // Set of regions of which the entry block domain has been propagated to BB. |
| // All predecessors inside any of the regions can be skipped. |
| SmallSet<Region *, 8> PropagatedRegions; |
| |
| for (auto *PredBB : predecessors(BB)) { |
| // Skip backedges. |
| if (DT.dominates(BB, PredBB)) |
| continue; |
| |
| // If the predecessor is in a region we used for propagation we can skip it. |
| auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; |
| if (llvm::any_of(PropagatedRegions, PredBBInRegion)) { |
| continue; |
| } |
| |
| // Check if there is a valid region we can use for propagation, thus look |
| // for a region that contains the predecessor and has @p BB as exit block. |
| // FIXME: This was a side-effect-free (and possibly infinite) loop when |
| // committed and seems not to be needed. |
| auto *PredR = RI.getRegionFor(PredBB); |
| while (PredR->getExit() != BB && !PredR->contains(BB)) |
| PredR = PredR->getParent(); |
| |
| // If a valid region for propagation was found use the entry of that region |
| // for propagation, otherwise the PredBB directly. |
| if (PredR->getExit() == BB) { |
| PredBB = PredR->getEntry(); |
| PropagatedRegions.insert(PredR); |
| } |
| |
| isl::set PredBBDom = scop->getDomainConditions(PredBB); |
| Loop *PredBBLoop = |
| getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops()); |
| PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop); |
| PredDom = PredDom.unite(PredBBDom); |
| } |
| |
| return PredDom; |
| } |
| |
| bool ScopBuilder::addLoopBoundsToHeaderDomain( |
| Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| int LoopDepth = scop->getRelativeLoopDepth(L); |
| assert(LoopDepth >= 0 && "Loop in region should have at least depth one"); |
| |
| BasicBlock *HeaderBB = L->getHeader(); |
| assert(scop->isDomainDefined(HeaderBB)); |
| isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB); |
| |
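| // For a header domain with space [i0, i1] and LoopDepth == 1, the map built |
| // below would be { [i0, i1] -> [i0, i1 + 1] } (illustrative; see |
| // createNextIterationMap above). |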
| isl::map NextIterationMap = |
| createNextIterationMap(HeaderBBDom.get_space(), LoopDepth); |
| |
| isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space()); |
| |
| SmallVector<BasicBlock *, 4> LatchBlocks; |
| L->getLoopLatches(LatchBlocks); |
| |
| for (BasicBlock *LatchBB : LatchBlocks) { |
| // If the latch is only reachable via error statements we skip it. |
| if (!scop->isDomainDefined(LatchBB)) |
| continue; |
| |
| isl::set LatchBBDom = scop->getDomainConditions(LatchBB); |
| |
| isl::set BackedgeCondition; |
| |
| Instruction *TI = LatchBB->getTerminator(); |
| BranchInst *BI = dyn_cast<BranchInst>(TI); |
| assert(BI && "Only branch instructions allowed in loop latches"); |
| |
| if (BI->isUnconditional()) |
| BackedgeCondition = LatchBBDom; |
| else { |
| SmallVector<isl_set *, 8> ConditionSets; |
| int idx = BI->getSuccessor(0) != HeaderBB; |
| if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(), |
| InvalidDomainMap, ConditionSets)) |
| return false; |
| |
| // Free the non back edge condition set as we do not need it. |
| isl_set_free(ConditionSets[1 - idx]); |
| |
| BackedgeCondition = isl::manage(ConditionSets[idx]); |
| } |
| |
| int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB)); |
| assert(LatchLoopDepth >= LoopDepth); |
| BackedgeCondition = BackedgeCondition.project_out( |
| isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth); |
| UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition); |
| } |
| |
| isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space()); |
| for (int i = 0; i < LoopDepth; i++) |
| ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i); |
| |
| isl::set UnionBackedgeConditionComplement = |
| UnionBackedgeCondition.complement(); |
| UnionBackedgeConditionComplement = |
| UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth, |
| 0); |
| UnionBackedgeConditionComplement = |
| UnionBackedgeConditionComplement.apply(ForwardMap); |
| HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement); |
| HeaderBBDom = HeaderBBDom.apply(NextIterationMap); |
| |
| auto Parts = partitionSetParts(HeaderBBDom, LoopDepth); |
| HeaderBBDom = Parts.second; |
| |
| // Check if there is a <nsw> tagged AddRec for this loop and if so do not |
| // require a runtime check. The assumption is already implied by the <nsw> |
| // tag. |
| bool RequiresRTC = !scop->hasNSWAddRecForLoop(L); |
| |
| isl::set UnboundedCtx = Parts.first.params(); |
| recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx, |
| HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION, |
| nullptr, RequiresRTC); |
| return true; |
| } |
| |
| void ScopBuilder::buildInvariantEquivalenceClasses() { |
| DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses; |
| |
| const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); |
| for (LoadInst *LInst : RIL) { |
| const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); |
| |
| Type *Ty = LInst->getType(); |
| LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)]; |
| if (ClassRep) { |
| scop->addInvariantLoadMapping(LInst, ClassRep); |
| continue; |
| } |
| |
| ClassRep = LInst; |
| scop->addInvariantEquivClass( |
| InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty}); |
| } |
| } |
| |
| bool ScopBuilder::buildDomains( |
| Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R); |
| auto *EntryBB = R->getEntry(); |
| auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB); |
| int LD = scop->getRelativeLoopDepth(L); |
| auto *S = |
| isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1)); |
| |
| InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S))); |
| isl::set Domain = isl::manage(S); |
| scop->setDomain(EntryBB, Domain); |
| |
| if (IsOnlyNonAffineRegion) |
| return !containsErrorBlock(R->getNode(), *R, &SD); |
| |
| if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap)) |
| return false; |
| |
| if (!propagateDomainConstraints(R, InvalidDomainMap)) |
| return false; |
| |
| // Error blocks and blocks dominated by them have been assumed to never be |
| // executed. Representing them in the Scop does not add any value. In fact, |
| // it is likely to cause issues during construction of the ScopStmts. The |
| // contents of error blocks have not been verified to be expressible and |
| // will cause problems when building up a ScopStmt for them. |
| // Furthermore, basic blocks dominated by error blocks may reference |
| // instructions in the error block which, if the error block is not modeled, |
| // can themselves not be constructed properly. To this end we will replace |
| // the domains of error blocks and those only reachable via error blocks |
| // with an empty set. Additionally, we will record for each block under which |
| // parameter combination it would be reached via an error block in its |
| // InvalidDomain. This information is needed during load hoisting. |
| if (!propagateInvalidStmtDomains(R, InvalidDomainMap)) |
| return false; |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildDomainsWithBranchConstraints( |
| Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| // To create the domain for each block in R we iterate over all blocks and |
| // subregions in R and propagate the conditions under which the current region |
| // element is executed. To this end we iterate in reverse post order over R as |
| // it ensures that we first visit all predecessors of a region node (either a |
| // basic block or a subregion) before we visit the region node itself. |
| // Initially, only the domain for the SCoP region entry block is set and from |
| // there we propagate the current domain to all successors, however we add the |
| // condition that the successor is actually executed next. |
| // As we are only interested in non-loop carried constraints here we can |
| // simply skip loop back edges. |
| |
| SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks; |
| ReversePostOrderTraversal<Region *> RTraversal(R); |
| for (auto *RN : RTraversal) { |
| // Recurse for affine subregions but go on for basic blocks and non-affine |
| // subregions. |
| if (RN->isSubRegion()) { |
| Region *SubRegion = RN->getNodeAs<Region>(); |
| if (!scop->isNonAffineSubRegion(SubRegion)) { |
| if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap)) |
| return false; |
| continue; |
| } |
| } |
| |
| if (containsErrorBlock(RN, scop->getRegion(), &SD)) |
| scop->notifyErrorBlock(); |
| |
| BasicBlock *BB = getRegionNodeBasicBlock(RN); |
| Instruction *TI = BB->getTerminator(); |
| |
| if (isa<UnreachableInst>(TI)) |
| continue; |
| |
| if (!scop->isDomainDefined(BB)) |
| continue; |
| isl::set Domain = scop->getDomainConditions(BB); |
| |
| scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim())); |
| |
| auto *BBLoop = getRegionNodeLoop(RN, LI); |
| // Propagate the domain from BB directly to blocks that have a superset |
| // domain, at the moment only region exit nodes of regions that start in BB. |
| propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, |
| InvalidDomainMap); |
| |
| // If all successors of BB have been set a domain through the propagation |
| // above we do not need to build condition sets but can just skip this |
| // block. However, it is important to note that this is a local property |
| // with regards to the region @p R. To this end FinishedExitBlocks is a |
| // local variable. |
| auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) { |
| return FinishedExitBlocks.count(SuccBB); |
| }; |
| if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit)) |
| continue; |
| |
| // Build the condition sets for the successor nodes of the current region |
| // node. If it is a non-affine subregion we will always execute the single |
| // exit node, hence the single entry node domain is the condition set. For |
| // basic blocks we use the helper function buildConditionSets. |
| SmallVector<isl_set *, 8> ConditionSets; |
| if (RN->isSubRegion()) |
| ConditionSets.push_back(Domain.copy()); |
| else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap, |
| ConditionSets)) |
| return false; |
| |
| // Now iterate over the successors and set their initial domain based on |
| // their condition set. We skip back edges here and have to be careful when |
| // we leave a loop not to keep constraints over a dimension that doesn't |
| // exist anymore. |
| assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size()); |
| for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) { |
| isl::set CondSet = isl::manage(ConditionSets[u]); |
| BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u); |
| |
| // Skip blocks outside the region. |
| if (!scop->contains(SuccBB)) |
| continue; |
| |
| // If we propagate the domain of some block to "SuccBB" we do not have to |
| // adjust the domain. |
| if (FinishedExitBlocks.count(SuccBB)) |
| continue; |
| |
| // Skip back edges. |
| if (DT.dominates(SuccBB, BB)) |
| continue; |
| |
| Loop *SuccBBLoop = |
| getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops()); |
| |
| CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop); |
| |
| // Set the domain for the successor or merge it with an existing domain in |
| // case there are multiple paths (without loop back edges) to the |
| // successor block. |
| isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB); |
| |
| if (!SuccDomain.is_null()) { |
| SuccDomain = SuccDomain.unite(CondSet).coalesce(); |
| } else { |
| // Initialize the invalid domain. |
| InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space()); |
| SuccDomain = CondSet; |
| } |
| |
| SuccDomain = SuccDomain.detect_equalities(); |
| |
| // Check if the maximal number of domain disjunctions was reached. |
| // In case this happens we will clean up and bail. |
| if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain) |
| continue; |
| |
| scop->invalidate(COMPLEXITY, DebugLoc()); |
| while (++u < ConditionSets.size()) |
| isl_set_free(ConditionSets[u]); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool ScopBuilder::propagateInvalidStmtDomains( |
| Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| ReversePostOrderTraversal<Region *> RTraversal(R); |
| for (auto *RN : RTraversal) { |
| |
| // Recurse for affine subregions but go on for basic blocks and non-affine |
| // subregions. |
| if (RN->isSubRegion()) { |
| Region *SubRegion = RN->getNodeAs<Region>(); |
| if (!scop->isNonAffineSubRegion(SubRegion)) { |
| propagateInvalidStmtDomains(SubRegion, InvalidDomainMap); |
| continue; |
| } |
| } |
| |
| bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD); |
| BasicBlock *BB = getRegionNodeBasicBlock(RN); |
| isl::set &Domain = scop->getOrInitEmptyDomain(BB); |
| assert(!Domain.is_null() && "Cannot propagate a nullptr"); |
| |
| isl::set InvalidDomain = InvalidDomainMap[BB]; |
| |
| bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain); |
| |
| if (!IsInvalidBlock) { |
| InvalidDomain = InvalidDomain.intersect(Domain); |
| } else { |
| InvalidDomain = Domain; |
| isl::set DomPar = Domain.params(); |
| recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar, |
| BB->getTerminator()->getDebugLoc(), AS_RESTRICTION); |
| Domain = isl::set::empty(Domain.get_space()); |
| } |
| |
| if (InvalidDomain.is_empty()) { |
| InvalidDomainMap[BB] = InvalidDomain; |
| continue; |
| } |
| |
| auto *BBLoop = getRegionNodeLoop(RN, LI); |
| auto *TI = BB->getTerminator(); |
| unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors(); |
| for (unsigned u = 0; u < NumSuccs; u++) { |
| auto *SuccBB = getRegionNodeSuccessor(RN, TI, u); |
| |
| // Skip successors outside the SCoP. |
| if (!scop->contains(SuccBB)) |
| continue; |
| |
| // Skip backedges. |
| if (DT.dominates(SuccBB, BB)) |
| continue; |
| |
| Loop *SuccBBLoop = |
| getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops()); |
| |
| auto AdjustedInvalidDomain = |
| adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop); |
| |
| isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB]; |
| SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain); |
| SuccInvalidDomain = SuccInvalidDomain.coalesce(); |
| |
| InvalidDomainMap[SuccBB] = SuccInvalidDomain; |
| |
| // Check if the maximal number of domain disjunctions was reached. |
| // In case this happens we will bail. |
| if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) < |
| MaxDisjunctsInDomain) |
| continue; |
| |
| InvalidDomainMap.erase(BB); |
| scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent()); |
| return false; |
| } |
| |
| InvalidDomainMap[BB] = InvalidDomain; |
| } |
| |
| return true; |
| } |
| |
| void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, |
| Region *NonAffineSubRegion, |
| bool IsExitBlock) { |
| // PHI nodes that are in the exit block of the region, hence if IsExitBlock is |
| // true, are not modeled as ordinary PHI nodes as they are not part of the |
| // region. However, we model the operands in the predecessor blocks that are |
| // part of the region as regular scalar accesses. |
| |
| // If we can synthesize a PHI we can skip it, however only if it is in |
| // the region. If it is not it can only be in the exit block of the region. |
| // In this case we model the operands but not the PHI itself. |
| auto *Scope = LI.getLoopFor(PHI->getParent()); |
| if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope)) |
| return; |
| |
| // PHI nodes are modeled as if they had been demoted prior to the SCoP |
| // detection. Hence, the PHI is a load of a new memory location in which the |
| // incoming value was written at the end of the incoming basic block. |
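| // For illustration: for a PHI like 'p = phi [ %a, %then ], [ %b, %else ]' we |
| // create PHI write accesses for %a and %b in the statements of %then and |
| // %else, and a PHI read access in the statement containing the PHI itself |
| // (block and value names are hypothetical). |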
| bool OnlyNonAffineSubRegionOperands = true; |
| for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) { |
| Value *Op = PHI->getIncomingValue(u); |
| BasicBlock *OpBB = PHI->getIncomingBlock(u); |
| ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u)); |
| |
| // Do not build PHI dependences inside a non-affine subregion, but make |
| // sure that the necessary scalar values are still made available. |
| if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) { |
| auto *OpInst = dyn_cast<Instruction>(Op); |
| if (!OpInst || !NonAffineSubRegion->contains(OpInst)) |
| ensureValueRead(Op, OpStmt); |
| continue; |
| } |
| |
| OnlyNonAffineSubRegionOperands = false; |
| ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock); |
| } |
| |
| if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) { |
| addPHIReadAccess(PHIStmt, PHI); |
| } |
| } |
| |
| void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt, |
| Instruction *Inst) { |
| assert(!isa<PHINode>(Inst)); |
| |
| // Pull-in required operands. |
| for (Use &Op : Inst->operands()) |
| ensureValueRead(Op.get(), UserStmt); |
| } |
| |
| // Create a sequence of two schedules. Either argument may be null and is |
| // interpreted as the empty schedule. Can also return null if both schedules are |
| // empty. |
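| // For example (illustrative): combining the schedules of two statements |
| // Stmt0 and Stmt1 yields a schedule tree with a sequence node whose children |
| // are the two single-statement schedules, preserving their textual order. |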
| static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) { |
| if (Prev.is_null()) |
| return Succ; |
| if (Succ.is_null()) |
| return Prev; |
| |
| return Prev.sequence(Succ); |
| } |
| |
| // Create an isl_multi_union_aff that defines an identity mapping from the |
| // elements of USet to their N-th dimension. |
| // |
| // # Example: |
| // |
| // Domain: { A[i,j]; B[i,j,k] } |
| // N: 1 |
| // |
| // Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] } |
| // |
| // @param USet A union set describing the elements for which to generate a |
| // mapping. |
| // @param N The dimension to map to. |
| // @returns A mapping from USet to its N-th dimension. |
| static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) { |
| assert(!USet.is_null()); |
| assert(!USet.is_empty()); |
| |
| auto Result = isl::union_pw_multi_aff::empty(USet.get_space()); |
| |
| for (isl::set S : USet.get_set_list()) { |
| unsigned Dim = unsignedFromIslSize(S.tuple_dim()); |
| assert(Dim >= N); |
| auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set, |
| N, Dim - N); |
| if (N > 1) |
| PMA = PMA.drop_dims(isl::dim::out, 0, N - 1); |
| |
| Result = Result.add_pw_multi_aff(PMA); |
| } |
| |
| return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result)); |
| } |
| |
| void ScopBuilder::buildSchedule() { |
| Loop *L = getLoopSurroundingScop(*scop, LI); |
| LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)}); |
| buildSchedule(scop->getRegion().getNode(), LoopStack); |
| assert(LoopStack.size() == 1 && LoopStack.back().L == L); |
| scop->setScheduleTree(LoopStack[0].Schedule); |
| } |
| |
| /// To generate a schedule for the elements in a Region we traverse the Region |
| /// in reverse-post-order and add the contained RegionNodes in traversal order |
| /// to the schedule of the loop that is currently at the top of the LoopStack. |
| /// For loop-free codes, this results in a correct sequential ordering. |
| /// |
| /// Example: |
| /// bb1(0) |
| /// / \. |
| /// bb2(1) bb3(2) |
| /// \ / \. |
| /// bb4(3) bb5(4) |
| /// \ / |
| /// bb6(5) |
| /// |
| /// Including loops requires additional processing. Whenever a loop header is |
| /// encountered, the corresponding loop is added to the @p LoopStack. Starting |
| /// from an empty schedule, we first process all RegionNodes that are within |
| /// this loop and complete the sequential schedule at this loop-level before |
| /// processing any other nodes. To implement this |
| /// loop-nodes-first-processing, the reverse post-order traversal is |
| /// insufficient. Hence, we additionally check if the traversal yields |
| /// sub-regions or blocks that are outside the last loop on the @p LoopStack. |
| /// These region-nodes are then queued and only traversed after all the nodes |
| /// within the current loop have been processed. |
| void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) { |
| Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI); |
| |
| ReversePostOrderTraversal<Region *> RTraversal(R); |
| std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end()); |
| std::deque<RegionNode *> DelayList; |
| bool LastRNWaiting = false; |
| |
| // Iterate over the region @p R in reverse post-order but queue |
| // sub-regions/blocks iff they are not part of the last encountered but not |
| // completely traversed loop. The variable LastRNWaiting is a flag to indicate |
| // that we queued the last sub-region/block from the reverse post-order |
| // iterator. If it is set we have to explore the next sub-region/block from |
| // the iterator (if any) to guarantee progress. If it is not set we first try |
| // the next queued sub-region/blocks. |
| while (!WorkList.empty() || !DelayList.empty()) { |
| RegionNode *RN; |
| |
| if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) { |
| RN = WorkList.front(); |
| WorkList.pop_front(); |
| LastRNWaiting = false; |
| } else { |
| RN = DelayList.front(); |
| DelayList.pop_front(); |
| } |
| |
| Loop *L = getRegionNodeLoop(RN, LI); |
| if (!scop->contains(L)) |
| L = OuterScopLoop; |
| |
| Loop *LastLoop = LoopStack.back().L; |
| if (LastLoop != L) { |
| if (LastLoop && !LastLoop->contains(L)) { |
| LastRNWaiting = true; |
| DelayList.push_back(RN); |
| continue; |
| } |
| LoopStack.push_back({L, {}, 0}); |
| } |
| buildSchedule(RN, LoopStack); |
| } |
| } |
| |
| void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) { |
| if (RN->isSubRegion()) { |
| auto *LocalRegion = RN->getNodeAs<Region>(); |
| if (!scop->isNonAffineSubRegion(LocalRegion)) { |
| buildSchedule(LocalRegion, LoopStack); |
| return; |
| } |
| } |
| |
| assert(LoopStack.rbegin() != LoopStack.rend()); |
| auto LoopData = LoopStack.rbegin(); |
| LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN); |
| |
| for (auto *Stmt : scop->getStmtListFor(RN)) { |
| isl::union_set UDomain{Stmt->getDomain()}; |
| auto StmtSchedule = isl::schedule::from_domain(UDomain); |
| LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule); |
| } |
| |
| // Check if we just processed the last node in this loop. If we did, finalize |
| // the loop by: |
| // |
| // - adding new schedule dimensions |
| // - folding the resulting schedule into the parent loop schedule |
| // - dropping the loop schedule from the LoopStack. |
| // |
| // Then continue to check surrounding loops, which might also have been |
| // completed by this node. |
| size_t Dimension = LoopStack.size(); |
| while (LoopData->L && |
| LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) { |
| isl::schedule Schedule = LoopData->Schedule; |
| auto NumBlocksProcessed = LoopData->NumBlocksProcessed; |
| |
| assert(std::next(LoopData) != LoopStack.rend()); |
| Loop *L = LoopData->L; |
| ++LoopData; |
| --Dimension; |
| |
| if (!Schedule.is_null()) { |
| isl::union_set Domain = Schedule.get_domain(); |
| isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension); |
| Schedule = Schedule.insert_partial_schedule(MUPA); |
| |
| if (hasDisableAllTransformsHint(L)) { |
| /// If any of the loops has a disable_nonforced heuristic, mark the |
| /// entire SCoP as such. The ISL rescheduler can only reschedule the |
| /// SCoP in its entirety. |
| /// TODO: ScopDetection could avoid including such loops or wrap them as |
| /// boxed loops. It still needs to pass through loops with user-defined |
| /// metadata. |
| scop->markDisableHeuristics(); |
| } |
| |
| // It is easier to insert the marks here than to do it retroactively. |
| isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L); |
| if (!IslLoopId.is_null()) |
| Schedule = |
| Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule(); |
| |
| LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule); |
| } |
| |
| LoopData->NumBlocksProcessed += NumBlocksProcessed; |
| } |
| // Now pop all loops processed up there from the LoopStack |
| LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end()); |
| } |
| |
| void ScopBuilder::buildEscapingDependences(Instruction *Inst) { |
| // Check for uses of this instruction outside the scop. Because we do not |
| // iterate over such instructions and therefore did not "ensure" the existence |
| // of a write, we must determine such use here. |
| if (scop->isEscaping(Inst)) |
| ensureValueWrite(Inst); |
| } |
| |
| void ScopBuilder::addRecordedAssumptions() { |
| for (auto &AS : llvm::reverse(RecordedAssumptions)) { |
| |
| if (!AS.BB) { |
| scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, |
| nullptr /* BasicBlock */, AS.RequiresRTC); |
| continue; |
| } |
| |
| // If the domain was deleted the assumptions are void. |
| isl_set *Dom = scop->getDomainConditions(AS.BB).release(); |
| if (!Dom) |
| continue; |
| |
| // If a basic block was given use its domain to simplify the assumption. |
| // In case of restrictions we know they only have to hold on the domain, |
| // thus we can intersect them with the domain of the block. However, for |
| // assumptions the domain has to imply them, thus: |
| //   Dom => S   <==>   not(A) v B   <==>   not(A - B)   (with A = Dom, B = S) |
| // |
| // To avoid the complement we will register A - B as a restriction not an |
| // assumption. |
| isl_set *S = AS.Set.copy(); |
| if (AS.Sign == AS_RESTRICTION) |
| S = isl_set_params(isl_set_intersect(S, Dom)); |
| else /* (AS.Sign == AS_ASSUMPTION) */ |
| S = isl_set_params(isl_set_subtract(Dom, S)); |
| |
| scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB, |
| AS.RequiresRTC); |
| } |
| } |
| |
| void ScopBuilder::addUserAssumptions( |
| AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { |
| for (auto &Assumption : AC.assumptions()) { |
| auto *CI = dyn_cast_or_null<CallInst>(Assumption); |
| if (!CI || CI->arg_size() != 1) |
| continue; |
| |
| bool InScop = scop->contains(CI); |
| if (!InScop && !scop->isDominatedBy(DT, CI->getParent())) |
| continue; |
| |
| auto *L = LI.getLoopFor(CI->getParent()); |
| auto *Val = CI->getArgOperand(0); |
| ParameterSetTy DetectedParams; |
| auto &R = scop->getRegion(); |
| if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) { |
| ORE.emit( |
| OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI) |
| << "Non-affine user assumption ignored."); |
| continue; |
| } |
| |
| // Collect all newly introduced parameters. |
| ParameterSetTy NewParams; |
| for (auto *Param : DetectedParams) { |
| Param = extractConstantFactor(Param, SE).second; |
| Param = scop->getRepresentingInvariantLoadSCEV(Param); |
| if (scop->isParam(Param)) |
| continue; |
| NewParams.insert(Param); |
| } |
| |
| SmallVector<isl_set *, 2> ConditionSets; |
| auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr; |
| BasicBlock *BB = InScop ? CI->getParent() : R.getEntry(); |
| auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get()) |
| : isl_set_copy(scop->getContext().get()); |
| assert(Dom && "Cannot propagate a nullptr."); |
| bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap, |
| ConditionSets); |
| isl_set_free(Dom); |
| |
| if (!Valid) |
| continue; |
| |
| isl_set *AssumptionCtx = nullptr; |
| if (InScop) { |
| AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1])); |
| isl_set_free(ConditionSets[0]); |
| } else { |
| AssumptionCtx = isl_set_complement(ConditionSets[1]); |
| AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]); |
| } |
| |
| // Project out newly introduced parameters as they are not otherwise useful. |
| if (!NewParams.empty()) { |
| for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) { |
| auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u); |
| auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id)); |
| isl_id_free(Id); |
| |
| if (!NewParams.count(Param)) |
| continue; |
| |
| AssumptionCtx = |
| isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1); |
| } |
| } |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI) |
| << "Use user assumption: " |
| << stringFromIslObj(AssumptionCtx, "null")); |
| isl::set newContext = |
| scop->getContext().intersect(isl::manage(AssumptionCtx)); |
| scop->setContext(newContext); |
| } |
| } |
| |
| bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) { |
| // Memory builtins are not considered in this function. |
| if (!Inst.isLoad() && !Inst.isStore()) |
| return false; |
| |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| Value *Address = Inst.getPointerOperand(); |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| if (auto *BitCast = dyn_cast<BitCastInst>(Address)) |
| Address = BitCast->getOperand(0); |
| |
| auto *GEP = dyn_cast<GetElementPtrInst>(Address); |
| if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) != |
| DL.getTypeAllocSize(ElementType)) |
| return false; |
| |
| SmallVector<const SCEV *, 4> Subscripts; |
| SmallVector<int, 4> Sizes; |
| getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes); |
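| // For illustration (assuming an access like A[i][j] into a fixed-size array |
| // 'float A[100][50]'): Subscripts would hold the SCEVs of i and j, and Sizes |
| // the inner dimension size 50; the outermost size is not needed and is |
| // represented by the leading nullptr pushed into SizesSCEV below. |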
| auto *BasePtr = GEP->getOperand(0); |
| |
| if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr)) |
| BasePtr = BasePtrCast->getOperand(0); |
| |
| // Check for identical base pointers to ensure that we do not miss index |
| // offsets that have been added before this GEP is applied. |
| if (BasePtr != BasePointer->getValue()) |
| return false; |
| |
| std::vector<const SCEV *> SizesSCEV; |
| |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| for (auto *Subscript : Subscripts) { |
| InvariantLoadsSetTy AccessILS; |
| if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, |
| &AccessILS)) |
| return false; |
| |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| return false; |
| } |
| |
| if (Sizes.empty()) |
| return false; |
| |
| SizesSCEV.push_back(nullptr); |
| |
| for (auto V : Sizes) |
| SizesSCEV.push_back(SE.getSCEV( |
| ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); |
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| true, Subscripts, SizesSCEV, Val); |
| return true; |
| } |
| |
| bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { |
| // Memory builtins are not considered by this function. |
| if (!Inst.isLoad() && !Inst.isStore()) |
| return false; |
| |
| if (!PollyDelinearize) |
| return false; |
| |
| Value *Address = Inst.getPointerOperand(); |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| unsigned ElementSize = DL.getTypeAllocSize(ElementType); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| |
| assert(BasePointer && "Could not find base pointer"); |
| |
| auto &InsnToMemAcc = scop->getInsnToMemAccMap(); |
| auto AccItr = InsnToMemAcc.find(Inst); |
| if (AccItr == InsnToMemAcc.end()) |
| return false; |
| |
| std::vector<const SCEV *> Sizes = {nullptr}; |
| |
| Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), |
| AccItr->second.Shape->DelinearizedSizes.end()); |
| |
| // In case only the element size is contained in the 'Sizes' array, the |
| // access does not access a real multi-dimensional array. Hence, we allow |
| // the normal single-dimensional access construction to handle this. |
| if (Sizes.size() == 1) |
| return false; |
| |
| // Remove the element size. This information is already provided by the |
| // ElementSize parameter. In case the element size of this access and the |
| // element size used for delinearization differ, the delinearization is |
| // incorrect. Hence, we invalidate the scop. |
| // |
| // TODO: Handle delinearization with differing element sizes. |
| auto DelinearizedSize = |
| cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue(); |
| Sizes.pop_back(); |
| if (ElementSize != DelinearizedSize) |
| scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); |
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| true, AccItr->second.DelinearizedSubscripts, Sizes, Val); |
| return true; |
| } |
| |
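| // Sketch of what the next function models (hypothetical call): a |
| // memset(p, 0, n) becomes a MUST_WRITE of n bytes starting at the base of p, |
| // and a memcpy/memmove additionally gets a READ of the source range; both |
| // are modeled with an i8 element type. |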
| bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { |
| auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst); |
| |
| if (MemIntr == nullptr) |
| return false; |
| |
| auto *L = LI.getLoopFor(Inst->getParent()); |
| auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L); |
| assert(LengthVal); |
| |
| // Check if the length value is actually affine or if we overapproximate it. |
| InvariantLoadsSetTy AccessILS; |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, |
| LengthVal, SE, &AccessILS); |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| LengthIsAffine = false; |
| if (!LengthIsAffine) |
| LengthVal = nullptr; |
| |
| auto *DestPtrVal = MemIntr->getDest(); |
| assert(DestPtrVal); |
| |
| auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L); |
| assert(DestAccFunc); |
| // Ignore accesses to "NULL". |
| // TODO: We could use this to optimize the region further, e.g., intersect |
| // the context with |
| // isl_set_complement(isl_set_params(getDomain())) |
| // as we know it would be undefined to execute this instruction anyway. |
| if (DestAccFunc->isZero()) |
| return true; |
| |
| if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) { |
| if (isa<ConstantPointerNull>(U->getValue())) |
| return true; |
| } |
| |
| auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc)); |
| assert(DestPtrSCEV); |
| DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); |
| addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), |
| IntegerType::getInt8Ty(DestPtrVal->getContext()), |
| LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, |
| Inst.getValueOperand()); |
| |
| auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr); |
| if (!MemTrans) |
| return true; |
| |
| auto *SrcPtrVal = MemTrans->getSource(); |
| assert(SrcPtrVal); |
| |
| auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L); |
| assert(SrcAccFunc); |
| // Ignore accesses to "NULL". |
| // TODO: See above TODO |
| if (SrcAccFunc->isZero()) |
| return true; |
| |
| auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc)); |
| assert(SrcPtrSCEV); |
| SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); |
| addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), |
| IntegerType::getInt8Ty(SrcPtrVal->getContext()), |
| LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, |
| Inst.getValueOperand()); |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { |
| auto *CI = dyn_cast_or_null<CallInst>(Inst); |
| |
| if (CI == nullptr) |
| return false; |
| |
| if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI)) |
| return true; |
| |
| auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); |
| auto *CalledFunction = CI->getCalledFunction(); |
| MemoryEffects ME = AA.getMemoryEffects(CalledFunction); |
| if (ME.doesNotAccessMemory()) |
| return true; |
| |
| if (ME.onlyAccessesArgPointees()) { |
| ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); |
| auto AccType = |
| !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE; |
| Loop *L = LI.getLoopFor(Inst->getParent()); |
| for (const auto &Arg : CI->args()) { |
| if (!Arg->getType()->isPointerTy()) |
| continue; |
| |
| auto *ArgSCEV = SE.getSCEVAtScope(Arg, L); |
| if (ArgSCEV->isZero()) |
| continue; |
| |
| if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) { |
| if (isa<ConstantPointerNull>(U->getValue())) |
| return true; |
| } |
| |
| auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); |
| addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), |
| ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); |
| } |
| return true; |
| } |
| |
| if (ME.onlyReadsMemory()) { |
| GlobalReads.emplace_back(Stmt, CI); |
| return true; |
| } |
| return false; |
| } |
| |
| bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) { |
| // Memory builtins are not considered by this function. |
| if (!Inst.isLoad() && !Inst.isStore()) |
| return false; |
| |
| Value *Address = Inst.getPointerOperand(); |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| |
| assert(BasePointer && "Could not find base pointer"); |
| AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer); |
| |
| // Check if the access depends on a loop contained in a non-affine subregion. |
| bool isVariantInNonAffineLoop = false; |
| SetVector<const Loop *> Loops; |
| findLoops(AccessFunction, Loops); |
| for (const Loop *L : Loops) |
| if (Stmt->contains(L)) { |
| isVariantInNonAffineLoop = true; |
| break; |
| } |
| |
| InvariantLoadsSetTy AccessILS; |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| bool IsAffine = !isVariantInNonAffineLoop && |
| isAffineExpr(&scop->getRegion(), SurroundingLoop, |
| AccessFunction, SE, &AccessILS); |
| |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| IsAffine = false; |
| |
| if (!IsAffine && AccType == MemoryAccess::MUST_WRITE) |
| AccType = MemoryAccess::MAY_WRITE; |
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| IsAffine, {AccessFunction}, {nullptr}, Val); |
| return true; |
| } |
| |
| void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { |
| if (buildAccessMemIntrinsic(Inst, Stmt)) |
| return; |
| |
| if (buildAccessCallInst(Inst, Stmt)) |
| return; |
| |
| if (buildAccessMultiDimFixed(Inst, Stmt)) |
| return; |
| |
| if (buildAccessMultiDimParam(Inst, Stmt)) |
| return; |
| |
| if (buildAccessSingleDim(Inst, Stmt)) |
| return; |
| |
| llvm_unreachable( |
| "At least one of the buildAccess functions must handled this access, or " |
| "ScopDetection should have rejected this SCoP"); |
| } |
| |
| void ScopBuilder::buildAccessFunctions() { |
| for (auto &Stmt : *scop) { |
| if (Stmt.isBlockStmt()) { |
| buildAccessFunctions(&Stmt, *Stmt.getBasicBlock()); |
| continue; |
| } |
| |
| Region *R = Stmt.getRegion(); |
| for (BasicBlock *BB : R->blocks()) |
| buildAccessFunctions(&Stmt, *BB, R); |
| } |
| |
| // Build write accesses for values that are used after the SCoP. |
| // The instructions defining them might be synthesizable and therefore not |
| // contained in any statement, hence we iterate over the original instructions |
| // to identify all escaping values. |
| for (BasicBlock *BB : scop->getRegion().blocks()) { |
| for (Instruction &Inst : *BB) |
| buildEscapingDependences(&Inst); |
| } |
| } |
| |
| bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) { |
| return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) && |
| !canSynthesize(Inst, *scop, &SE, L); |
| } |
| |
| /// Generate a name for a statement. |
| /// |
| /// @param BB The basic block the statement will represent. |
| /// @param BBIdx The index of the @p BB relative to other BBs/regions. |
| /// @param Count The index of the created statement in @p BB. |
| /// @param IsMain Whether this is the main statement for @p BB. If true, |
| /// no suffix will be added. |
| /// @param IsLast Uses a special indicator for the last statement of a BB. |
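| /// |
| /// For illustration only (the exact text depends on UseInstructionNames and |
| /// the IR names): a block named "bb5" could yield "Stmt_bb5" for the main |
| /// statement, "Stmt_bb5_b" for a second statement and "Stmt_bb5_last" for |
| /// the epilogue statement. |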
| static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, |
| bool IsMain, bool IsLast = false) { |
| std::string Suffix; |
| if (!IsMain) { |
| if (UseInstructionNames) |
| Suffix = '_'; |
| if (IsLast) |
| Suffix += "last"; |
| else if (Count < 26) |
| Suffix += 'a' + Count; |
| else |
| Suffix += std::to_string(Count); |
| } |
| return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames); |
| } |
| |
| /// Generate a name for a statement that represents a non-affine subregion. |
| /// |
| /// @param R The region the statement will represent. |
| /// @param RIdx The index of the @p R relative to other BBs/regions. |
| static std::string makeStmtName(Region *R, long RIdx) { |
| return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "", |
| UseInstructionNames); |
| } |
| |
| void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) { |
| Loop *SurroundingLoop = LI.getLoopFor(BB); |
| |
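| // Illustrative behavior on a hypothetical block: with SplitOnStore set, the |
| // instruction sequence "load; store; add; store; mul" is split into three |
| // statements, each of the two stores ending one statement and the trailing |
| // instructions forming the last one. |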
| int Count = 0; |
| long BBIdx = scop->getNextStmtIdx(); |
| std::vector<Instruction *> Instructions; |
| for (Instruction &Inst : *BB) { |
| if (shouldModelInst(&Inst, SurroundingLoop)) |
| Instructions.push_back(&Inst); |
| if (Inst.getMetadata("polly_split_after") || |
| (SplitOnStore && isa<StoreInst>(Inst))) { |
| std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); |
| scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); |
| Count++; |
| Instructions.clear(); |
| } |
| } |
| |
| std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); |
| scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); |
| } |
| |
| /// Is @p Inst an ordered instruction? |
| /// |
| /// An unordered instruction is an instruction such that a sequence of |
| /// unordered instructions can be permuted without changing semantics. Any |
| /// instruction for which this is not always the case is ordered. |
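| /// |
| /// For instance, loads, stores and calls with side effects are ordered, |
| /// whereas a plain 'add' neither reads nor writes memory and can therefore |
| /// be permuted freely. |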
| static bool isOrderedInstruction(Instruction *Inst) { |
| return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); |
| } |
| |
| /// Join instructions to the same statement if one uses the scalar result of the |
| /// other. |
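| /// |
| /// A minimal sketch with hypothetical IR: |
| /// %a = load i32, ptr %p |
| /// %b = add i32 %a, 1 |
| /// The add uses the scalar result of the load, so both instructions end up |
| /// in the same equivalence class and hence in the same statement. |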
| static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| for (Instruction *Inst : ModeledInsts) { |
| if (isa<PHINode>(Inst)) |
| continue; |
| |
| for (Use &Op : Inst->operands()) { |
| Instruction *OpInst = dyn_cast<Instruction>(Op.get()); |
| if (!OpInst) |
| continue; |
| |
| // Check if OpInst is in the BB and is a modeled instruction. |
| auto OpVal = UnionFind.findValue(OpInst); |
| if (OpVal == UnionFind.end()) |
| continue; |
| |
| UnionFind.unionSets(Inst, OpInst); |
| } |
| } |
| } |
| |
| /// Ensure that the order of ordered instructions does not change. |
| /// |
| /// If we encounter an ordered instruction enclosed in instructions belonging to |
| /// a different statement (which might as well contain ordered instructions, but |
| /// this is not tested here), join them. |
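| /// |
| /// As an illustration (statement letters are arbitrary): if the ordered |
| /// instructions of a block map to the leader sequence "A B A", the hole is |
| /// closed by joining B into A so that the relative order of the memory |
| /// operations cannot change. |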
| static void |
| joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| SetVector<Instruction *> SeenLeaders; |
| for (Instruction *Inst : ModeledInsts) { |
| if (!isOrderedInstruction(Inst)) |
| continue; |
| |
| Instruction *Leader = UnionFind.getLeaderValue(Inst); |
| // Since previous iterations might have merged sets, some items in |
| // SeenLeaders are not leaders anymore. However, the new leader of |
| // previously merged instructions must be one of the former leaders of |
| // these merged instructions. |
| bool Inserted = SeenLeaders.insert(Leader); |
| if (Inserted) |
| continue; |
| |
| // Merge statements to close holes. Say, we have already seen statements A |
| // and B, in this order. Then we see an instruction of A again and we would |
| // see the pattern "A B A". This function joins all statements until the |
| // only seen occurrence of A. |
| for (Instruction *Prev : reverse(SeenLeaders)) { |
| // We are backtracking from the last element until we see Inst's leader |
| // in SeenLeaders and merge all into one set. Although leaders of |
| // instructions change during the execution of this loop, it's irrelevant |
| // as we are just searching for the element that we already confirmed is |
| // in the list. |
| if (Prev == Leader) |
| break; |
| UnionFind.unionSets(Prev, Leader); |
| } |
| } |
| } |
| |
| /// If the BasicBlock has an edge to itself, ensure that the PHI WRITEs for |
| /// the incoming values from this block are executed after the PHI READ. |
| /// |
| /// Otherwise it could overwrite the incoming value from before the BB with the |
| /// value for the next execution. This can happen if the PHI WRITE is added to |
| /// the statement with the instruction that defines the incoming value (instead |
| /// of the last statement of the same BB). To ensure that the PHI READ and WRITE |
| /// are in order, we put both into the statement. PHI WRITEs are always executed |
| /// after PHI READs when they are in the same statement. |
| /// |
| /// TODO: This is an overpessimization. We only have to ensure that the PHI |
| /// WRITE is not put into a statement containing the PHI itself. That could also |
| /// be done by |
| /// - having all (strongly connected) PHIs in a single statement, |
| /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only |
| /// has a chance of being lifted before a PHI by being in a statement with a |
| /// PHI that comes before in the basic block), or |
| /// - when uniting statements, ensure that no (relevant) PHIs are overtaken. |
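| /// |
| /// A sketch of the situation this guards against (hypothetical IR): |
| /// loop: |
| /// %phi = phi i32 [ 0, %entry ], [ %next, %loop ] |
| /// %next = add i32 %phi, 1 |
| /// br i1 %cond, label %loop, label %exit |
| /// Joining %phi and %next ensures the PHI WRITE of %next cannot be placed |
| /// in a statement executed before the PHI READ of %phi. |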
| static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| for (Instruction *Inst : ModeledInsts) { |
| PHINode *PHI = dyn_cast<PHINode>(Inst); |
| if (!PHI) |
| continue; |
| |
| int Idx = PHI->getBasicBlockIndex(PHI->getParent()); |
| if (Idx < 0) |
| continue; |
| |
| Instruction *IncomingVal = |
| dyn_cast<Instruction>(PHI->getIncomingValue(Idx)); |
| if (!IncomingVal) |
| continue; |
| |
| UnionFind.unionSets(PHI, IncomingVal); |
| } |
| } |
| |
| void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { |
| Loop *L = LI.getLoopFor(BB); |
| |
| // Extracting out modeled instructions saves us from checking |
| // shouldModelInst() repeatedly. |
| SmallVector<Instruction *, 32> ModeledInsts; |
| EquivalenceClasses<Instruction *> UnionFind; |
| Instruction *MainInst = nullptr, *MainLeader = nullptr; |
| for (Instruction &Inst : *BB) { |
| if (!shouldModelInst(&Inst, L)) |
| continue; |
| ModeledInsts.push_back(&Inst); |
| UnionFind.insert(&Inst); |
| |
| // When a BB is split into multiple statements, the main statement is the |
| // one containing the 'main' instruction. We select the first instruction |
| // that is unlikely to be removed (because it has side-effects) as the main |
| // one. It is used to ensure that at least one statement from the BB has the |
| // same name as with -polly-stmt-granularity=bb. |
| if (!MainInst && (isa<StoreInst>(Inst) || |
| (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))) |
| MainInst = &Inst; |
| } |
| |
| joinOperandTree(UnionFind, ModeledInsts); |
| joinOrderedInstructions(UnionFind, ModeledInsts); |
| joinOrderedPHIs(UnionFind, ModeledInsts); |
| |
| // The list of instructions per statement (each statement is represented by |
| // its leader instruction). |
| MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList; |
| |
| // The order of statements must be preserved w.r.t. their ordered |
| // instructions. Without this explicit scan, we would also use non-ordered |
| // instructions (whose order is arbitrary) to determine statement order. |
| for (Instruction *Inst : ModeledInsts) { |
| if (!isOrderedInstruction(Inst)) |
| continue; |
| |
| auto LeaderIt = UnionFind.findLeader(Inst); |
| if (LeaderIt == UnionFind.member_end()) |
| continue; |
| |
| // Insert element for the leader instruction. |
| (void)LeaderToInstList[*LeaderIt]; |
| } |
| |
| // Collect the instructions of all leaders. UnionFind's member iterators |
| // are unfortunately not in any specific order. |
| for (Instruction *Inst : ModeledInsts) { |
| auto LeaderIt = UnionFind.findLeader(Inst); |
| if (LeaderIt == UnionFind.member_end()) |
| continue; |
| |
| if (Inst == MainInst) |
| MainLeader = *LeaderIt; |
| std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt]; |
| InstList.push_back(Inst); |
| } |
| |
| // Finally build the statements. |
| int Count = 0; |
| long BBIdx = scop->getNextStmtIdx(); |
| for (auto &Instructions : LeaderToInstList) { |
| std::vector<Instruction *> &InstList = Instructions.second; |
| |
| // If there is no main instruction, make the first statement the main. |
| bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0); |
| |
| std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); |
| scop->addScopStmt(BB, Name, L, std::move(InstList)); |
| Count += 1; |
| } |
| |
| // Unconditionally add an epilogue (last statement). It contains no |
| // instructions, but holds the PHI write accesses for successor basic blocks, |
| // if the incoming value is not defined in another statement of the same BB. |
| // The epilogue becomes the main statement only if there is no other |
| // statement that could become main. |
| // The epilogue will be removed if no PHIWrite is added to it. |
| std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true); |
| scop->addScopStmt(BB, EpilogueName, L, {}); |
| } |
| |
| void ScopBuilder::buildStmts(Region &SR) { |
| if (scop->isNonAffineSubRegion(&SR)) { |
| std::vector<Instruction *> Instructions; |
| Loop *SurroundingLoop = |
| getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); |
| for (Instruction &Inst : *SR.getEntry()) |
| if (shouldModelInst(&Inst, SurroundingLoop)) |
| Instructions.push_back(&Inst); |
| long RIdx = scop->getNextStmtIdx(); |
| std::string Name = makeStmtName(&SR, RIdx); |
| scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions); |
| return; |
| } |
| |
| for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) |
| if (I->isSubRegion()) |
| buildStmts(*I->getNodeAs<Region>()); |
| else { |
| BasicBlock *BB = I->getNodeAs<BasicBlock>(); |
| switch (StmtGranularity) { |
| case GranularityChoice::BasicBlocks: |
| buildSequentialBlockStmts(BB); |
| break; |
| case GranularityChoice::ScalarIndependence: |
| buildEqivClassBlockStmts(BB); |
| break; |
| case GranularityChoice::Stores: |
| buildSequentialBlockStmts(BB, true); |
| break; |
| } |
| } |
| } |
| |
| void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB, |
| Region *NonAffineSubRegion) { |
| assert( |
| Stmt && |
| "The exit BB is the only one that cannot be represented by a statement"); |
| assert(Stmt->represents(&BB)); |
| |
| // We do not build access functions for error blocks, as they may contain |
| // instructions we can not model. |
| if (SD.isErrorBlock(BB, scop->getRegion())) |
| return; |
| |
| auto BuildAccessesForInst = [this, Stmt, |
| NonAffineSubRegion](Instruction *Inst) { |
| PHINode *PHI = dyn_cast<PHINode>(Inst); |
| if (PHI) |
| buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false); |
| |
| if (auto MemInst = MemAccInst::dyn_cast(*Inst)) { |
| assert(Stmt && "Cannot build access function in non-existing statement"); |
| buildMemoryAccess(MemInst, Stmt); |
| } |
| |
| // PHI nodes have already been modeled above and terminators that are |
| // not part of a non-affine subregion are fully modeled and regenerated |
| // from the polyhedral domains. Hence, they do not need to be modeled as |
| // explicit data dependences. |
| if (!PHI) |
| buildScalarDependences(Stmt, Inst); |
| }; |
| |
| const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); |
| bool IsEntryBlock = (Stmt->getEntryBlock() == &BB); |
| if (IsEntryBlock) { |
| for (Instruction *Inst : Stmt->getInstructions()) |
| BuildAccessesForInst(Inst); |
| if (Stmt->isRegionStmt()) |
| BuildAccessesForInst(BB.getTerminator()); |
| } else { |
| for (Instruction &Inst : BB) { |
| if (isIgnoredIntrinsic(&Inst)) |
| continue; |
| |
| // Invariant loads already have been processed. |
| if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst))) |
| continue; |
| |
| BuildAccessesForInst(&Inst); |
| } |
| } |
| } |
| |
| MemoryAccess *ScopBuilder::addMemoryAccess( |
| ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, |
| Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, |
| ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes, |
| MemoryKind Kind) { |
| bool isKnownMustAccess = false; |
| |
| // Accesses in single-basic block statements are always executed. |
| if (Stmt->isBlockStmt()) |
| isKnownMustAccess = true; |
| |
| if (Stmt->isRegionStmt()) { |
| // Accesses that dominate the exit block of a non-affine region are always |
| // executed. In non-affine regions there may exist accesses that do not |
| // dominate the exit. MemoryKind::Value accesses always dominate the exit, |
| // and MemoryKind::PHI accesses do so only if there is at most one PHI_WRITE |
| // in the non-affine region. |
| if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit())) |
| isKnownMustAccess = true; |
| } |
| |
| // Non-affine PHI writes do not "happen" at a particular instruction, but |
| // after exiting the statement. Therefore they are guaranteed to execute and |
| // overwrite the old value. |
| if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI) |
| isKnownMustAccess = true; |
| |
| if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) |
| AccType = MemoryAccess::MAY_WRITE; |
| |
| auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, |
| Affine, Subscripts, Sizes, AccessValue, Kind); |
| |
| scop->addAccessFunction(Access); |
| Stmt->addAccess(Access); |
| return Access; |
| } |
| |
| void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, |
| MemoryAccess::AccessType AccType, |
| Value *BaseAddress, Type *ElementType, |
| bool IsAffine, |
| ArrayRef<const SCEV *> Subscripts, |
| ArrayRef<const SCEV *> Sizes, |
| Value *AccessValue) { |
| ArrayBasePointers.insert(BaseAddress); |
| addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, |
| AccessValue, Subscripts, Sizes, MemoryKind::Array); |
| } |
| |
| /// Check if @p Expr is divisible by @p Size. |
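| /// |
| /// A small worked example: for Size == 4, an expression like (4 * %i + 8) |
| /// is divisible because each addend is a multiple of 4, whereas |
| /// (4 * %i + 2) is not. |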
| static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) { |
| assert(Size != 0); |
| if (Size == 1) |
| return true; |
| |
| // Only one factor needs to be divisible. |
| if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) { |
| for (auto *FactorExpr : MulExpr->operands()) |
| if (isDivisible(FactorExpr, Size, SE)) |
| return true; |
| return false; |
| } |
| |
| // For other n-ary expressions (Add, AddRec, Max,...) all operands need |
| // to be divisible. |
| if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) { |
| for (auto *OpExpr : NAryExpr->operands()) |
| if (!isDivisible(OpExpr, Size, SE)) |
| return false; |
| return true; |
| } |
| |
| auto *SizeSCEV = SE.getConstant(Expr->getType(), Size); |
| auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV); |
| auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV); |
| return MulSCEV == Expr; |
| } |
| |
| void ScopBuilder::foldSizeConstantsToRight() { |
| isl::union_set Accessed = scop->getAccesses().range(); |
| |
| for (auto Array : scop->arrays()) { |
| if (Array->getNumberOfDimensions() <= 1) |
| continue; |
| |
| isl::space Space = Array->getSpace(); |
| Space = Space.align_params(Accessed.get_space()); |
| |
| if (!Accessed.contains(Space)) |
| continue; |
| |
| isl::set Elements = Accessed.extract_set(Space); |
| isl::map Transform = isl::map::universe(Array->getSpace().map_from_set()); |
| |
| std::vector<int> Int; |
| unsigned Dims = unsignedFromIslSize(Elements.tuple_dim()); |
| for (unsigned i = 0; i < Dims; i++) { |
| isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i); |
| DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1); |
| DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0); |
| |
| isl::basic_set DimHull = DimOnly.affine_hull(); |
| |
| if (i == Dims - 1) { |
| Int.push_back(1); |
| Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); |
| continue; |
| } |
| |
| if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) { |
| isl::aff Diff = DimHull.get_div(0); |
| isl::val Val = Diff.get_denominator_val(); |
| |
| int ValInt = 1; |
| if (Val.is_int()) { |
| auto ValAPInt = APIntFromVal(Val); |
| if (ValAPInt.isSignedIntN(32)) |
| ValInt = ValAPInt.getSExtValue(); |
| } |
| |
| Int.push_back(ValInt); |
| isl::constraint C = isl::constraint::alloc_equality( |
| isl::local_space(Transform.get_space())); |
| C = C.set_coefficient_si(isl::dim::out, i, ValInt); |
| C = C.set_coefficient_si(isl::dim::in, i, -1); |
| Transform = Transform.add_constraint(C); |
| continue; |
| } |
| |
| isl::basic_set ZeroSet = isl::basic_set(DimHull); |
| ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0); |
| |
| int ValInt = 1; |
| if (ZeroSet.is_equal(DimHull)) { |
| ValInt = 0; |
| } |
| |
| Int.push_back(ValInt); |
| Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); |
| } |
| |
| isl::set MappedElements = isl::map(Transform).domain(); |
| if (!Elements.is_subset(MappedElements)) |
| continue; |
| |
| bool CanFold = true; |
| if (Int[0] <= 1) |
| CanFold = false; |
| |
| unsigned NumDims = Array->getNumberOfDimensions(); |
| for (unsigned i = 1; i < NumDims - 1; i++) |
| if (Int[0] != Int[i] && Int[i]) |
| CanFold = false; |
| |
| if (!CanFold) |
| continue; |
| |
| for (auto &Access : scop->access_functions()) |
| if (Access->getScopArrayInfo() == Array) |
| Access->setAccessRelation( |
| Access->getAccessRelation().apply_range(Transform)); |
| |
| std::vector<const SCEV *> Sizes; |
| for (unsigned i = 0; i < NumDims; i++) { |
| auto Size = Array->getDimensionSize(i); |
| |
| if (i == NumDims - 1) |
| Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0])); |
| Sizes.push_back(Size); |
| } |
| |
| Array->updateSizes(Sizes, false /* CheckConsistency */); |
| } |
| } |
| |
| void ScopBuilder::finalizeAccesses() { |
| updateAccessDimensionality(); |
| foldSizeConstantsToRight(); |
| foldAccessRelations(); |
| assumeNoOutOfBounds(); |
| } |
| |
| void ScopBuilder::updateAccessDimensionality() { |
| // Check all array accesses for each base pointer and find a (virtual) element |
| // size for the base pointer that divides all access functions. |
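| // For example (purely illustrative): if an array of 8-byte elements is also |
| // indexed with a subscript that is only divisible by 4, the loop below |
| // settles on a virtual element size of 4 bytes, i.e. an i32 element type. |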
| for (ScopStmt &Stmt : *scop) |
| for (MemoryAccess *Access : Stmt) { |
| if (!Access->isArrayKind()) |
| continue; |
| ScopArrayInfo *Array = |
| const_cast<ScopArrayInfo *>(Access->getScopArrayInfo()); |
| |
| if (Array->getNumberOfDimensions() != 1) |
| continue; |
| unsigned DivisibleSize = Array->getElemSizeInBytes(); |
| const SCEV *Subscript = Access->getSubscript(0); |
| while (!isDivisible(Subscript, DivisibleSize, SE)) |
| DivisibleSize /= 2; |
| auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8); |
| Array->updateElementType(Ty); |
| } |
| |
| for (auto &Stmt : *scop) |
| for (auto &Access : Stmt) |
| Access->updateDimensionality(); |
| } |
| |
| void ScopBuilder::foldAccessRelations() { |
| for (auto &Stmt : *scop) |
| for (auto &Access : Stmt) |
| Access->foldAccessRelation(); |
| } |
| |
| void ScopBuilder::assumeNoOutOfBounds() { |
| if (PollyIgnoreInbounds) |
| return; |
| for (auto &Stmt : *scop) |
| for (auto &Access : Stmt) { |
| isl::set Outside = Access->assumeNoOutOfBound(); |
| const auto &Loc = Access->getAccessInstruction() |
| ? Access->getAccessInstruction()->getDebugLoc() |
| : DebugLoc(); |
| recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc, |
| AS_ASSUMPTION); |
| } |
| } |
| |
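| // Illustration for the next two functions (hypothetical statements): if %v |
| // is defined in Stmt_a and used in Stmt_b, ensureValueWrite adds a |
| // MemoryKind::Value MUST_WRITE of %v to Stmt_a and ensureValueRead adds the |
| // matching READ to Stmt_b. |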
| void ScopBuilder::ensureValueWrite(Instruction *Inst) { |
| // Find the statement that defines the value of Inst. That statement has to |
| // write the value to make it available to those statements that read it. |
| ScopStmt *Stmt = scop->getStmtFor(Inst); |
| |
| // It is possible that the value is synthesizable within a loop (such that it |
| // is not part of any statement), but not after the loop (where you need the |
| // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will |
| // avoid this. In case the IR has no such PHI, use the last statement (where |
| // the value is synthesizable) to write the value. |
| if (!Stmt) |
| Stmt = scop->getLastStmtFor(Inst->getParent()); |
| |
| // Inst not defined within this SCoP. |
| if (!Stmt) |
| return; |
| |
| // Do not process further if the instruction is already written. |
| if (Stmt->lookupValueWriteOf(Inst)) |
| return; |
| |
| addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), |
| true, Inst, ArrayRef<const SCEV *>(), |
| ArrayRef<const SCEV *>(), MemoryKind::Value); |
| } |
| |
| void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { |
| // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality |
| // to be able to replace this one. Currently, there is a split responsibility. |
| // In a first step, the MemoryAccess is created, but without the |
| // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the |
| // AccessRelation is created. At least for scalar accesses, there is no new |
| // information available at ScopStmt::buildAccessRelations(), so we could |
| // create the AccessRelation right away. This is what |
| // ScopStmt::ensureValueRead(Value*) does. |
| |
| auto *Scope = UserStmt->getSurroundingLoop(); |
| auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false); |
| switch (VUse.getKind()) { |
| case VirtualUse::Constant: |
| case VirtualUse::Block: |
| case VirtualUse::Synthesizable: |
| case VirtualUse::Hoisted: |
| case VirtualUse::Intra: |
| // Uses of these kinds do not need a MemoryAccess. |
| break; |
| |
| case VirtualUse::ReadOnly: |
| // Add MemoryAccess for invariant values only if requested. |
| if (!ModelReadOnlyScalars) |
| break; |
| |
| [[fallthrough]]; |
| case VirtualUse::Inter: |
| |
| // Do not create another MemoryAccess for reloading the value if one already |
| // exists. |
| if (UserStmt->lookupValueReadOf(V)) |
| break; |
| |
| addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), |
| true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| MemoryKind::Value); |
| |
| // Inter-statement uses need to write the value in their defining statement. |
| if (VUse.isInter()) |
| ensureValueWrite(cast<Instruction>(V)); |
| break; |
| } |
| } |
| |
| void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt, |
| BasicBlock *IncomingBlock, |
| Value *IncomingValue, bool IsExitBlock) { |
| // As the incoming block might turn out to be an error statement ensure we |
| // will create an exit PHI SAI object. It is needed during code generation |
| // and would be created later anyway. |
| if (IsExitBlock) |
| scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, |
| MemoryKind::ExitPHI); |
| |
| // This is possible if PHI is in the SCoP's entry block. The incoming blocks |
| // from outside the SCoP's region have no statement representation. |
| if (!IncomingStmt) |
| return; |
| |
| // Make sure the incoming value is available in the incoming block. This |
| // must be done before the check for multiple PHI writes because the |
| // multiple exiting edges of a subregion can each carry the effective |
| // written value of the subregion. As such, all of them must be made |
| // available in the subregion statement. |
| ensureValueRead(IncomingValue, IncomingStmt); |
| |
| // Do not add more than one MemoryAccess per PHINode and ScopStmt. |
| if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) { |
| assert(Acc->getAccessInstruction() == PHI); |
| Acc->addIncoming(IncomingBlock, IncomingValue); |
| return; |
| } |
| |
| MemoryAccess *Acc = addMemoryAccess( |
| IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, |
| PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); |
| assert(Acc); |
| Acc->addIncoming(IncomingBlock, IncomingValue); |
| } |
| |
| void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { |
| addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, |
| PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| MemoryKind::PHI); |
| } |
| |
| void ScopBuilder::buildDomain(ScopStmt &Stmt) { |
| isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt); |
| |
| Stmt.Domain = scop->getDomainConditions(&Stmt); |
| Stmt.Domain = Stmt.Domain.set_tuple_id(Id); |
| } |
| |
| void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) { |
| isl::set Domain = Stmt.getDomain(); |
| BasicBlock *BB = Stmt.getEntryBlock(); |
| |
| Loop *L = LI.getLoopFor(BB); |
| |
| while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L)) |
| L = L->getParentLoop(); |
| |
| SmallVector<llvm::Loop *, 8> Loops; |
| |
| while (L && Stmt.getParent()->getRegion().contains(L)) { |
| Loops.push_back(L); |
| L = L->getParentLoop(); |
| } |
| |
| Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend()); |
| } |
| |
| /// Return the reduction type for a given binary operator. |
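| /// |
| /// For example, a hypothetical update such as "A[i] = A[i] + x" maps to |
| /// RT_ADD; the floating-point variants FAdd and FMul only qualify when |
| /// fast-math flags permit reassociation. |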
| static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, |
| const Instruction *Load) { |
| if (!BinOp) |
| return MemoryAccess::RT_NONE; |
| switch (BinOp->getOpcode()) { |
| case Instruction::FAdd: |
| if (!BinOp->isFast()) |
| return MemoryAccess::RT_NONE; |
| [[fallthrough]]; |
| case Instruction::Add: |
| return MemoryAccess::RT_ADD; |
| case Instruction::Or: |
| return MemoryAccess::RT_BOR; |
| case Instruction::Xor: |
| return MemoryAccess::RT_BXOR; |
| case Instruction::And: |
| return MemoryAccess::RT_BAND; |
| case Instruction::FMul: |
| if (!BinOp->isFast()) |
| return MemoryAccess::RT_NONE; |
| [[fallthrough]]; |
| case Instruction::Mul: |
| if (DisableMultiplicativeReductions) |
| return MemoryAccess::RT_NONE; |
| return MemoryAccess::RT_MUL; |
| default: |
| return MemoryAccess::RT_NONE; |
| } |
| } |
| |
| void ScopBuilder::checkForReductions(ScopStmt &Stmt) { |
| SmallVector<MemoryAccess *, 2> Loads; |
| SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates; |
| |
| // First collect candidate load-store reduction chains by iterating over all |
| // stores and collecting possible reduction loads. |
| for (MemoryAccess *StoreMA : Stmt) { |
| if (StoreMA->isRead()) |
| continue; |
| |
| Loads.clear(); |
| collectCandidateReductionLoads(StoreMA, Loads); |
| for (MemoryAccess *LoadMA : Loads) |
| Candidates.push_back(std::make_pair(LoadMA, StoreMA)); |
| } |
| |
| // Then check each possible candidate pair. |
| for (const auto &CandidatePair : Candidates) { |
| bool Valid = true; |
| isl::map LoadAccs = CandidatePair.first->getAccessRelation(); |
| isl::map StoreAccs = CandidatePair.second->getAccessRelation(); |
| |
| // Skip those with obviously unequal base addresses. |
| if (!LoadAccs.has_equal_space(StoreAccs)) { |
| continue; |
| } |
| |
| // Then check whether the candidate pair overlaps with other memory accesses. |
| isl::map AllAccsRel = LoadAccs.unite(StoreAccs); |
| AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain()); |
| isl::set AllAccs = AllAccsRel.range(); |
| |
| for (MemoryAccess *MA : Stmt) { |
| if (MA == CandidatePair.first || MA == CandidatePair.second) |
| continue; |
| |
| isl::map AccRel = |
| MA->getAccessRelation().intersect_domain(Stmt.getDomain()); |
| isl::set Accs = AccRel.range(); |
| |
| if (AllAccs.has_equal_space(Accs)) { |
| isl::set OverlapAccs = Accs.intersect(AllAccs); |
| Valid = Valid && OverlapAccs.is_empty(); |
| } |
| } |
| |
| if (!Valid) |
| continue; |
| |
| const LoadInst *Load = |
| dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction()); |
| MemoryAccess::ReductionType RT = |
| getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load); |
| |
| // If no overlapping access was found we mark the load and store as |
| // reduction like. |
| CandidatePair.first->markAsReductionLike(RT); |
| CandidatePair.second->markAsReductionLike(RT); |
| } |
| } |
| |
| void ScopBuilder::verifyInvariantLoads() { |
| auto &RIL = scop->getRequiredInvariantLoads(); |
| for (LoadInst *LI : RIL) { |
| assert(LI && scop->contains(LI)); |
| // If there exists a statement in the scop which has a memory access for |
| // @p LI, then mark this scop as infeasible for optimization. |
| for (ScopStmt &Stmt : *scop) |
| if (Stmt.getArrayAccessOrNULLFor(LI)) { |
| scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent()); |
| return; |
| } |
| } |
| } |
| |
| void ScopBuilder::hoistInvariantLoads() { |
| if (!PollyInvariantLoadHoisting) |
| return; |
| |
| isl::union_map Writes = scop->getWrites(); |
| for (ScopStmt &Stmt : *scop) { |
| InvariantAccessesTy InvariantAccesses; |
| |
| for (MemoryAccess *Access : Stmt) { |
| isl::set NHCtx = getNonHoistableCtx(Access, Writes); |
| if (!NHCtx.is_null()) |
| InvariantAccesses.push_back({Access, NHCtx}); |
| } |
| |
| // Transfer the memory access from the statement to the SCoP. |
| for (auto InvMA : InvariantAccesses) |
| Stmt.removeMemoryAccess(InvMA.MA); |
| addInvariantLoads(Stmt, InvariantAccesses); |
| } |
| } |
| |
| /// Check if an access range is too complex. |
| /// |
| /// An access range is too complex if it contains either many disjuncts or |
| /// very complex expressions. As a simple heuristic, we assume a set to |
| /// be too complex if the sum of existentially quantified dimensions and |
| /// set dimensions is larger than a threshold. This reliably detects both |
| /// sets with many disjuncts as well as sets with many divisions as they |
| /// arise in h264. |
| /// |
| /// @param AccessRange The range to check for complexity. |
| /// |
| /// @returns True if the access range is too complex. |
| static bool isAccessRangeTooComplex(isl::set AccessRange) { |
| unsigned NumTotalDims = 0; |
| |
| for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { |
| NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div)); |
| NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set)); |
| } |
| |
| if (NumTotalDims > MaxDimensionsInAccessRange) |
| return true; |
| |
| return false; |
| } |
| |
| bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA, |
| isl::union_map Writes) { |
| if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) { |
| return getNonHoistableCtx(BasePtrMA, Writes).is_null(); |
| } |
| |
| Value *BaseAddr = MA->getOriginalBaseAddr(); |
| if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr)) |
| if (!isa<LoadInst>(BasePtrInst)) |
| return scop->contains(BasePtrInst); |
| |
| return false; |
| } |
| |
| void ScopBuilder::addUserContext() { |
| if (UserContextStr.empty()) |
| return; |
| |
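| // The expected input is an isl set over the scop's parameter space, for |
| // instance (assuming a single parameter n): |
| // -polly-context='[n] -> { : n > 1024 }' |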
| isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str()); |
| isl::space Space = scop->getParamSpace(); |
| isl::size SpaceParams = Space.dim(isl::dim::param); |
| if (unsignedFromIslSize(SpaceParams) != |
| unsignedFromIslSize(UserContext.dim(isl::dim::param))) { |
| std::string SpaceStr = stringFromIslObj(Space, "null"); |
| errs() << "Error: the context provided in -polly-context has not the same " |
| << "number of dimensions than the computed context. Due to this " |
| << "mismatch, the -polly-context option is ignored. Please provide " |
| << "the context in the parameter space: " << SpaceStr << ".\n"; |
| return; |
| } |
| |
| for (auto i : rangeIslSize(0, SpaceParams)) { |
| std::string NameContext = |
| scop->getContext().get_dim_name(isl::dim::param, i); |
| std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); |
| |
| if (NameContext != NameUserContext) { |
| std::string SpaceStr = stringFromIslObj(Space, "null"); |
| errs() << "Error: the name of dimension " << i |
| << " provided in -polly-context " |
| << "is '" << NameUserContext << "', but the name in the computed " |
| << "context is '" << NameContext |
| << "'. Due to this name mismatch, " |
| << "the -polly-context option is ignored. Please provide " |
| << "the context in the parameter space: " << SpaceStr << ".\n"; |
| return; |
| } |
| |
| UserContext = UserContext.set_dim_id(isl::dim::param, i, |
| Space.get_dim_id(isl::dim::param, i)); |
| } |
| isl::set newContext = scop->getContext().intersect(UserContext); |
| scop->setContext(newContext); |
| } |
| |
| isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, |
| isl::union_map Writes) { |
| // TODO: Loads that are not loop carried, hence are in a statement with |
| // zero iterators, are by construction invariant, though we |
| // currently "hoist" them anyway. This is necessary because we allow |
| // them to be treated as parameters (e.g., in conditions) and our code |
| // generation would otherwise use the old value. |
| |
| auto &Stmt = *Access->getStatement(); |
| BasicBlock *BB = Stmt.getEntryBlock(); |
| |
| if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || |
| Access->isMemoryIntrinsic()) |
| return {}; |
| |
| // Skip accesses that have an invariant base pointer which is defined but |
| // not loaded inside the SCoP. This can happen, e.g., if a readnone call |
| // returns a pointer that is used as a base address. However, as we want |
| // to hoist indirect pointers, we allow the base pointer to be defined in |
| // the region if it is also a memory access. Each ScopArrayInfo object |
| // that has a base pointer origin has a base pointer that is loaded and |
| // that is invariant, thus it will be hoisted too. However, if there is |
| // no base pointer origin we check that the base pointer is defined |
| // outside the region. |
| auto *LI = cast<LoadInst>(Access->getAccessInstruction()); |
| if (hasNonHoistableBasePtrInScop(Access, Writes)) |
| return {}; |
| |
| isl::map AccessRelation = Access->getAccessRelation(); |
| assert(!AccessRelation.is_empty()); |
| |
| if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators())) |
| return {}; |
| |
| AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain()); |
| isl::set SafeToLoad; |
| |
| auto &DL = scop->getFunction().getParent()->getDataLayout(); |
| if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(), |
| LI->getAlign(), DL)) { |
| SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); |
| } else if (BB != LI->getParent()) { |
| // Skip accesses in non-affine subregions as they might not be executed |
| // under the same condition as the entry of the non-affine subregion. |
| return {}; |
| } else { |
| SafeToLoad = AccessRelation.range(); |
| } |
| |
| if (isAccessRangeTooComplex(AccessRelation.range())) |
| return {}; |
| |
| isl::union_map Written = Writes.intersect_range(SafeToLoad); |
| isl::set WrittenCtx = Written.params(); |
| bool IsWritten = !WrittenCtx.is_empty(); |
| |
| if (!IsWritten) |
| return WrittenCtx; |
| |
| WrittenCtx = WrittenCtx.remove_divs(); |
| bool TooComplex = |
| unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain; |
| if (TooComplex || !isRequiredInvariantLoad(LI)) |
| return {}; |
| |
| scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), |
| AS_RESTRICTION, LI->getParent()); |
| return WrittenCtx; |
| } |
| |
| static bool isAParameter(llvm::Value *maybeParam, const Function &F) { |
| for (const llvm::Argument &Arg : F.args()) |
| if (&Arg == maybeParam) |
| return true; |
| |
| return false; |
| } |
| |
| bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA, |
| bool StmtInvalidCtxIsEmpty, |
| bool MAInvalidCtxIsEmpty, |
| bool NonHoistableCtxIsEmpty) { |
| LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); |
| const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout(); |
| if (PollyAllowDereferenceOfAllFunctionParams && |
| isAParameter(LInst->getPointerOperand(), scop->getFunction())) |
| return true; |
| |
| // TODO: We can provide more information for better but more expensive |
| // results. |
| if (!isDereferenceableAndAlignedPointer( |
| LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL)) |
| return false; |
| |
| // If the location might be overwritten we do not hoist it unconditionally. |
| // |
| // TODO: This is probably too conservative. |
| if (!NonHoistableCtxIsEmpty) |
| return false; |
| |
| // If a dereferenceable load is in a statement that is modeled precisely we |
| // can hoist it. |
| if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty) |
| return true; |
| |
| // Even if the statement is not modeled precisely we can hoist the load if it |
| // does not involve any parameters that might have been specialized by the |
| // statement domain. |
| for (const SCEV *Subscript : MA->subscripts()) |
| if (!isa<SCEVConstant>(Subscript)) |
| return false; |
| return true; |
| } |
| |
| void ScopBuilder::addInvariantLoads(ScopStmt &Stmt, |
| InvariantAccessesTy &InvMAs) { |
| if (InvMAs.empty()) |
| return; |
| |
| isl::set StmtInvalidCtx = Stmt.getInvalidContext(); |
| bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty(); |
| |
| // Get the context under which the statement is executed but remove the error |
| // context under which this statement is reached. |
| isl::set DomainCtx = Stmt.getDomain().params(); |
| DomainCtx = DomainCtx.subtract(StmtInvalidCtx); |
| |
| if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) { |
| auto *AccInst = InvMAs.front().MA->getAccessInstruction(); |
| scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent()); |
| return; |
| } |
| |
| // Project out all parameters that relate to loads in the statement. Otherwise |
| // we could have cyclic dependences on the constraints under which the |
| // hoisted loads are executed and we could not determine an order in which to |
| // pre-load them. This happens because not only lower bounds are part of the |
| // domain but also upper bounds. |
| for (auto &InvMA : InvMAs) { |
| auto *MA = InvMA.MA; |
| Instruction *AccInst = MA->getAccessInstruction(); |
| if (SE.isSCEVable(AccInst->getType())) { |
| SetVector<Value *> Values; |
| for (const SCEV *Parameter : scop->parameters()) { |
| Values.clear(); |
| findValues(Parameter, SE, Values); |
| if (!Values.count(AccInst)) |
| continue; |
| |
| isl::id ParamId = scop->getIdForParam(Parameter); |
| if (!ParamId.is_null()) { |
| int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId); |
| if (Dim >= 0) |
| DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1); |
| } |
| } |
| } |
| } |
| |
| for (auto &InvMA : InvMAs) { |
| auto *MA = InvMA.MA; |
| isl::set NHCtx = InvMA.NonHoistableCtx; |
| |
| // Check for another invariant access that accesses the same location as |
| // MA and if found consolidate them. Otherwise create a new equivalence |
| // class at the end of InvariantEquivClasses. |
| LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); |
| Type *Ty = LInst->getType(); |
| const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); |
| |
| isl::set MAInvalidCtx = MA->getInvalidContext(); |
| bool NonHoistableCtxIsEmpty = NHCtx.is_empty(); |
| bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty(); |
| |
| isl::set MACtx; |
| // Check if we know that this pointer can be speculatively accessed. |
| if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty, |
| NonHoistableCtxIsEmpty)) { |
| MACtx = isl::set::universe(DomainCtx.get_space()); |
| } else { |
| MACtx = DomainCtx; |
| MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx)); |
| MACtx = MACtx.gist_params(scop->getContext()); |
| } |
| |
| bool Consolidated = false; |
| for (auto &IAClass : scop->invariantEquivClasses()) { |
| if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType) |
| continue; |
| |
| // If the pointer and the type are equal, check if the access function |
| // w.r.t. the domain is equal too. It can happen that the domain fixes |
| // parameter values and these can differ for distinct parts of the |
| // SCoP. If this happens we cannot consolidate the loads but need to |
| // create a new invariant load equivalence class. |
| auto &MAs = IAClass.InvariantAccesses; |
| if (!MAs.empty()) { |
| auto *LastMA = MAs.front(); |
| |
| isl::set AR = MA->getAccessRelation().range(); |
| isl::set LastAR = LastMA->getAccessRelation().range(); |
| bool SameAR = AR.is_equal(LastAR); |
| |
| if (!SameAR) |
| continue; |
| } |
| |
| // Add MA to the list of accesses that are in this class. |
| MAs.push_front(MA); |
| |
| Consolidated = true; |
| |
| // Unify the execution context of the class and this statement. |
| isl::set IAClassDomainCtx = IAClass.ExecutionContext; |
| if (!IAClassDomainCtx.is_null()) |
| IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce(); |
| else |
| IAClassDomainCtx = MACtx; |
| IAClass.ExecutionContext = IAClassDomainCtx; |
| break; |
| } |
| |
| if (Consolidated) |
| continue; |
| |
| MACtx = MACtx.coalesce(); |
| |
| // If we did not consolidate MA, thus did not find an equivalence class |
| // for it, we create a new one. |
| scop->addInvariantEquivClass( |
| InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty}); |
| } |
| } |
| |
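| // A typical candidate chain for the function below, sketched on |
| // hypothetical source: |
| // sum[j] = sum[j] + A[i]; |
| // i.e. a load of sum[j], exactly one commutative and associative binary |
| // operator with a single use, and a store back to sum[j] in the same basic |
| // block. |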
| void ScopBuilder::collectCandidateReductionLoads( |
| MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) { |
| ScopStmt *Stmt = StoreMA->getStatement(); |
| |
| auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction()); |
| if (!Store) |
| return; |
| |
| // Skip if there is not one binary operator between the load and the store |
| auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand()); |
| if (!BinOp) |
| return; |
| |
| // Skip if the binary operator has multiple uses. |
| if (BinOp->getNumUses() != 1) |
| return; |
| |
| // Skip if the opcode of the binary operator is not commutative/associative |
| if (!BinOp->isCommutative() || !BinOp->isAssociative()) |
| return; |
| |
| // Skip if the binary operator is not in the same basic block as the store. |
| if (BinOp->getParent() != Store->getParent()) |
| return; |
| |
| // Skip if it is a multiplicative reduction and we disabled them |
| if (DisableMultiplicativeReductions && |
| (BinOp->getOpcode() == Instruction::Mul || |
| BinOp->getOpcode() == Instruction::FMul)) |
| return; |
| |
| // Check the binary operator operands for a candidate load |
| auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0)); |
| auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1)); |
| if (!PossibleLoad0 && !PossibleLoad1) |
| return; |
| |
| // A load is only a candidate if it cannot escape (thus has only this use) |
| if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) |
| if (PossibleLoad0->getParent() == Store->getParent()) |
| Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0)); |
| if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) |
| if (PossibleLoad1->getParent() == Store->getParent()) |
| Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1)); |
| } |
| |
| /// Find the canonical scop array info object for a set of invariant load |
| /// hoisted loads. The canonical array is the one that corresponds to the |
| /// first load in the list of accesses that is used as the base pointer of a |
| /// scop array. |
| static const ScopArrayInfo *findCanonicalArray(Scop &S, |
| MemoryAccessList &Accesses) { |
| for (MemoryAccess *Access : Accesses) { |
| const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull( |
| Access->getAccessInstruction(), MemoryKind::Array); |
| if (CanonicalArray) |
| return CanonicalArray; |
| } |
| return nullptr; |
| } |
| |
| /// Check if @p Array serves as the base array in an invariant load. |
| static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) { |
| for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses()) |
| for (MemoryAccess *Access2 : EqClass2.InvariantAccesses) |
| if (Access2->getScopArrayInfo() == Array) |
| return true; |
| return false; |
| } |
| |
| /// Replace the base pointer arrays in all memory accesses referencing @p Old |
| /// with a reference to @p New. |
| static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old, |
| const ScopArrayInfo *New) { |
| for (ScopStmt &Stmt : S) |
| for (MemoryAccess *Access : Stmt) { |
| if (Access->getLatestScopArrayInfo() != Old) |
| continue; |
| |
| isl::id Id = New->getBasePtrId(); |
| isl::map Map = Access->getAccessRelation(); |
| Map = Map.set_tuple_id(isl::dim::out, Id); |
| Access->setAccessRelation(Map); |
| } |
| } |
| |
| void ScopBuilder::canonicalizeDynamicBasePtrs() { |
| for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) { |
| MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses; |
| |
| const ScopArrayInfo *CanonicalBasePtrSAI = |
| findCanonicalArray(*scop, BasePtrAccesses); |
| |
| if (!CanonicalBasePtrSAI) |
| continue; |
| |
| for (MemoryAccess *BasePtrAccess : BasePtrAccesses) { |
| const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull( |
| BasePtrAccess->getAccessInstruction(), MemoryKind::Array); |
| if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI || |
| !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI)) |
| continue; |
| |
| // We currently do not canonicalize arrays where some accesses are |
| // hoisted as invariant loads. If we did, we would need to update the |
| // access functions of the invariant loads as well. However, as this is not a |
| // very common situation, we leave this for now to avoid further |
| // complexity increases. |
| if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI)) |
| continue; |
| |
| replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI); |
| } |
| } |
| } |
| |
| void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { |
| for (MemoryAccess *Access : Stmt.MemAccs) { |
| Type *ElementType = Access->getElementType(); |
| |
| MemoryKind Ty; |
| if (Access->isPHIKind()) |
| Ty = MemoryKind::PHI; |
| else if (Access->isExitPHIKind()) |
| Ty = MemoryKind::ExitPHI; |
| else if (Access->isValueKind()) |
| Ty = MemoryKind::Value; |
| else |
| Ty = MemoryKind::Array; |
| |
|     // Create isl::pw_aff objects for the SCEVs describing the sizes and |
|     // collect all assumptions taken while doing so. The isl::pw_aff objects |
|     // are cached internally and used later by the Scop. |
| for (const SCEV *Size : Access->Sizes) { |
| if (!Size) |
| continue; |
| scop->getPwAff(Size, nullptr, false, &RecordedAssumptions); |
| } |
| auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), |
| ElementType, Access->Sizes, Ty); |
| |
|     // Create isl::pw_aff objects for the SCEVs describing the subscripts and |
|     // collect all assumptions taken while doing so. The isl::pw_aff objects |
|     // are cached internally and used later by the Scop. |
| for (const SCEV *Subscript : Access->subscripts()) { |
| if (!Access->isAffine() || !Subscript) |
| continue; |
| scop->getPwAff(Subscript, Stmt.getEntryBlock(), false, |
| &RecordedAssumptions); |
| } |
| Access->buildAccessRelation(SAI); |
| scop->addAccessData(Access); |
| } |
| } |
| |
| /// Add the minimal/maximal access in @p Set to @p MinMaxAccesses. |
| /// |
| /// @return True if more accesses should be added, false if we reached the |
| /// maximal number of run-time checks to be generated. |
| static bool buildMinMaxAccess(isl::set Set, |
| Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) { |
| isl::pw_multi_aff MinPMA, MaxPMA; |
| isl::pw_aff LastDimAff; |
| isl::aff OneAff; |
| unsigned Pos; |
| |
| Set = Set.remove_divs(); |
| polly::simplify(Set); |
| |
| if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts) |
| Set = Set.simple_hull(); |
| |
| // Restrict the number of parameters involved in the access as the lexmin/ |
| // lexmax computation will take too long if this number is high. |
| // |
| // Experiments with a simple test case using an i7 4800MQ: |
| // |
| // #Parameters involved | Time (in sec) |
| // 6 | 0.01 |
| // 7 | 0.04 |
| // 8 | 0.12 |
| // 9 | 0.40 |
| // 10 | 1.54 |
| // 11 | 6.78 |
| // 12 | 30.38 |
| // |
| if (isl_set_n_param(Set.get()) > |
| static_cast<isl_size>(RunTimeChecksMaxParameters)) { |
| unsigned InvolvedParams = 0; |
| for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++) |
| if (Set.involves_dims(isl::dim::param, u, 1)) |
| InvolvedParams++; |
| |
| if (InvolvedParams > RunTimeChecksMaxParameters) |
| return false; |
| } |
| |
| MinPMA = Set.lexmin_pw_multi_aff(); |
| MaxPMA = Set.lexmax_pw_multi_aff(); |
| |
| MinPMA = MinPMA.coalesce(); |
| MaxPMA = MaxPMA.coalesce(); |
| |
| if (MaxPMA.is_null()) |
| return false; |
| |
| unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out)); |
| |
| // Adjust the last dimension of the maximal access by one as we want to |
| // enclose the accessed memory region by MinPMA and MaxPMA. The pointer |
| // we test during code generation might now point after the end of the |
| // allocated array but we will never dereference it anyway. |
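|   // E.g., if the maximal accessed element were A[i][n - 1], the bound used in |
|   // the run-time check becomes A[i][n], i.e., one past the end in the last |
|   // dimension. |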
| assert(MaxOutputSize >= 1 && "Assumed at least one output dimension"); |
| |
| Pos = MaxOutputSize - 1; |
| LastDimAff = MaxPMA.at(Pos); |
| OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); |
| OneAff = OneAff.add_constant_si(1); |
| LastDimAff = LastDimAff.add(OneAff); |
| MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff); |
| |
| if (MinPMA.is_null() || MaxPMA.is_null()) |
| return false; |
| |
| MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA)); |
| |
| return true; |
| } |
| |
| /// Wrapper function to calculate minimal/maximal accesses to each array. |
| bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup, |
| Scop::MinMaxVectorTy &MinMaxAccesses) { |
| MinMaxAccesses.reserve(AliasGroup.size()); |
| |
| isl::union_set Domains = scop->getDomains(); |
| isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx()); |
| |
| for (MemoryAccess *MA : AliasGroup) |
| Accesses = Accesses.unite(MA->getAccessRelation()); |
| |
| Accesses = Accesses.intersect_domain(Domains); |
| isl::union_set Locations = Accesses.range(); |
| |
| bool LimitReached = false; |
| for (isl::set Set : Locations.get_set_list()) { |
| LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop); |
| if (LimitReached) |
| break; |
| } |
| |
| return !LimitReached; |
| } |
| |
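| /// Return the domain of @p MA's statement with all set dimensions projected |
| /// out, i.e., only the parameter constraints under which the statement is |
| /// executed remain. |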
| static isl::set getAccessDomain(MemoryAccess *MA) { |
| isl::set Domain = MA->getStatement()->getDomain(); |
| Domain = Domain.project_out(isl::dim::set, 0, |
| unsignedFromIslSize(Domain.tuple_dim())); |
| return Domain.reset_tuple_id(); |
| } |
| |
| bool ScopBuilder::buildAliasChecks() { |
| if (!PollyUseRuntimeAliasChecks) |
| return true; |
| |
| if (buildAliasGroups()) { |
|     // Aliasing assumptions do not go through addAssumption, but we still |
|     // want to collect statistics, so we do it here explicitly. |
| if (scop->getAliasGroups().size()) |
| Scop::incrementNumberOfAliasingAssumptions(1); |
| return true; |
| } |
| |
| // If a problem occurs while building the alias groups we need to delete |
| // this SCoP and pretend it wasn't valid in the first place. To this end |
| // we make the assumed context infeasible. |
| scop->invalidate(ALIASING, DebugLoc()); |
| |
| LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr() |
| << " could not be created. This SCoP has been dismissed."); |
| return false; |
| } |
| |
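| // Collect all array-typed memory accesses of statements with a non-empty |
| // domain into an AliasSetTracker, and turn every may-alias set that contains |
| // at least two accesses into an alias group. Arrays that are written to are |
| // recorded separately in HasWriteAccess. |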
| std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>> |
| ScopBuilder::buildAliasGroupsForAccesses() { |
| BatchAAResults BAA(AA); |
| AliasSetTracker AST(BAA); |
| |
| DenseMap<Value *, MemoryAccess *> PtrToAcc; |
| DenseSet<const ScopArrayInfo *> HasWriteAccess; |
| for (ScopStmt &Stmt : *scop) { |
| |
| isl::set StmtDomain = Stmt.getDomain(); |
| bool StmtDomainEmpty = StmtDomain.is_empty(); |
| |
| // Statements with an empty domain will never be executed. |
| if (StmtDomainEmpty) |
| continue; |
| |
| for (MemoryAccess *MA : Stmt) { |
| if (MA->isScalarKind()) |
| continue; |
| if (!MA->isRead()) |
| HasWriteAccess.insert(MA->getScopArrayInfo()); |
| MemAccInst Acc(MA->getAccessInstruction()); |
| if (MA->isRead() && isa<MemTransferInst>(Acc)) |
| PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA; |
| else |
| PtrToAcc[Acc.getPointerOperand()] = MA; |
| AST.add(Acc); |
| } |
| } |
| |
| AliasGroupVectorTy AliasGroups; |
| for (AliasSet &AS : AST) { |
| if (AS.isMustAlias() || AS.isForwardingAliasSet()) |
| continue; |
| AliasGroupTy AG; |
| for (auto &PR : AS) |
| AG.push_back(PtrToAcc[PR.getValue()]); |
| if (AG.size() < 2) |
| continue; |
| AliasGroups.push_back(std::move(AG)); |
| } |
| |
| return std::make_tuple(AliasGroups, HasWriteAccess); |
| } |
| |
| bool ScopBuilder::buildAliasGroups() { |
|   // To create sound alias checks we perform the following steps: |
|   //   o) We partition each group into read-only and non-read-only accesses. |
|   //   o) For each group with more than one base pointer we then compute |
|   //      minimal and maximal accesses to each array of the group in the |
|   //      read-only and non-read-only partitions separately. |
| AliasGroupVectorTy AliasGroups; |
| DenseSet<const ScopArrayInfo *> HasWriteAccess; |
| |
| std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(); |
| |
| splitAliasGroupsByDomain(AliasGroups); |
| |
| for (AliasGroupTy &AG : AliasGroups) { |
| if (!scop->hasFeasibleRuntimeContext()) |
| return false; |
| |
| { |
| IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut); |
| bool Valid = buildAliasGroup(AG, HasWriteAccess); |
| if (!Valid) |
| return false; |
| } |
| if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) { |
| scop->invalidate(COMPLEXITY, DebugLoc()); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildAliasGroup( |
| AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) { |
| AliasGroupTy ReadOnlyAccesses; |
| AliasGroupTy ReadWriteAccesses; |
| SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays; |
| SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays; |
| |
| if (AliasGroup.size() < 2) |
| return true; |
| |
| for (MemoryAccess *Access : AliasGroup) { |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias", |
| Access->getAccessInstruction()) |
| << "Possibly aliasing pointer, use restrict keyword."); |
| const ScopArrayInfo *Array = Access->getScopArrayInfo(); |
| if (HasWriteAccess.count(Array)) { |
| ReadWriteArrays.insert(Array); |
| ReadWriteAccesses.push_back(Access); |
| } else { |
| ReadOnlyArrays.insert(Array); |
| ReadOnlyAccesses.push_back(Access); |
| } |
| } |
| |
| // If there are no read-only pointers, and less than two read-write pointers, |
| // no alias check is needed. |
| if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) |
| return true; |
| |
| // If there is no read-write pointer, no alias check is needed. |
| if (ReadWriteArrays.empty()) |
| return true; |
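|   // For example, a group consisting only of reads from two arrays is |
|   // filtered out by the checks above, whereas a group that writes one array |
|   // and reads another proceeds to the bound computation below. |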
| |
| // For non-affine accesses, no alias check can be generated as we cannot |
| // compute a sufficiently tight lower and upper bound: bail out. |
| for (MemoryAccess *MA : AliasGroup) { |
| if (!MA->isAffine()) { |
| scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(), |
| MA->getAccessInstruction()->getParent()); |
| return false; |
| } |
| } |
| |
| // Ensure that for all memory accesses for which we generate alias checks, |
| // their base pointers are available. |
| for (MemoryAccess *MA : AliasGroup) { |
| if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA)) |
| scop->addRequiredInvariantLoad( |
| cast<LoadInst>(BasePtrMA->getAccessInstruction())); |
| } |
| |
| Scop::MinMaxVectorTy MinMaxAccessesReadWrite; |
| Scop::MinMaxVectorTy MinMaxAccessesReadOnly; |
| |
| bool Valid; |
| |
| Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite); |
| |
| if (!Valid) |
| return false; |
| |
| // Bail out if the number of values we need to compare is too large. |
| // This is important as the number of comparisons grows quadratically with |
| // the number of values we need to compare. |
| if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() > |
| RunTimeChecksMaxArraysPerGroup) |
| return false; |
| |
| Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly); |
| |
| scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly); |
| if (!Valid) |
| return false; |
| |
| return true; |
| } |
| |
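| // If the (parametric) domains of two accesses are disjoint, the accesses can |
| // never be executed for the same parameter values, so no alias check between |
| // them is required; such accesses are moved into separate groups. |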
| void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) { |
| for (unsigned u = 0; u < AliasGroups.size(); u++) { |
| AliasGroupTy NewAG; |
| AliasGroupTy &AG = AliasGroups[u]; |
| AliasGroupTy::iterator AGI = AG.begin(); |
| isl::set AGDomain = getAccessDomain(*AGI); |
| while (AGI != AG.end()) { |
| MemoryAccess *MA = *AGI; |
| isl::set MADomain = getAccessDomain(MA); |
| if (AGDomain.is_disjoint(MADomain)) { |
| NewAG.push_back(MA); |
| AGI = AG.erase(AGI); |
| } else { |
| AGDomain = AGDomain.unite(MADomain); |
| AGI++; |
| } |
| } |
| if (NewAG.size() > 1) |
| AliasGroups.push_back(std::move(NewAG)); |
| } |
| } |
| |
| #ifndef NDEBUG |
| static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) { |
| auto PhysUse = VirtualUse::create(S, Op, &LI, false); |
| auto VirtUse = VirtualUse::create(S, Op, &LI, true); |
| assert(PhysUse.getKind() == VirtUse.getKind()); |
| } |
| |
| /// Check the consistency of every statement's MemoryAccesses. |
| /// |
| /// The check is carried out by expecting the "physical" kind of use (derived |
| /// from the BasicBlock the instruction resides in) to be the same as the |
| /// "virtual" kind of use (derived from a statement's MemoryAccess). |
| /// |
| /// The "physical" uses are taken by ensureValueRead to determine whether to |
| /// create MemoryAccesses. When done, the kind of scalar access should be the |
| /// same no matter which way it was derived. |
| /// |
| /// The MemoryAccesses might be changed by later SCoP-modifying passes and |
| /// hence can intentionally influence the kind of uses (which then no longer |
| /// correspond to the "physical" ones, hence called "virtual"). The code |
| /// generator therefore has to pick up the virtual uses. Here in the |
| /// ScopBuilder, however, this has not happened yet, such that virtual and |
| /// physical uses are equivalent. |
| static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) { |
| for (auto *BB : S->getRegion().blocks()) { |
| for (auto &Inst : *BB) { |
| auto *Stmt = S->getStmtFor(&Inst); |
| if (!Stmt) |
| continue; |
| |
| if (isIgnoredIntrinsic(&Inst)) |
| continue; |
| |
| // Branch conditions are encoded in the statement domains. |
| if (Inst.isTerminator() && Stmt->isBlockStmt()) |
| continue; |
| |
| // Verify all uses. |
| for (auto &Op : Inst.operands()) |
| verifyUse(S, Op, LI); |
| |
| // Stores do not produce values used by other statements. |
| if (isa<StoreInst>(Inst)) |
| continue; |
| |
|       // For every value defined in the block, also check that a use of that |
|       // value in the same statement would not be an inter-statement use. It |
|       // can still be synthesizable or load-hoisted, but these kinds of |
|       // instructions are not directly copied during code generation. |
| auto VirtDef = |
| VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true); |
| assert(VirtDef.getKind() == VirtualUse::Synthesizable || |
| VirtDef.getKind() == VirtualUse::Intra || |
| VirtDef.getKind() == VirtualUse::Hoisted); |
| } |
| } |
| |
| if (S->hasSingleExitEdge()) |
| return; |
| |
| // PHINodes in the SCoP region's exit block are also uses to be checked. |
| if (!S->getRegion().isTopLevelRegion()) { |
| for (auto &Inst : *S->getRegion().getExit()) { |
| if (!isa<PHINode>(Inst)) |
| break; |
| |
| for (auto &Op : Inst.operands()) |
| verifyUse(S, Op, LI); |
| } |
| } |
| } |
| #endif |
| |
| void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) { |
| scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE, |
| SD.getNextID())); |
| |
| buildStmts(R); |
| |
|   // Create MemoryAccesses for all required invariant loads first. These are |
|   // categorized as 'synthesizable' and therefore are not part of any |
|   // ScopStmt, but their accesses still need to be created somewhere. |
| const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); |
| for (BasicBlock *BB : scop->getRegion().blocks()) { |
| if (SD.isErrorBlock(*BB, scop->getRegion())) |
| continue; |
| |
| for (Instruction &Inst : *BB) { |
| LoadInst *Load = dyn_cast<LoadInst>(&Inst); |
| if (!Load) |
| continue; |
| |
| if (!RIL.count(Load)) |
| continue; |
| |
|       // Invariant loads require a MemoryAccess to be created in some |
|       // statement. It is not important to which statement the MemoryAccess |
|       // is added because it will later be removed from the ScopStmt again. |
|       // We choose the first statement of the basic block the LoadInst is in. |
| ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB); |
| assert(!List.empty()); |
| ScopStmt *RILStmt = List.front(); |
| buildMemoryAccess(Load, RILStmt); |
| } |
| } |
| buildAccessFunctions(); |
| |
|   // In case the region does not have a single exit edge, we will later |
|   // (during code generation) split the exit block. This will move potential |
|   // PHI nodes from the current exit block into the new region exiting block. |
|   // Hence, PHI nodes that are currently not part of the region will become |
|   // part of it. To handle these PHI nodes later, we now model their operands |
|   // as scalar accesses. Note that we do not model anything in the exit block |
|   // if the region has a single exit edge, as there will not be any splitting |
|   // later. |
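|   // For example (hypothetical IR): a PHI `%p = phi [%v, %exiting], ...` in |
|   // the exit block uses %v defined inside the region; modeling its operands |
|   // as scalar accesses now lets code generation recreate %p after the exit |
|   // block is split. |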
| if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) { |
| for (Instruction &Inst : *R.getExit()) { |
| PHINode *PHI = dyn_cast<PHINode>(&Inst); |
| if (!PHI) |
| break; |
| |
| buildPHIAccesses(nullptr, PHI, nullptr, true); |
| } |
| } |
| |
| // Create memory accesses for global reads since all arrays are now known. |
| auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0); |
| for (auto GlobalReadPair : GlobalReads) { |
| ScopStmt *GlobalReadStmt = GlobalReadPair.first; |
| Instruction *GlobalRead = GlobalReadPair.second; |
| for (auto *BP : ArrayBasePointers) |
| addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, |
| BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); |
| } |
| |
| buildInvariantEquivalenceClasses(); |
| |
| /// A map from basic blocks to their invalid domains. |
| DenseMap<BasicBlock *, isl::set> InvalidDomainMap; |
| |
| if (!buildDomains(&R, InvalidDomainMap)) { |
| LLVM_DEBUG( |
| dbgs() << "Bailing-out because buildDomains encountered problems\n"); |
| return; |
| } |
| |
| addUserAssumptions(AC, InvalidDomainMap); |
| |
| // Initialize the invalid domain. |
| for (ScopStmt &Stmt : scop->Stmts) |
| if (Stmt.isBlockStmt()) |
| Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]); |
| else |
| Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock( |
| Stmt.getRegion()->getNode())]); |
| |
| // Remove empty statements. |
| // Exit early in case there are no executable statements left in this scop. |
| scop->removeStmtNotInDomainMap(); |
| scop->simplifySCoP(false); |
| if (scop->isEmpty()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n"); |
| return; |
| } |
| |
| // The ScopStmts now have enough information to initialize themselves. |
| for (ScopStmt &Stmt : *scop) { |
| collectSurroundingLoops(Stmt); |
| |
| buildDomain(Stmt); |
| buildAccessRelations(Stmt); |
| |
| if (DetectReductions) |
| checkForReductions(Stmt); |
| } |
| |
| // Check early for a feasible runtime context. |
| if (!scop->hasFeasibleRuntimeContext()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n"); |
| return; |
| } |
| |
|   // Check early for profitability. Afterwards, profitability cannot change |
|   // anymore; only the runtime context could still become infeasible. |
| if (!scop->isProfitable(UnprofitableScalarAccs)) { |
| scop->invalidate(PROFITABLE, DebugLoc()); |
| LLVM_DEBUG( |
| dbgs() << "Bailing-out because SCoP is not considered profitable\n"); |
| return; |
| } |
| |
| buildSchedule(); |
| |
| finalizeAccesses(); |
| |
| scop->realignParams(); |
| addUserContext(); |
| |
|   // Now that the context is fully constructed and thus contains all our |
|   // knowledge about the parameters, add all recorded assumptions to the |
|   // assumed/invalid context. |
| addRecordedAssumptions(); |
| |
| scop->simplifyContexts(); |
| if (!buildAliasChecks()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); |
| return; |
| } |
| |
| hoistInvariantLoads(); |
| canonicalizeDynamicBasePtrs(); |
| verifyInvariantLoads(); |
| scop->simplifySCoP(true); |
| |
|   // Check again for a feasible runtime context; the user context, the |
|   // recorded assumptions, and the alias checks may have rendered it |
|   // infeasible. Profitability needs no re-check because it cannot have |
|   // changed. |
| if (!scop->hasFeasibleRuntimeContext()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n"); |
| return; |
| } |
| |
| #ifndef NDEBUG |
| verifyUses(scop.get(), LI, DT); |
| #endif |
| } |
| |
| ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA, |
| const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, |
| ScopDetection &SD, ScalarEvolution &SE, |
| OptimizationRemarkEmitter &ORE) |
| : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) { |
| DebugLoc Beg, End; |
| auto P = getBBPairForRegion(R); |
| getDebugLocations(P, Beg, End); |
| |
| std::string Msg = "SCoP begins here."; |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) |
| << Msg); |
| |
| buildScop(*R, AC); |
| |
| LLVM_DEBUG(dbgs() << *scop); |
| |
| if (!scop->hasFeasibleRuntimeContext()) { |
| InfeasibleScops++; |
| Msg = "SCoP ends here but was dismissed."; |
| LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n"); |
| RecordedAssumptions.clear(); |
| scop.reset(); |
| } else { |
| Msg = "SCoP ends here."; |
| ++ScopFound; |
| if (scop->getMaxLoopDepth() > 0) |
| ++RichScopFound; |
| } |
| |
| if (R->isTopLevelRegion()) |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first) |
| << Msg); |
| else |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second) |
| << Msg); |
| } |