blob: 8df9540d1a0e5b948fae3b498348985396477eb5 [file] [log] [blame] [edit]
//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SystemZMachineScheduler.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
/// Pre-RA scheduling ///
// Return true if MO is a register operand that is a definition.
static bool isRegDef(const MachineOperand &MO) {
  if (!MO.isReg())
    return false;
  return MO.isDef();
}
// Return true if MO is a definition of a physical register.
static bool isPhysRegDef(const MachineOperand &MO) {
  if (!isRegDef(MO))
    return false;
  return MO.getReg().isPhysical();
}
void SystemZPreRASchedStrategy::initializeLatencyReduction() {
  // Enable latency reduction for a region that has a considerable amount of
  // data sequences that should be interleaved. These are SUs that only have
  // one data predecessor / successor edge(s) to their adjacent instruction(s)
  // in the input order. Disable if region has many SUs relative to the
  // overall height.
  unsigned DAGHeight = 0;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
    DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
  // A wide region (many SUs per unit of height) is unlikely to be latency
  // limited, so skip the latency heuristic entirely there.
  RegionPolicy.DisableLatencyHeuristic =
      DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);

  if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
    // Count the nodes that belong to def-use sequences in the input order.
    // CurrSequence tracks the length of the sequence currently being scanned;
    // NumSeqNodes accumulates the total over all closed sequences.
    unsigned CurrSequence = 0, NumSeqNodes = 0;
    // Close the current sequence: only sequences of two or more nodes count.
    auto countSequence = [&CurrSequence, &NumSeqNodes]() {
      if (CurrSequence >= 2)
        NumSeqNodes += CurrSequence;
      CurrSequence = 0;
    };
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      const SUnit *SU = &DAG->SUnits[Idx];
      bool InDataSequence = true;
      // One Data pred to MI just above, or no preds.
      unsigned NumPreds = 0;
      for (const SDep &Pred : SU->Preds)
        if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
            Pred.getSUnit()->NodeNum != Idx - 1)
          InDataSequence = false;
      // One Data succ or no succs (ignoring ExitSU).
      unsigned NumSuccs = 0;
      for (const SDep &Succ : SU->Succs)
        if (Succ.getSUnit() != &DAG->ExitSU &&
            (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
          InDataSequence = false;
      // Another type of node or one that does not have a single data pred
      // ends any previous sequence.
      if (!InDataSequence || !NumPreds)
        countSequence();
      if (InDataSequence)
        CurrSequence++;
    }
    // Close a sequence that extends to the end of the region.
    countSequence();
    // Keep latency scheduling enabled only if a significant share of the
    // region (at least a quarter, and no fewer than 4 nodes) is in sequences.
    if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) {
      LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
                        << NumSeqNodes << ". ";);
    } else
      HasDataSequences = false;
  }
}
// Return true if MI (re)defines the register currently recorded as the
// source of a compare-with-zero. With CCDef set, only instructions that
// also implicitly define CC qualify.
bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
                                               bool CCDef) const {
  // No compare-with-zero source currently tracked: nothing can match.
  if (Cmp0SrcReg == SystemZ::NoRegister)
    return false;
  if (!MI->getNumOperands())
    return false;
  // When a CC def is required, skip instructions that do not clobber CC.
  if (CCDef && !MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC))
    return false;
  const MachineOperand &MO0 = MI->getOperand(0);
  return isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg;
}
// Like biasPhysReg() (bottom-up), but additionally gives a small bias to any
// non-copy instruction whose first operand defines a physical register.
static int biasPhysRegExtra(const SUnit *SU) {
  // The common physreg bias takes precedence when it applies.
  if (int Res = biasPhysReg(SU, /*isTop=*/false))
    return Res;
  // Also recognize Load Address. Most of these are with an FI operand.
  const MachineInstr *MI = SU->getInstr();
  if (!MI->getNumOperands() || MI->isCopy())
    return 0;
  return isPhysRegDef(MI->getOperand(0)) ? 1 : 0;
}
// Apply the SystemZ pre-RA heuristics in priority order to decide whether
// TryCand should replace Cand. Returns true if TryCand is the better choice.
bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                             SchedCandidate &TryCand,
                                             SchedBoundary *Zone) const {
  assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = FirstValid;
    return true;
  }

  // Bias physreg defs and copies to their uses and definitions respectively.
  int TryCandPRegBias = biasPhysRegExtra(TryCand.SU);
  int CandPRegBias = biasPhysRegExtra(Cand.SU);
  if (tryGreater(TryCandPRegBias, CandPRegBias, TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;
  if (TryCandPRegBias && CandPRegBias) {
    // Both biased same way: resolve by node order immediately so that the
    // lower-priority heuristics below cannot separate them.
    tryGreater(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand, NodeOrder);
    return TryCand.Reason != NoCand;
  }

  // Don't extend the scheduled latency in regions with many nodes in data
  // sequences, or for (single block loop) regions that are acyclically
  // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
  // is set only after initialization in registerRoots(), which is why it is
  // checked here instead of earlier.
  if (!RegionPolicy.DisableLatencyHeuristic &&
      (HasDataSequences || Rem.IsAcyclicLatencyLimited))
    // Pick the SU with the greater height, or null if the heights are equal
    // (in which case neither is preferable on latency grounds).
    if (const SUnit *HigherSU =
            TryCand.SU->getHeight() > Cand.SU->getHeight()   ? TryCand.SU
            : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
                                                             : nullptr)
      if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
          HigherSU->getDepth() < computeRemLatency(*Zone)) {
        // One or both SUs increase the scheduled latency. Prefer the one
        // with the smaller height (bottom-up: schedules later).
        tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
                GenericSchedulerBase::BotHeightReduce);
        return TryCand.Reason != NoCand;
      }

  // Weak edges help copy coalescing.
  if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
              Weak))
    return TryCand.Reason != NoCand;

  // Help compare with zero elimination: prefer scheduling the definition of
  // the compared register directly before the compare.
  if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
                 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
    return TryCand.Reason != NoCand;

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  return false;
}
// Set the per-region scheduling policy before the DAG is built.
void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                           MachineBasicBlock::iterator End,
                                           unsigned NumRegionInstrs) {
  // Only pay for the register pressure tracker where it is actually used:
  // computeCyclicCriticalPath() on single block loops, and only for regions
  // that are not trivially small.
  MachineBasicBlock *MBB = Begin->getParent();
  bool IsSingleBlockLoop = MBB->isSuccessor(MBB);
  RegionPolicy.ShouldTrackPressure = IsSingleBlockLoop && NumRegionInstrs >= 8;

  // These heuristics have so far seemed to work better without adding a
  // top-down boundary.
  RegionPolicy.OnlyBottomUp = true;

  // Remember the region size and the index of its bottom instruction.
  BotIdx = NumRegionInstrs - 1;
  this->NumRegionInstrs = NumRegionInstrs;
}
// Per-region initialization: reset the compare-with-zero tracking and decide
// whether latency reduction should be applied to this region.
void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
  GenericScheduler::initialize(dag);
  // No compare-with-zero source seen yet in this region.
  Cmp0SrcReg = SystemZ::NoRegister;
  // Must run after GenericScheduler::initialize(), which sets up DAG.
  initializeLatencyReduction();
  LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
                    << "enabled for data sequences.\n";);
}
// Update the compare-with-zero tracking as nodes are scheduled (bottom-up):
// a compare-with-zero starts tracking its source register; any instruction
// that clobbers CC or redefines that source register ends the tracking.
void SystemZPreRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  GenericScheduler::schedNode(SU, IsTopNode);

  const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
  MachineInstr *MI = SU->getInstr();
  if (TII->isCompareZero(*MI))
    // Start looking for the definition of the compared register.
    Cmp0SrcReg = TII->getCompareSourceReg(*MI);
  else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
           definesCmp0Src(MI, /*CCDef=*/false))
    // CC is clobbered or the source register is redefined between here and
    // the compare: CC elimination is no longer possible.
    Cmp0SrcReg = SystemZ::NoRegister;
}
/// Post-RA scheduling ///
#ifndef NDEBUG
// Print the set of SUs as a brace-enclosed, comma-separated list.
void SystemZPostRASchedStrategy::SUSet::
dump(SystemZHazardRecognizer &HazardRec) const {
  dbgs() << "{";
  bool First = true;
  for (auto &SU : *this) {
    if (!First)
      dbgs() << ", ";
    First = false;
    HazardRec.dumpSU(SU, dbgs());
  }
  dbgs() << "}\n";
}
#endif
// Try to find a single predecessor that would be interesting for the
// scheduler in the top-most region of MBB. Returns null if there is no such
// unambiguous predecessor.
static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
                                             const MachineLoop *Loop) {
  MachineBasicBlock *PredMBB = nullptr;
  switch (MBB->pred_size()) {
  case 1:
    // Exactly one predecessor: use it.
    PredMBB = *MBB->pred_begin();
    break;
  case 2:
    // The loop header has two predecessors: pick the latch, except for a
    // single block loop (where the latch is MBB itself).
    if (Loop != nullptr && Loop->getHeader() == MBB)
      for (MachineBasicBlock *Pred : MBB->predecessors())
        if (Loop->contains(Pred))
          PredMBB = (Pred == MBB ? nullptr : Pred);
    break;
  default:
    break;
  }

  assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
          && "Loop MBB should not consider predecessor outside of loop.");

  return PredMBB;
}
// Feed all instructions between the last emitted MI and NextBegin into the
// hazard recognizer, so its decoder-group state reflects them.
void SystemZPostRASchedStrategy::
advanceTo(MachineBasicBlock::iterator NextBegin) {
  // Resume right after the last MI already emitted, provided it belongs to
  // this MBB; otherwise start from the top of the block.
  MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
  MachineBasicBlock::iterator Cur = MBB->begin();
  if (LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB)
    Cur = std::next(LastEmittedMI);

  // Labels and debug instructions take no decoder slots, so skip them.
  while (Cur != NextBegin) {
    if (!Cur->isPosition() && !Cur->isDebugInstr())
      HazardRec->emitInstruction(&*Cur);
    ++Cur;
  }
}
// Per-region initialization. The Available set may be non-empty if the
// previous region was cut short by -misched-cutoff; start fresh.
void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
  Available.clear();  // -misched-cutoff.
  LLVM_DEBUG(HazardRec->dumpState(););
}
// Begin scheduling NextMBB: create its hazard recognizer and, if possible,
// seed it with the decoder state of a single interesting predecessor.
void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
  assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
          "Entering MBB twice?");
  LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));

  MBB = NextMBB;

  /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
  /// point to it.
  HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
  LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
             if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
             dbgs() << ":\n";);

  // Try to take over the state from a single predecessor, if it has been
  // scheduled. If this is not possible, we are done.
  MachineBasicBlock *SinglePredMBB =
    getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
  if (SinglePredMBB == nullptr)
    return;
  auto It = SchedStates.find(SinglePredMBB);
  if (It == SchedStates.end())
    return;

  LLVM_DEBUG(dbgs() << "** Continued scheduling from "
                    << printMBBReference(*SinglePredMBB) << "\n";);

  HazardRec->copyState(It->second);
  LLVM_DEBUG(HazardRec->dumpState(););

  // Emit incoming terminator(s). Be optimistic and assume that branch
  // prediction will generally do "the right thing". A branch into MBB (or an
  // indirect branch) is treated as taken; later terminators are then never
  // decoded and must not be fed to the hazard recognizer.
  for (MachineInstr &MI : SinglePredMBB->terminators()) {
    LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
    bool TakenBranch = (MI.isBranch() &&
                        (TII->getBranchInfo(MI).isIndirect() ||
                         TII->getBranchInfo(MI).getMBBTarget() == MBB));
    HazardRec->emitInstruction(&MI, TakenBranch);
    if (TakenBranch)
      break;
  }
}
// Finish scheduling the current MBB by emitting everything up to (but not
// including) its terminators into the hazard recognizer.
void SystemZPostRASchedStrategy::leaveMBB() {
  LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);

  // Advance to first terminator. The successor block will handle terminators
  // dependent on CFG layout (T/NT branch etc).
  advanceTo(MBB->getFirstTerminator());
}
// Construct the post-RA strategy: cache loop info and target instr info from
// the scheduling context and initialize the subtarget scheduling model.
SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
  : MLI(C->MLI),
    TII(static_cast<const SystemZInstrInfo *>
        (C->MF->getSubtarget().getInstrInfo())),
    MBB(nullptr), HazardRec(nullptr) {
  const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
  SchedModel.init(ST);
}
SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
  // Free the per-MBB hazard recognizers created in enterMBB().
  for (auto &Entry : SchedStates)
    delete Entry.second;
}
// Prepare for scheduling the region starting at Begin: bring the hazard
// recognizer up to date with any instructions above the region.
void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                            MachineBasicBlock::iterator End,
                                            unsigned NumRegionInstrs) {
  // Terminators are emitted when the successor block is entered (taken /
  // not-taken handling depends on CFG layout), so never emit them here.
  if (!Begin->isTerminator())
    advanceTo(Begin);
}
// Pick the next node to schedule: the Available SU with the lowest combined
// grouping / resource cost, tie-broken on height and input order.
SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Only scheduling top-down.
  IsTopNode = true;

  if (Available.empty())
    return nullptr;

  // If only one choice, return it.
  if (Available.size() == 1) {
    LLVM_DEBUG(dbgs() << "** Only one: ";
               HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
    return *Available.begin();
  }

  // All nodes that are possible to schedule are stored in the Available set.
  LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););

  Candidate Best;
  for (auto *SU : Available) {

    // SU is the next candidate to be compared against current Best.
    Candidate c(SU, *HazardRec);

    // Remember which SU is the best candidate.
    if (Best.SU == nullptr || c < Best) {
      Best = c;
      LLVM_DEBUG(dbgs() << "** Best so far: ";);
    } else
      LLVM_DEBUG(dbgs() << "** Tried      : ";);
    LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
               dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);

    // Once we know we have seen all SUs that affect grouping or use unbuffered
    // resources, we can stop iterating if Best looks good. (SUs with the
    // isScheduleHigh flag sort first in Available, so reaching one without
    // it means all of them have been considered.)
    if (!SU->isScheduleHigh && Best.noCost())
      break;
  }

  assert (Best.SU != nullptr);
  return Best.SU;
}
// Build a candidate for SU_ by querying the hazard recognizer for its
// decoder-grouping and processor-resource costs in the current state.
SystemZPostRASchedStrategy::Candidate::
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
  SU = SU_;

  // Check the grouping cost. For a node that must begin / end a
  // group, it is positive if it would do so prematurely, or negative
  // if it would fit naturally into the schedule.
  GroupingCost = HazardRec.groupingCost(SU);

  // Check the resources cost for this SU.
  ResourcesCost = HazardRec.resourcesCost(SU);
}
// Strict weak ordering: *this is "better" than other when it has a lower
// grouping cost, then a lower resources cost, then a greater height, and
// finally a lower (earlier) node number.
bool SystemZPostRASchedStrategy::Candidate::
operator<(const Candidate &other) {
  // Check decoder grouping (lower cost is better).
  if (GroupingCost != other.GroupingCost)
    return GroupingCost < other.GroupingCost;

  // Compare the use of resources (lower cost is better).
  if (ResourcesCost != other.ResourcesCost)
    return ResourcesCost < other.ResourcesCost;

  // Higher SU is otherwise generally better.
  if (SU->getHeight() != other.SU->getHeight())
    return SU->getHeight() > other.SU->getHeight();

  // If all same, fall back to original order.
  return SU->NodeNum < other.SU->NodeNum;
}
// Commit SU to the schedule: drop it from the Available set and advance the
// hazard recognizer's decoder state past it.
void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
             if (Available.size() == 1) dbgs() << "(only one) ";
             Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);

  // Remove SU from Available set and update HazardRec.
  Available.erase(SU);
  HazardRec->EmitInstruction(SU);
}
// A node became ready: add it to the Available set, and mark it for early
// consideration in pickNode() if it can affect decoder grouping or uses an
// unbuffered processor resource.
void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
  const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
  bool AffectsGrouping = SC->isValid() && (SC->BeginGroup || SC->EndGroup);
  SU->isScheduleHigh = AffectsGrouping || SU->isUnbuffered;

  // Put all released SUs in the Available set.
  Available.insert(SU);
}