//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ResourcePriorityQueue class, which is a
// SchedulingPriorityQueue that prioritizes instructions using DFA state to
// reduce the length of the critical path through the basic block
// on VLIW platforms.
// The scheduler is basically a top-down adaptable list scheduler with DFA
// resource tracking added to the cost function.
// The DFA is queried as a state machine to model "packets/bundles" during
// scheduling. Currently packets/bundles are discarded at the end of
// scheduling, so they affect only the order of instructions.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "scheduler"
static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Disable use of DFA during scheduling"));
static cl::opt<int> RegPressureThreshold(
"dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
cl::desc("Track reg pressure and switch priority to in-depth"));
ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
: Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
TRI = STI.getRegisterInfo();
TLI = IS->TLI;
TII = STI.getInstrInfo();
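// Create the target's DFA resource model (a DFAPacketizer); it is what the
// code below queries to model packet/bundle formation.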
ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
// do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
unsigned NumRC = TRI->getNumRegClasses();
RegLimit.resize(NumRC);
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
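// Cache the per-class register pressure limits so regPressureDelta() can
// compare projected pressure against them.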
for (const TargetRegisterClass *RC : TRI->regclasses())
RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
ParallelLiveRanges = 0;
HorizontalVerticalBalance = 0;
}
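/// Count the data predecessors of SU that produce a value of register class
/// RCId; CopyFromReg predecessors are treated as likely live-in values.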
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
unsigned NumberDeps = 0;
for (SDep &Pred : SU->Preds) {
if (Pred.isCtrl())
continue;
SUnit *PredSU = Pred.getSUnit();
const SDNode *ScegN = PredSU->getNode();
if (!ScegN)
continue;
// If the value comes through CopyFromReg, it is probably
// live into the BB from outside.
switch (ScegN->getOpcode()) {
default: break;
case ISD::TokenFactor: break;
case ISD::CopyFromReg: NumberDeps++; break;
case ISD::CopyToReg: break;
case ISD::INLINEASM: break;
case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
&& (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
}
}
return NumberDeps;
}
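/// Count the data successors of SU that consume a value of register class
/// RCId; CopyToReg successors are treated as likely live-out values.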
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
unsigned RCId) {
unsigned NumberDeps = 0;
for (const SDep &Succ : SU->Succs) {
if (Succ.isCtrl())
continue;
SUnit *SuccSU = Succ.getSUnit();
const SDNode *ScegN = SuccSU->getNode();
if (!ScegN)
continue;
// If the value is passed to CopyToReg, it is probably
// live outside the BB.
switch (ScegN->getOpcode()) {
default: break;
case ISD::TokenFactor: break;
case ISD::CopyFromReg: break;
case ISD::CopyToReg: NumberDeps++; break;
case ISD::INLINEASM: break;
case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)
&& (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
}
}
return NumberDeps;
}
static unsigned numberCtrlDepsInSU(SUnit *SU) {
unsigned NumberDeps = 0;
for (const SDep &Succ : SU->Succs)
if (Succ.isCtrl())
NumberDeps++;
return NumberDeps;
}
static unsigned numberCtrlPredInSU(SUnit *SU) {
unsigned NumberDeps = 0;
for (SDep &Pred : SU->Preds)
if (Pred.isCtrl())
NumberDeps++;
return NumberDeps;
}
///
/// Initialize nodes.
///
void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
NumNodesSolelyBlocking.resize(SUnits->size(), 0);
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
initNumRegDefsLeft(SU);
SU->NodeQueueId = 0;
}
}
/// This heuristic is used if DFA scheduling is not desired
/// for some VLIW platform.
bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
// The isScheduleHigh flag allows nodes with wraparound dependencies that
// cannot easily be modeled as edges with latencies to be scheduled as
// soon as possible in a top-down schedule.
if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
return false;
if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
return true;
unsigned LHSNum = LHS->NodeNum;
unsigned RHSNum = RHS->NodeNum;
// The most important heuristic is scheduling the critical path.
unsigned LHSLatency = PQ->getLatency(LHSNum);
unsigned RHSLatency = PQ->getLatency(RHSNum);
if (LHSLatency < RHSLatency) return true;
if (LHSLatency > RHSLatency) return false;
// After that, if two nodes have identical latencies, look to see if one will
// unblock more other nodes than the other.
unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
if (LHSBlocked < RHSBlocked) return true;
if (LHSBlocked > RHSBlocked) return false;
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
return LHSNum < RHSNum;
}
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = nullptr;
for (const SDep &Pred : SU->Preds) {
SUnit &PredSU = *Pred.getSUnit();
if (!PredSU.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
return nullptr;
OnlyAvailablePred = &PredSU;
}
}
return OnlyAvailablePred;
}
void ResourcePriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of successor
// nodes for which this node is the sole unscheduled predecessor.
unsigned NumNodesBlocking = 0;
for (const SDep &Succ : SU->Succs)
if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
++NumNodesBlocking;
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
Queue.push_back(SU);
}
/// Check if scheduling of this SU is possible
/// in the current packet.
bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
if (!SU || !SU->getNode())
return false;
// If this is a compound instruction,
// it is likely to be a call. Do not delay it.
if (SU->getNode()->getGluedNode())
return true;
// First see if the pipeline could receive this instruction
// in the current cycle.
if (SU->getNode()->isMachineOpcode())
switch (SU->getNode()->getMachineOpcode()) {
default:
if (!ResourcesModel->canReserveResources(&TII->get(
SU->getNode()->getMachineOpcode())))
return false;
break;
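// Generic pseudo-ops occupy no machine resources, so they never block the
// current packet.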
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
case TargetOpcode::REG_SEQUENCE:
case TargetOpcode::IMPLICIT_DEF:
break;
}
// Now check that there are no dependencies on
// instructions already in the packet.
for (const SUnit *S : Packet)
for (const SDep &Succ : S->Succs) {
// Since we do not add pseudos to packets, might as well
// ignore order deps.
if (Succ.isCtrl())
continue;
if (Succ.getSUnit() == SU)
return false;
}
return true;
}
/// Keep track of available resources.
void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If this SU does not fit in the packet,
// start a new one.
if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
ResourcesModel->clearResources();
Packet.clear();
}
if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
switch (SU->getNode()->getMachineOpcode()) {
default:
ResourcesModel->reserveResources(&TII->get(
SU->getNode()->getMachineOpcode()));
break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
case TargetOpcode::REG_SEQUENCE:
case TargetOpcode::IMPLICIT_DEF:
break;
}
Packet.push_back(SU);
}
// Forcefully end packet for PseudoOps.
else {
ResourcesModel->clearResources();
Packet.clear();
}
// If packet is now full, reset the state so in the next cycle
// we start fresh.
if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
}
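/// Raw (unclamped) change in pressure for register class RCId if SU were
/// scheduled now: values SU defines that feed same-class successors increase
/// it, operands that kill same-class predecessor values decrease it.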
int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
int RegBalance = 0;
if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
return RegBalance;
// Gen estimate.
for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
MVT VT = SU->getNode()->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
&& TLI->getRegClassFor(VT)
&& TLI->getRegClassFor(VT)->getID() == RCId)
RegBalance += numberRCValSuccInSU(SU, RCId);
}
// Kill estimate.
for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
const SDValue &Op = SU->getNode()->getOperand(i);
MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (isa<ConstantSDNode>(Op.getNode()))
continue;
if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
&& TLI->getRegClassFor(VT)->getID() == RCId)
RegBalance -= numberRCValPredInSU(SU, RCId);
}
return RegBalance;
}
/// Estimates the change in reg pressure from scheduling this SU.
/// It is achieved by simple tracking of defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function ignore
/// existing reg file sizes and report the raw def/use
/// balance.
int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
int RegBalance = 0;
if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
return RegBalance;
if (RawPressure) {
for (const TargetRegisterClass *RC : TRI->regclasses())
RegBalance += rawRegPressureDelta(SU, RC->getID());
}
else {
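// Only penalize classes whose projected pressure would reach or exceed the
// target's limit for that class.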
for (const TargetRegisterClass *RC : TRI->regclasses()) {
if ((RegPressure[RC->getID()] +
rawRegPressureDelta(SU, RC->getID()) > 0) &&
(RegPressure[RC->getID()] +
rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
RegBalance += rawRegPressureDelta(SU, RC->getID());
}
}
return RegBalance;
}
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 15;
static const unsigned PriorityFour = 5;
static const unsigned ScaleOne = 20;
static const unsigned ScaleTwo = 10;
static const unsigned ScaleThree = 5;
static const unsigned FactorOne = 2;
/// Returns a single number reflecting the benefit of scheduling SU
/// in the current cycle.
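/// For example (illustrative, default heuristic): a non-forced node at height
/// 3, blocking no other nodes, with resources available and zero pressure
/// delta scores (1 + 3 * ScaleTwo) << FactorOne = 31 << 2 = 124.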
int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
// Initial trivial priority.
int ResCount = 1;
// Do not waste time on a node that is already scheduled.
if (SU->isScheduled)
return ResCount;
// Forced priority is high.
if (SU->isScheduleHigh)
ResCount += PriorityOne;
// Adaptable scheduling: a small but very parallel
// region, where reg pressure is an issue.
if (HorizontalVerticalBalance > RegPressureThreshold) {
// Critical path first
ResCount += (SU->getHeight() * ScaleTwo);
// If resources are available for it, multiply the
// chance of scheduling.
if (isResourceAvailable(SU))
ResCount <<= FactorOne;
// Consider change to reg pressure from scheduling
// this SU.
ResCount -= (regPressureDelta(SU,true) * ScaleOne);
}
// Default heuristic, greedy and
// critical-path driven.
else {
// Critical path first.
ResCount += (SU->getHeight() * ScaleTwo);
// Now see how many instructions are blocked by this SU.
ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
// If resources are available for it, multiply the
// chance of scheduling.
if (isResourceAvailable(SU))
ResCount <<= FactorOne;
ResCount -= (regPressureDelta(SU) * ScaleTwo);
}
// These are platform-specific heuristics.
// They will need to move into the back end
// and be accessed from here via a hook.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode()) {
const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
if (TID.isCall())
ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
}
else
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor:
case ISD::CopyFromReg:
case ISD::CopyToReg:
ResCount += PriorityFour;
break;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
ResCount += PriorityThree;
break;
}
}
return ResCount;
}
/// Main resource tracking point.
void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
// Use NULL entry as an event marker to reset
// the DFA state.
if (!SU) {
ResourcesModel->clearResources();
Packet.clear();
return;
}
const SDNode *ScegN = SU->getNode();
// Update reg pressure tracking.
// First update current node.
if (ScegN->isMachineOpcode()) {
// Estimate generated regs.
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
if (RC)
RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
}
}
// Estimate killed regs.
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
if (RC) {
if (RegPressure[RC->getID()] >
(numberRCValPredInSU(SU, RC->getID())))
RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
else RegPressure[RC->getID()] = 0;
}
}
}
for (SDep &Pred : SU->Preds) {
if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
continue;
--Pred.getSUnit()->NumRegDefsLeft;
}
}
// Reserve resources for this SU.
reserveResources(SU);
// Adjust number of parallel live ranges.
// The heuristic is simple - a node with no data successors reduces
// the number of live ranges; all others increase it.
unsigned NumberNonControlDeps = 0;
for (const SDep &Succ : SU->Succs) {
adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
if (!Succ.isCtrl())
NumberNonControlDeps++;
}
if (!NumberNonControlDeps) {
if (ParallelLiveRanges >= SU->NumPreds)
ParallelLiveRanges -= SU->NumPreds;
else
ParallelLiveRanges = 0;
}
else
ParallelLiveRanges += SU->NumRegDefsLeft;
// Track parallel live chains.
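// HorizontalVerticalBalance grows when scheduled nodes fan out to many data
// successors (a wide, parallel region) and shrinks when they mostly join
// predecessors; once it exceeds RegPressureThreshold, SUSchedulingCost
// switches to the pressure-aware heuristic.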
HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
}
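/// Estimate how many register definitions this node (and its glued chain)
/// produces; scheduledNode() consumes the count to track ParallelLiveRanges.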
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
unsigned NodeNumDefs = 0;
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
if (N->isMachineOpcode()) {
const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
// No register need be allocated for this.
if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
NodeNumDefs = 0;
break;
}
NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
}
else
switch(N->getOpcode()) {
default: break;
case ISD::CopyFromReg:
NodeNumDefs++;
break;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
NodeNumDefs++;
break;
}
SU->NumRegDefsLeft = NodeNumDefs;
}
/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
/// scheduled. If SU is not itself available, then there is at least one
/// predecessor node that has not been scheduled yet. If SU has exactly ONE
/// unscheduled predecessor, we want to increase that predecessor's priority:
/// scheduling it will make this node available, so it is better than some other
/// node of the same priority that will not make a node available.
void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
return;
// Okay, we found a single predecessor that is available, but not scheduled.
// Since it is available, it must be in the priority queue. First remove it.
remove(OnlyAvailablePred);
// Reinsert the node into the priority queue, which recomputes its
// NumNodesSolelyBlocking value.
push(OnlyAvailablePred);
}
/// Main access point - returns the next instruction
/// to be placed in the scheduling sequence.
SUnit *ResourcePriorityQueue::pop() {
if (empty())
return nullptr;
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
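// Linear scan of the ready queue, picking the candidate with the highest
// DFA-aware scheduling cost.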
int BestCost = SUSchedulingCost(*Best);
for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
if (SUSchedulingCost(*I) > BestCost) {
BestCost = SUSchedulingCost(*I);
Best = I;
}
}
}
// Use the default top-down (TD) scheduling mechanism.
else {
for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
}
SUnit *V = *Best;
if (Best != std::prev(Queue.end()))
std::swap(*Best, Queue.back());
Queue.pop_back();
return V;
}
void ResourcePriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
std::vector<SUnit *>::iterator I = find(Queue, SU);
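// Swap the element to the back and pop it; ordering within Queue does not
// matter since pop() scans the whole queue anyway.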
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
}