blob: 8df9540d1a0e5b948fae3b498348985396477eb5 [file] [log] [blame] [edit]
//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SystemZMachineScheduler.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
/// Pre-RA scheduling ///
// Return true if MO is a register operand that is a definition.
static bool isRegDef(const MachineOperand &MO) {
  if (!MO.isReg())
    return false;
  return MO.isDef();
}
// Return true if MO is a definition of a physical register.
static bool isPhysRegDef(const MachineOperand &MO) {
  if (!isRegDef(MO))
    return false;
  return MO.getReg().isPhysical();
}
void SystemZPreRASchedStrategy::initializeLatencyReduction() {
  // Enable latency reduction for a region that has a considerable amount of
  // data sequences that should be interleaved. These are SUs that only have
  // one data predecessor / successor edge(s) to their adjacent instruction(s)
  // in the input order. Disable if region has many SUs relative to the
  // overall height.
  unsigned DAGHeight = 0;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
    DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
  // A wide region (many SUs per unit of height) is unlikely to be latency
  // limited, so skip the latency heuristic entirely there.
  RegionPolicy.DisableLatencyHeuristic =
      DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);

  if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
    // Count the nodes that belong to def-use sequences in the input order.
    // CurrSequence tracks the length of the sequence currently being scanned;
    // NumSeqNodes accumulates the total over all closed sequences.
    unsigned CurrSequence = 0, NumSeqNodes = 0;
    // Close the current sequence: only sequences of two or more nodes count.
    auto countSequence = [&CurrSequence, &NumSeqNodes]() {
      if (CurrSequence >= 2)
        NumSeqNodes += CurrSequence;
      CurrSequence = 0;
    };
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      const SUnit *SU = &DAG->SUnits[Idx];
      bool InDataSequence = true;
      // One Data pred to MI just above, or no preds.
      unsigned NumPreds = 0;
      for (const SDep &Pred : SU->Preds)
        if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
            Pred.getSUnit()->NodeNum != Idx - 1)
          InDataSequence = false;
      // One Data succ or no succs (ignoring ExitSU).
      unsigned NumSuccs = 0;
      for (const SDep &Succ : SU->Succs)
        if (Succ.getSUnit() != &DAG->ExitSU &&
            (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
          InDataSequence = false;
      // Another type of node or one that does not have a single data pred
      // ends any previous sequence.
      if (!InDataSequence || !NumPreds)
        countSequence();
      if (InDataSequence)
        CurrSequence++;
    }
    // Close a sequence that extends to the end of the region.
    countSequence();
    // Keep latency scheduling enabled only if a significant share of the
    // region (at least a quarter, and no fewer than 4 nodes) is in sequences.
    if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) {
      LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
                        << NumSeqNodes << ". ";);
    } else
      HasDataSequences = false;
  }
}
// Return true if MI (re)defines the register currently recorded as the
// source of a compare-with-zero. With CCDef set, only instructions that
// also implicitly define CC qualify.
bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
                                               bool CCDef) const {
  // No compare-with-zero source currently tracked: nothing can match.
  if (Cmp0SrcReg == SystemZ::NoRegister)
    return false;
  if (!MI->getNumOperands())
    return false;
  // When a CC def is required, skip instructions that do not clobber CC.
  if (CCDef && !MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC))
    return false;
  const MachineOperand &MO0 = MI->getOperand(0);
  return isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg;
}
// Like biasPhysReg() (bottom-up), but additionally gives a small bias to any
// non-copy instruction whose first operand defines a physical register.
static int biasPhysRegExtra(const SUnit *SU) {
  // The common physreg bias takes precedence when it applies.
  if (int Res = biasPhysReg(SU, /*isTop=*/false))
    return Res;
  // Also recognize Load Address. Most of these are with an FI operand.
  const MachineInstr *MI = SU->getInstr();
  if (!MI->getNumOperands() || MI->isCopy())
    return 0;
  return isPhysRegDef(MI->getOperand(0)) ? 1 : 0;
}
// Apply the SystemZ pre-RA heuristics in priority order to decide whether
// TryCand should replace Cand. Returns true if TryCand is the better choice.
bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                             SchedCandidate &TryCand,
                                             SchedBoundary *Zone) const {
  assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = FirstValid;
    return true;
  }

  // Bias physreg defs and copies to their uses and definitions respectively.
  int TryCandPRegBias = biasPhysRegExtra(TryCand.SU);
  int CandPRegBias = biasPhysRegExtra(Cand.SU);
  if (tryGreater(TryCandPRegBias, CandPRegBias, TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;
  if (TryCandPRegBias && CandPRegBias) {
    // Both biased same way: resolve by node order immediately so that the
    // lower-priority heuristics below cannot separate them.
    tryGreater(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand, NodeOrder);
    return TryCand.Reason != NoCand;
  }

  // Don't extend the scheduled latency in regions with many nodes in data
  // sequences, or for (single block loop) regions that are acyclically
  // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
  // is set only after initialization in registerRoots(), which is why it is
  // checked here instead of earlier.
  if (!RegionPolicy.DisableLatencyHeuristic &&
      (HasDataSequences || Rem.IsAcyclicLatencyLimited))
    // Pick the SU with the greater height, or null if the heights are equal
    // (in which case neither is preferable on latency grounds).
    if (const SUnit *HigherSU =
            TryCand.SU->getHeight() > Cand.SU->getHeight()   ? TryCand.SU
            : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
                                                             : nullptr)
      if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
          HigherSU->getDepth() < computeRemLatency(*Zone)) {
        // One or both SUs increase the scheduled latency. Prefer the one
        // with the smaller height (bottom-up: schedules later).
        tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
                GenericSchedulerBase::BotHeightReduce);
        return TryCand.Reason != NoCand;
      }

  // Weak edges help copy coalescing.
  if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
              Weak))
    return TryCand.Reason != NoCand;

  // Help compare with zero elimination: prefer scheduling the definition of
  // the compared register directly before the compare.
  if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
                 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
    return TryCand.Reason != NoCand;

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  return false;
}
// Set the per-region scheduling policy before the DAG is built.
void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                           MachineBasicBlock::iterator End,
                                           unsigned NumRegionInstrs) {
  // Only pay for the register pressure tracker where it is actually used:
  // computeCyclicCriticalPath() on single block loops, and only for regions
  // that are not trivially small.
  MachineBasicBlock *MBB = Begin->getParent();
  bool IsSingleBlockLoop = MBB->isSuccessor(MBB);
  RegionPolicy.ShouldTrackPressure = IsSingleBlockLoop && NumRegionInstrs >= 8;

  // These heuristics have so far seemed to work better without adding a
  // top-down boundary.
  RegionPolicy.OnlyBottomUp = true;

  // Remember the region size and the index of its bottom instruction.
  BotIdx = NumRegionInstrs - 1;
  this->NumRegionInstrs = NumRegionInstrs;
}
// Per-region initialization: reset the compare-with-zero tracking and decide
// whether latency reduction should be applied to this region.
void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
  GenericScheduler::initialize(dag);
  // No compare-with-zero source seen yet in this region.
  Cmp0SrcReg = SystemZ::NoRegister;
  // Must run after GenericScheduler::initialize(), which sets up DAG.
  initializeLatencyReduction();
  LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
                    << "enabled for data sequences.\n";);
}
// Update the compare-with-zero tracking as nodes are scheduled (bottom-up):
// a compare-with-zero starts tracking its source register; any instruction
// that clobbers CC or redefines that source register ends the tracking.
void SystemZPreRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  GenericScheduler::schedNode(SU, IsTopNode);

  const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
  MachineInstr *MI = SU->getInstr();
  if (TII->isCompareZero(*MI))
    // Start looking for the definition of the compared register.
    Cmp0SrcReg = TII->getCompareSourceReg(*MI);
  else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
           definesCmp0Src(MI, /*CCDef=*/false))
    // CC is clobbered or the source register is redefined between here and
    // the compare: CC elimination is no longer possible.
    Cmp0SrcReg = SystemZ::NoRegister;
}
/// Post-RA scheduling ///
#ifndef NDEBUG
// Print the set of SUs as a brace-enclosed, comma-separated list.
void SystemZPostRASchedStrategy::SUSet::
dump(SystemZHazardRecognizer &HazardRec) const {
  dbgs() << "{";
  bool First = true;
  for (auto &SU : *this) {
    if (!First)
      dbgs() << ", ";
    First = false;
    HazardRec.dumpSU(SU, dbgs());
  }
  dbgs() << "}\n";
}
#endif
// Try to find a single predecessor that would be interesting for the
// scheduler in the top-most region of MBB. Returns null if there is no such
// unambiguous predecessor.
static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
                                             const MachineLoop *Loop) {
  MachineBasicBlock *PredMBB = nullptr;
  switch (MBB->pred_size()) {
  case 1:
    // Exactly one predecessor: use it.
    PredMBB = *MBB->pred_begin();
    break;
  case 2:
    // The loop header has two predecessors: pick the latch, except for a
    // single block loop (where the latch is MBB itself).
    if (Loop != nullptr && Loop->getHeader() == MBB)
      for (MachineBasicBlock *Pred : MBB->predecessors())
        if (Loop->contains(Pred))
          PredMBB = (Pred == MBB ? nullptr : Pred);
    break;
  default:
    break;
  }

  assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
          && "Loop MBB should not consider predecessor outside of loop.");

  return PredMBB;
}
// Feed all instructions between the last emitted MI and NextBegin into the
// hazard recognizer, so its decoder-group state reflects them.
void SystemZPostRASchedStrategy::
advanceTo(MachineBasicBlock::iterator NextBegin) {
  // Resume right after the last MI already emitted, provided it belongs to
  // this MBB; otherwise start from the top of the block.
  MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
  MachineBasicBlock::iterator Cur = MBB->begin();
  if (LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB)
    Cur = std::next(LastEmittedMI);

  // Labels and debug instructions take no decoder slots, so skip them.
  while (Cur != NextBegin) {
    if (!Cur->isPosition() && !Cur->isDebugInstr())
      HazardRec->emitInstruction(&*Cur);
    ++Cur;
  }
}
// Per-region initialization. The Available set may be non-empty if the
// previous region was cut short by -misched-cutoff; start fresh.
void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
  Available.clear();  // -misched-cutoff.
  LLVM_DEBUG(HazardRec->dumpState(););
}
// Begin scheduling NextMBB: create its hazard recognizer and, if possible,
// seed it with the decoder state of a single interesting predecessor.
void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
  assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
          "Entering MBB twice?");
  LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));

  MBB = NextMBB;

  /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
  /// point to it.
  HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
  LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
             if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
             dbgs() << ":\n";);

  // Try to take over the state from a single predecessor, if it has been
  // scheduled. If this is not possible, we are done.
  MachineBasicBlock *SinglePredMBB =
    getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
  if (SinglePredMBB == nullptr)
    return;
  auto It = SchedStates.find(SinglePredMBB);
  if (It == SchedStates.end())
    return;

  LLVM_DEBUG(dbgs() << "** Continued scheduling from "
                    << printMBBReference(*SinglePredMBB) << "\n";);

  HazardRec->copyState(It->second);
  LLVM_DEBUG(HazardRec->dumpState(););

  // Emit incoming terminator(s). Be optimistic and assume that branch
  // prediction will generally do "the right thing". A branch into MBB (or an
  // indirect branch) is treated as taken; later terminators are then never
  // decoded and must not be fed to the hazard recognizer.
  for (MachineInstr &MI : SinglePredMBB->terminators()) {
    LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
    bool TakenBranch = (MI.isBranch() &&
                        (TII->getBranchInfo(MI).isIndirect() ||
                         TII->getBranchInfo(MI).getMBBTarget() == MBB));
    HazardRec->emitInstruction(&MI, TakenBranch);
    if (TakenBranch)
      break;
  }
}
// Finish scheduling the current MBB by emitting everything up to (but not
// including) its terminators into the hazard recognizer.
void SystemZPostRASchedStrategy::leaveMBB() {
  LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);

  // Advance to first terminator. The successor block will handle terminators
  // dependent on CFG layout (T/NT branch etc).
  advanceTo(MBB->getFirstTerminator());
}
// Construct the post-RA strategy: cache loop info and target instr info from
// the scheduling context and initialize the subtarget scheduling model.
SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
  : MLI(C->MLI),
    TII(static_cast<const SystemZInstrInfo *>
        (C->MF->getSubtarget().getInstrInfo())),
    MBB(nullptr), HazardRec(nullptr) {
  const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
  SchedModel.init(ST);
}
SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
  // Free the per-MBB hazard recognizers created in enterMBB().
  for (auto &Entry : SchedStates)
    delete Entry.second;
}
// Prepare for scheduling the region starting at Begin: bring the hazard
// recognizer up to date with any instructions above the region.
void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                            MachineBasicBlock::iterator End,
                                            unsigned NumRegionInstrs) {
  // Terminators are emitted when the successor block is entered (taken /
  // not-taken handling depends on CFG layout), so never emit them here.
  if (!Begin->isTerminator())
    advanceTo(Begin);
}
// Pick the next node to schedule: the Available SU with the lowest combined
// grouping / resource cost, tie-broken on height and input order.
SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Only scheduling top-down.
  IsTopNode = true;

  if (Available.empty())
    return nullptr;

  // If only one choice, return it.
  if (Available.size() == 1) {
    LLVM_DEBUG(dbgs() << "** Only one: ";
               HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
    return *Available.begin();
  }

  // All nodes that are possible to schedule are stored in the Available set.
  LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););

  Candidate Best;
  for (auto *SU : Available) {

    // SU is the next candidate to be compared against current Best.
    Candidate c(SU, *HazardRec);

    // Remember which SU is the best candidate.
    if (Best.SU == nullptr || c < Best) {
      Best = c;
      LLVM_DEBUG(dbgs() << "** Best so far: ";);
    } else
      LLVM_DEBUG(dbgs() << "** Tried      : ";);
    LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
               dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);

    // Once we know we have seen all SUs that affect grouping or use unbuffered
    // resources, we can stop iterating if Best looks good. (SUs with the
    // isScheduleHigh flag sort first in Available, so reaching one without
    // it means all of them have been considered.)
    if (!SU->isScheduleHigh && Best.noCost())
      break;
  }

  assert (Best.SU != nullptr);
  return Best.SU;
}
// Build a candidate for SU_ by querying the hazard recognizer for its
// decoder-grouping and processor-resource costs in the current state.
SystemZPostRASchedStrategy::Candidate::
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
  SU = SU_;

  // Check the grouping cost. For a node that must begin / end a
  // group, it is positive if it would do so prematurely, or negative
  // if it would fit naturally into the schedule.
  GroupingCost = HazardRec.groupingCost(SU);

  // Check the resources cost for this SU.
  ResourcesCost = HazardRec.resourcesCost(SU);
}
// Strict weak ordering: *this is "better" than other when it has a lower
// grouping cost, then a lower resources cost, then a greater height, and
// finally a lower (earlier) node number.
bool SystemZPostRASchedStrategy::Candidate::
operator<(const Candidate &other) {
  // Check decoder grouping (lower cost is better).
  if (GroupingCost != other.GroupingCost)
    return GroupingCost < other.GroupingCost;

  // Compare the use of resources (lower cost is better).
  if (ResourcesCost != other.ResourcesCost)
    return ResourcesCost < other.ResourcesCost;

  // Higher SU is otherwise generally better.
  if (SU->getHeight() != other.SU->getHeight())
    return SU->getHeight() > other.SU->getHeight();

  // If all same, fall back to original order.
  return SU->NodeNum < other.SU->NodeNum;
}
// Commit SU to the schedule: drop it from the Available set and advance the
// hazard recognizer's decoder state past it.
void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
             if (Available.size() == 1) dbgs() << "(only one) ";
             Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);

  // Remove SU from Available set and update HazardRec.
  Available.erase(SU);
  HazardRec->EmitInstruction(SU);
}
// A node became ready: add it to the Available set, and mark it for early
// consideration in pickNode() if it can affect decoder grouping or uses an
// unbuffered processor resource.
void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
  const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
  bool AffectsGrouping = SC->isValid() && (SC->BeginGroup || SC->EndGroup);
  SU->isScheduleHigh = AffectsGrouping || SU->isUnbuffered;

  // Put all released SUs in the Available set.
  Available.insert(SU);
}