| //=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines a hazard recognizer for the SystemZ scheduler. |
| // |
| // This class is used by the SystemZ scheduling strategy to maintain |
| // the state during scheduling, and provide cost functions for |
| // scheduling candidates. This includes: |
| // |
| // * Decoder grouping. A decoder group can hold at most 3 uops, and |
| // instructions that always begin a new group should be scheduled when |
| // the current decoder group is empty. |
| // * Processor resource usage. It is beneficial to balance the use of |
| // resources. |
| // |
| // ===---------------------------------------------------------------------===// |
| |
| #include "SystemZHazardRecognizer.h" |
| #include "llvm/ADT/Statistic.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "machine-scheduler" |
| |
| // This is the limit of processor resource usage at which the |
| // scheduler should try to look for other instructions (not using the |
| // critical resource). |
| static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, |
| cl::desc("The OOO window for processor " |
| "resources during scheduling."), |
| cl::init(8)); |
| |
| SystemZHazardRecognizer:: |
| SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), |
| SchedModel(nullptr) {} |
| |
| unsigned SystemZHazardRecognizer:: |
| getNumDecoderSlots(SUnit *SU) const { |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. |
| |
| if (SC->BeginGroup) { |
| if (!SC->EndGroup) |
| return 2; // Cracked instruction |
| else |
| return 3; // Expanded/group-alone instruction |
| } |
| |
| return 1; // Normal instruction |
| } |
| |
| unsigned SystemZHazardRecognizer::getCurrCycleIdx() { |
| unsigned Idx = CurrGroupSize; |
| if (GrpCount % 2) |
| Idx += 3; |
| return Idx; |
| } |
| |
| ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: |
| getHazardType(SUnit *m, int Stalls) { |
| return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); |
| } |
| |
| void SystemZHazardRecognizer::Reset() { |
| CurrGroupSize = 0; |
| clearProcResCounters(); |
| GrpCount = 0; |
| LastFPdOpCycleIdx = UINT_MAX; |
| DEBUG(CurGroupDbg = "";); |
| } |
| |
| bool |
| SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| if (!SC->isValid()) |
| return true; |
| |
| // A cracked instruction only fits into schedule if the current |
| // group is empty. |
| if (SC->BeginGroup) |
| return (CurrGroupSize == 0); |
| |
| // Since a full group is handled immediately in EmitInstruction(), |
| // SU should fit into current group. NumSlots should be 1 or 0, |
| // since it is not a cracked or expanded instruction. |
| assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && |
| "Expected normal instruction to fit in non-full group!"); |
| |
| return true; |
| } |
| |
| void SystemZHazardRecognizer::nextGroup(bool DbgOutput) { |
| if (CurrGroupSize > 0) { |
| DEBUG(dumpCurrGroup("Completed decode group")); |
| DEBUG(CurGroupDbg = "";); |
| |
| GrpCount++; |
| |
| // Reset counter for next group. |
| CurrGroupSize = 0; |
| |
| // Decrease counters for execution units by one. |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| if (ProcResourceCounters[i] > 0) |
| ProcResourceCounters[i]--; |
| |
| // Clear CriticalResourceIdx if it is now below the threshold. |
| if (CriticalResourceIdx != UINT_MAX && |
| (ProcResourceCounters[CriticalResourceIdx] <= |
| ProcResCostLim)) |
| CriticalResourceIdx = UINT_MAX; |
| } |
| |
| DEBUG(if (DbgOutput) |
| dumpProcResourceCounters();); |
| } |
| |
| #ifndef NDEBUG // Debug output |
| void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { |
| OS << "SU(" << SU->NodeNum << "):"; |
| OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); |
| |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| if (!SC->isValid()) |
| return; |
| |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
| const MCProcResourceDesc &PRD = |
| *SchedModel->getProcResource(PI->ProcResourceIdx); |
| std::string FU(PRD.Name); |
| // trim e.g. Z13_FXaUnit -> FXa |
| FU = FU.substr(FU.find("_") + 1); |
| FU.resize(FU.find("Unit")); |
| OS << "/" << FU; |
| |
| if (PI->Cycles > 1) |
| OS << "(" << PI->Cycles << "cyc)"; |
| } |
| |
| if (SC->NumMicroOps > 1) |
| OS << "/" << SC->NumMicroOps << "uops"; |
| if (SC->BeginGroup && SC->EndGroup) |
| OS << "/GroupsAlone"; |
| else if (SC->BeginGroup) |
| OS << "/BeginsGroup"; |
| else if (SC->EndGroup) |
| OS << "/EndsGroup"; |
| if (SU->isUnbuffered) |
| OS << "/Unbuffered"; |
| } |
| |
| void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { |
| dbgs() << "+++ " << Msg; |
| dbgs() << ": "; |
| |
| if (CurGroupDbg.empty()) |
| dbgs() << " <empty>\n"; |
| else { |
| dbgs() << "{ " << CurGroupDbg << " }"; |
| dbgs() << " (" << CurrGroupSize << " decoder slot" |
| << (CurrGroupSize > 1 ? "s":"") |
| << ")\n"; |
| } |
| } |
| |
| void SystemZHazardRecognizer::dumpProcResourceCounters() const { |
| bool any = false; |
| |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| if (ProcResourceCounters[i] > 0) { |
| any = true; |
| break; |
| } |
| |
| if (!any) |
| return; |
| |
| dbgs() << "+++ Resource counters:\n"; |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| if (ProcResourceCounters[i] > 0) { |
| dbgs() << "+++ Extra schedule for execution unit " |
| << SchedModel->getProcResource(i)->Name |
| << ": " << ProcResourceCounters[i] << "\n"; |
| any = true; |
| } |
| } |
| #endif //NDEBUG |
| |
| void SystemZHazardRecognizer::clearProcResCounters() { |
| ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); |
| CriticalResourceIdx = UINT_MAX; |
| } |
| |
| // Update state with SU as the next scheduled unit. |
| void SystemZHazardRecognizer:: |
| EmitInstruction(SUnit *SU) { |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| DEBUG( dumpCurrGroup("Decode group before emission");); |
| |
| // If scheduling an SU that must begin a new decoder group, move on |
| // to next group. |
| if (!fitsIntoCurrentGroup(SU)) |
| nextGroup(); |
| |
| DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); |
| dbgs() << "\n"; |
| raw_string_ostream cgd(CurGroupDbg); |
| if (CurGroupDbg.length()) |
| cgd << ", "; |
| dumpSU(SU, cgd);); |
| |
| // After returning from a call, we don't know much about the state. |
| if (SU->getInstr()->isCall()) { |
| DEBUG (dbgs() << "+++ Clearing state after call.\n";); |
| clearProcResCounters(); |
| LastFPdOpCycleIdx = UINT_MAX; |
| CurrGroupSize += getNumDecoderSlots(SU); |
| assert (CurrGroupSize <= 3); |
| nextGroup(); |
| return; |
| } |
| |
| // Increase counter for execution unit(s). |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
| // Don't handle FPd together with the other resources. |
| if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) |
| continue; |
| int &CurrCounter = |
| ProcResourceCounters[PI->ProcResourceIdx]; |
| CurrCounter += PI->Cycles; |
| // Check if this is now the new critical resource. |
| if ((CurrCounter > ProcResCostLim) && |
| (CriticalResourceIdx == UINT_MAX || |
| (PI->ProcResourceIdx != CriticalResourceIdx && |
| CurrCounter > |
| ProcResourceCounters[CriticalResourceIdx]))) { |
| DEBUG( dbgs() << "+++ New critical resource: " |
| << SchedModel->getProcResource(PI->ProcResourceIdx)->Name |
| << "\n";); |
| CriticalResourceIdx = PI->ProcResourceIdx; |
| } |
| } |
| |
| // Make note of an instruction that uses a blocking resource (FPd). |
| if (SU->isUnbuffered) { |
| LastFPdOpCycleIdx = getCurrCycleIdx(); |
| DEBUG (dbgs() << "+++ Last FPd cycle index: " |
| << LastFPdOpCycleIdx << "\n";); |
| } |
| |
| // Insert SU into current group by increasing number of slots used |
| // in current group. |
| CurrGroupSize += getNumDecoderSlots(SU); |
| assert (CurrGroupSize <= 3); |
| |
| // Check if current group is now full/ended. If so, move on to next |
| // group to be ready to evaluate more candidates. |
| if (CurrGroupSize == 3 || SC->EndGroup) |
| nextGroup(); |
| } |
| |
| int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; |
| |
| // If SU begins new group, it can either break a current group early |
| // or fit naturally if current group is empty (negative cost). |
| if (SC->BeginGroup) { |
| if (CurrGroupSize) |
| return 3 - CurrGroupSize; |
| return -1; |
| } |
| |
| // Similarly, a group-ending SU may either fit well (last in group), or |
| // end the group prematurely. |
| if (SC->EndGroup) { |
| unsigned resultingGroupSize = |
| (CurrGroupSize + getNumDecoderSlots(SU)); |
| if (resultingGroupSize < 3) |
| return (3 - resultingGroupSize); |
| return -1; |
| } |
| |
| // Most instructions can be placed in any decoder slot. |
| return 0; |
| } |
| |
| bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) { |
| assert (SU->isUnbuffered); |
| // If this is the first FPd op, it should be scheduled high. |
| if (LastFPdOpCycleIdx == UINT_MAX) |
| return true; |
| // If this is not the first PFd op, it should go into the other side |
| // of the processor to use the other FPd unit there. This should |
| // generally happen if two FPd ops are placed with 2 other |
| // instructions between them (modulo 6). |
| if (LastFPdOpCycleIdx > getCurrCycleIdx()) |
| return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3); |
| return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3); |
| } |
| |
| int SystemZHazardRecognizer:: |
| resourcesCost(SUnit *SU) { |
| int Cost = 0; |
| |
| const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; |
| |
| // For a FPd op, either return min or max value as indicated by the |
| // distance to any prior FPd op. |
| if (SU->isUnbuffered) |
| Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); |
| // For other instructions, give a cost to the use of the critical resource. |
| else if (CriticalResourceIdx != UINT_MAX) { |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) |
| if (PI->ProcResourceIdx == CriticalResourceIdx) |
| Cost = PI->Cycles; |
| } |
| |
| return Cost; |
| } |
| |