| //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
| #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
| |
| #include "GCNRegPressure.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
| #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineScheduler.h" |
| |
| namespace llvm { |
| |
| class SIMachineFunctionInfo; |
| class SIRegisterInfo; |
| class GCNSubtarget; |
| class GCNSchedStage; |
| |
/// Identifies a scheduling stage run by GCNScheduleDAGMILive. The numeric
/// values reflect the canonical ordering in which stages are applied.
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  RewriteMFMAForm = 1,
  UnclusteredHighRPReschedule = 2,
  ClusteredLowOccupancyReschedule = 3,
  PreRARematerialize = 4,
  ILPInitialSchedule = 5,
  MemoryClauseInitialSchedule = 6
};

#ifndef NDEBUG
/// Debug-only pretty-printer for stage identifiers.
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif
| |
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  /// Picks the best node from the top or bottom scheduling boundary.
  /// \p PickedPending reports whether the chosen candidate came from a
  /// pending (not-yet-ready) queue rather than an available queue.
  SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);

  /// Picks the best candidate from \p Zone's queues into \p Cand.
  /// \p IsPending reports whether the winner was taken from the pending queue.
  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool &IsPending,
                         bool IsBottomUp);

  /// Initializes \p Cand for \p SU, factoring in the current SGPR/VGPR
  /// pressure relative to the excess/critical limits.
  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  /// Evaluates instructions in the pending queue using a subset of scheduling
  /// heuristics.
  ///
  /// Instructions that cannot be issued due to hardware constraints are placed
  /// in the pending queue rather than the available queue, making them normally
  /// invisible to scheduling heuristics. However, in certain scenarios (such as
  /// avoiding register spilling), it may be beneficial to consider scheduling
  /// these not-yet-ready instructions.
  bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                           SchedBoundary *Zone) const;

  /// Debug helper that reports which of \p Current / \p Preferred won a
  /// candidate comparison.
  void printCandidateDecision(const SchedCandidate &Current,
                              const SchedCandidate &Preferred);

  // Scratch register pressure state used while evaluating candidates.
  std::vector<unsigned> Pressure;

  // Maximum register pressure observed while evaluating candidates.
  std::vector<unsigned> MaxPressure;

  // SGPR pressure above this limit is considered excess.
  unsigned SGPRExcessLimit;

  // VGPR pressure above this limit is considered excess.
  unsigned VGPRExcessLimit;

  // The occupancy (waves per SIMD) the strategy is currently aiming for.
  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

  // GCN RP Tracker for top-down scheduling
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP Tracker for bottom-up scheduling
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of poor performance of the generic RP
  // tracker and can be adjusted up for tuning heuristics to try and more
  // aggressively reduce register pressure.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
| |
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  // NOTE(review): IsLegacyScheduler presumably selects behavior compatible
  // with the pre-stage-based scheduler -- confirm in the implementation.
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};
| |
/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  /// Candidate comparison override implementing the ILP-oriented heuristics.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};
| |
/// The goal of this scheduling strategy is to maximize memory clause for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  /// Candidate comparison override implementing the memory-clause heuristics.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};
| |
/// Captures the length of a schedule and the number of stall (bubble) cycles
/// in it, and combines them into a single scaled metric for comparison.
class ScheduleMetrics {
  // In-class initializers so a default-constructed object has a defined state
  // (the defaulted constructor previously left both members uninitialized).
  unsigned ScheduleLength = 0;
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  /// Returns the bubble-to-length ratio scaled by \ref ScaleFactor, clamped
  /// below at 1.
  unsigned getMetric() const {
    // Guard against division by zero for an empty (default) schedule; treat
    // it as the minimal metric.
    if (ScheduleLength == 0)
      return 1;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  // Defined in the implementation file.
  static const unsigned ScaleFactor;
};
| |
| inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) { |
| dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor |
| << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/" |
| << Sm.getLength() << " ]\n"; |
| return OS; |
| } |
| |
| class GCNScheduleDAGMILive; |
| class RegionPressureMap { |
| GCNScheduleDAGMILive *DAG; |
| // The live in/out pressure as indexed by the first or last MI in the region |
| // before scheduling. |
| DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap; |
| // The mapping of RegionIDx to key instruction |
| DenseMap<unsigned, MachineInstr *> IdxToInstruction; |
| // Whether we are calculating LiveOuts or LiveIns |
| bool IsLiveOut; |
| |
| public: |
| RegionPressureMap() = default; |
| RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut) |
| : DAG(GCNDAG), IsLiveOut(LiveOut) {} |
| // Build the Instr->LiveReg and RegionIdx->Instr maps |
| void buildLiveRegMap(); |
| |
| // Retrieve the LiveReg for a given RegionIdx |
| GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) { |
| assert(IdxToInstruction.contains(RegionIdx)); |
| MachineInstr *Key = IdxToInstruction[RegionIdx]; |
| return RegionLiveRegMap[Key]; |
| } |
| }; |
| |
/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
/// boundary is inclusive, the upper boundary is exclusive
/// (half-open interval, like STL ranges).
using RegionBoundaries =
    std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
| |
/// Live-interval-aware scheduling DAG that drives the multi-stage GCN
/// scheduling pipeline (see \ref GCNSchedStageID and runSchedStages()).
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class RewriteMFMAFormStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling
  SmallVector<RegionBoundaries, 32> Regions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live in registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region live in
  // registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region live out
  // registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live out registers, but can
  // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
  void setTargetOccupancy(unsigned TargetOccupancy);

  // Run each scheduling stage in turn over the function's regions.
  void runSchedStages();

  // Factory for the stage object corresponding to \p SchedStageID.
  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

  // Remove \p MI from region \p RegionIdx and erase it from the function.
  void deleteMI(unsigned RegionIdx, MachineInstr *MI);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
| |
// Base class for a single scheduling stage; GCNScheduleDAGMILive applies
// multiple such stages to a function.
class GCNSchedStage {
protected:
  // The DAG this stage operates on.
  GCNScheduleDAGMILive &DAG;

  // The scheduling strategy driving this stage.
  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  // Identifier of this stage.
  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // Temporarily stored DAG mutations for the duration of the stage.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Finalize state after scheduling a region.
  virtual void finalizeGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clocks.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  // Computes the cycle at which \p SU becomes ready, recording per-SUnit
  // results in \p ReadyCycles.
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  /// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
  /// The MIs in \p MIOrder must be exactly the same as the ones currently
  /// existing inside the region, only in a different order that honors def-use
  /// chains.
  void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB,
                            ArrayRef<MachineInstr *> MIOrder);

  // Move on to the next region.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
| |
/// Initial scheduling stage (see GCNSchedStageID::OccInitialSchedule), run
/// with the occupancy-maximizing strategy.
class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
/// Stage that speculatively rewrites MFMA instructions into a different form
/// when the estimated register-pressure benefit outweighs the cost of the
/// copies the rewrite requires (see GCNSchedStageID::RewriteMFMAForm).
class RewriteMFMAFormStage : public GCNSchedStage {
private:
  // Record regions with excess ArchVGPR register pressure over the physical
  // register limit. Register pressure in these regions usually will result in
  // spilling.
  BitVector RegionsWithExcessArchVGPR;

  const SIInstrInfo *TII;
  const SIRegisterInfo *SRI;

  /// Do a speculative rewrite and collect copy locations. The speculative
  /// rewrite allows us to calculate the RP of the code after the rewrite, and
  /// the copy locations allow us to calculate the total cost of copies required
  /// for the rewrite. Stores the rewritten instructions in \p RewriteCands,
  /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
  /// copy locations for defs (of the MFMA operands) in \p CopyForDef
  bool
  initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
                 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
                 SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
  /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
  /// costs, and \p RewriteCands to undo rewriting.
  int64_t getRewriteCost(
      const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
      const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
      const SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Do the final rewrite on \p RewriteCands and insert any needed copies.
  bool
  rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);

  /// \returns true if this MI is a rewrite candidate.
  bool isRewriteCandidate(MachineInstr *MI) const;

  /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
  /// DefIdxs
  void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
                        SmallVectorImpl<SlotIndex> &DefIdxs);

  /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
  /// ReachingUses
  void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
                        SmallVectorImpl<MachineOperand *> &ReachingUses);

public:
  bool initGCNSchedStage() override;

  RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
/// Stage that re-schedules regions with high register pressure (see
/// GCNSchedStageID::UnclusteredHighRPReschedule).
class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;
  // Save the temporary target occupancy before starting this stage.
  unsigned TempTargetOccupancy;
  // Track whether any region was scheduled by this stage.
  bool IsAnyRegionScheduled;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Retries function scheduling when the achieved occupancy turned out lower
// than the one assumed by earlier scheduling passes. This gives more freedom
// to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
/// Attempts to reduce function spilling or, if there is no spilling, to
/// increase function occupancy by one with respect to register usage by sinking
/// rematerializable instructions to their use. When the stage estimates that
/// reducing spilling or increasing occupancy is possible, it tries to
/// rematerialize as few registers as possible to reduce potential negative
/// effects on function latency.
///
/// The stage only supports rematerializing registers that meet all of the
/// following constraints.
/// 1. The register is virtual and has a single defining instruction.
/// 2. The single defining instruction is either deemed rematerializable by the
///    target-independent logic, or if not, has no non-constant and
///    non-ignorable physical register use.
/// 3. The register has no virtual register use whose live range would be
///    extended by the rematerialization.
/// 4. The register has a single non-debug user in a different region from its
///    defining region.
/// 5. The register is not used by or using another register that is going to be
///    rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
  /// A rematerializable register.
  struct RematReg {
    /// Single MI defining the rematerializable register.
    MachineInstr *DefMI;
    /// Single user of the rematerializable register.
    MachineInstr *UseMI;
    /// Regions in which the register is live-in/live-out/live anywhere.
    BitVector LiveIn, LiveOut, Live;
    /// The rematerializable register's lane bitmask.
    LaneBitmask Mask;
    /// Defining and using regions.
    unsigned DefRegion, UseRegion;

    RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
             GCNScheduleDAGMILive &DAG,
             const DenseMap<MachineInstr *, unsigned> &MIRegion);

    /// Returns the rematerializable register. Do not call after deleting the
    /// original defining instruction.
    Register getReg() const { return DefMI->getOperand(0).getReg(); }

    /// Determines whether this rematerialization may be beneficial in at least
    /// one target region.
    bool maybeBeneficial(const BitVector &TargetRegions,
                         ArrayRef<GCNRPTarget> RPTargets) const;

    /// Determines if the register is both unused and live-through in region \p
    /// I. This guarantees that rematerializing it will reduce RP in the region.
    bool isUnusedLiveThrough(unsigned I) const {
      assert(I < Live.size() && "region index out of range");
      return LiveIn[I] && LiveOut[I] && I != UseRegion;
    }

    /// Updates internal structures following a MI rematerialization. Part of
    /// the stage instead of the DAG because it makes assumptions that are
    /// specific to the rematerialization process.
    void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
                  GCNScheduleDAGMILive &DAG) const;
  };

  /// A scored rematerialization candidate. Higher scores indicate more
  /// beneficial rematerializations. A null score indicates the
  /// rematerialization is not helpful to reduce RP in target regions.
  struct ScoredRemat {
    /// The rematerializable register under consideration.
    RematReg *Remat;

    /// Execution frequency information required by scoring heuristics.
    /// Frequencies are scaled down if they are high to avoid overflow/underflow
    /// when combining them.
    struct FreqInfo {
      /// Per-region execution frequencies. 0 when unknown.
      SmallVector<uint64_t> Regions;
      /// Minimum and maximum observed frequencies.
      uint64_t MinFreq, MaxFreq;

      FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);

    private:
      static const uint64_t ScaleFactor = 1024;
    };

    /// This only initializes state-independent characteristics of \p Remat, not
    /// the actual score.
    ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
                const GCNScheduleDAGMILive &DAG);

    /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
    /// \p Freq provides region execution frequencies.
    void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
                const FreqInfo &Freq, bool ReduceSpill);

    /// Returns whether the current score is null, indicating the
    /// rematerialization is useless.
    bool hasNullScore() const { return !RegionImpact; }

    /// Compare score components of non-null scores pair-wise. A null score is
    /// always strictly lesser than another non-null score.
    bool operator<(const ScoredRemat &O) const {
      if (hasNullScore())
        return !O.hasNullScore();
      if (O.hasNullScore())
        return false;
      if (MaxFreq != O.MaxFreq)
        return MaxFreq < O.MaxFreq;
      if (FreqDiff != O.FreqDiff)
        return FreqDiff < O.FreqDiff;
      if (RegionImpact != O.RegionImpact)
        return RegionImpact < O.RegionImpact;
      // Break ties using pointer to rematerializable register. Rematerializable
      // registers are collected in instruction order so, within the same
      // region, this will prefer registers defined earlier that have longer
      // live ranges in their defining region (since the registers we consider
      // are always live-out in their defining region).
      return Remat > O.Remat;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const;
#endif

  private:
    /// Number of 32-bit registers this rematerialization covers.
    unsigned NumRegs;

    // The three members below are the scoring components, top to bottom from
    // most important to least important when comparing candidates.

    /// Frequency of impacted target region with highest known frequency. This
    /// only matters when the stage is trying to reduce spilling, so it is
    /// always 0 when it is not.
    uint64_t MaxFreq;
    /// Frequency difference between defining and using regions. Negative values
    /// indicate we are rematerializing to higher frequency regions; positive
    /// values indicate the contrary.
    int64_t FreqDiff;
    /// Expected number of target regions impacted by the rematerialization,
    /// scaled by the size of the register being rematerialized.
    unsigned RegionImpact;

    unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;

    int64_t getFreqDiff(const FreqInfo &Freq) const;
  };

  /// Parent MBB to each region, in region order.
  SmallVector<MachineBasicBlock *> RegionBB;
  /// Register pressure targets for all regions.
  SmallVector<GCNRPTarget> RPTargets;
  /// Regions which are above the stage's RP target.
  BitVector TargetRegions;
  /// The target occupancy the stage is trying to achieve. Empty when the
  /// objective is spilling reduction.
  std::optional<unsigned> TargetOcc;
  /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
  unsigned AchievedOcc;
  /// After successful stage initialization, indicates which regions should be
  /// rescheduled.
  BitVector RescheduleRegions;

  /// List of rematerializable registers.
  SmallVector<RematReg> RematRegs;

  /// Holds enough information to rollback a rematerialization decision post
  /// re-scheduling.
  struct RollbackInfo {
    /// The rematerializable register under consideration.
    const RematReg *Remat;
    /// The rematerialized MI replacing the original defining MI.
    MachineInstr *RematMI;
    /// Maps register machine operand indices to their original register.
    SmallDenseMap<unsigned, Register, 4> RegMap;

    RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
  };
  /// List of rematerializations to rollback if rematerialization does not end
  /// up being beneficial.
  SmallVector<RollbackInfo> Rollbacks;

  /// State of a region pre-re-scheduling but post-rematerializations that we
  /// must keep to be able to revert re-scheduling effects.
  struct RegionSchedRevert {
    /// Region number.
    unsigned RegionIdx;
    /// Original instruction order (both debug and non-debug MIs).
    std::vector<MachineInstr *> OrigMIOrder;
    /// Maximum pressure recorded in the region.
    GCNRegPressure MaxPressure;

    RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
                      const GCNRegPressure &MaxPressure)
        : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
          MaxPressure(MaxPressure) {}
  };
  /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
  /// regions.
  SmallVector<RegionSchedRevert> RegionReverts;

  /// Returns the occupancy the stage is trying to achieve.
  unsigned getStageTargetOccupancy() const;

  /// Determines the stage's objective (increasing occupancy or reducing
  /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
  /// achieve that objective and mark those that don't achieve it in \ref
  /// TargetRegions. Returns whether there is any target region.
  bool setObjective();

  /// Unsets target regions in \p Regions whose RP target has been reached.
  void unsetSatisifedRPTargets(const BitVector &Regions);

  /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
  /// again all \ref TargetRegions that were optimistically marked as satisfied
  /// but are actually not, and returns whether there were any such regions.
  bool updateAndVerifyRPTargets(const BitVector &Regions);

  /// Collects all rematerializable registers and appends them to \ref
  /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
  /// rematerializable register was found.
  bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);

  /// Rematerializes \p Remat. This removes the rematerialized register from
  /// live-in/out lists in the DAG and updates RP targets in all affected
  /// regions, which are also marked in \ref RescheduleRegions. Regions in which
  /// RP savings are not guaranteed are set in \p RecomputeRP. When \p Rollback
  /// is non-null, fills it with required information to be able to rollback the
  /// rematerialization post-rescheduling.
  void rematerialize(const RematReg &Remat, BitVector &RecomputeRP,
                     RollbackInfo *Rollback);

  /// Rolls back the rematerialization decision represented by \p Rollback. This
  /// updates live-in/out lists in the DAG but does not update cached register
  /// pressures.
  void rollback(const RollbackInfo &Rollback) const;

  /// Deletes all rematerialized MIs from the MIR when they were kept around for
  /// potential rollback.
  void commitRematerializations() const;

  /// Whether the MI is rematerializable.
  bool isReMaterializable(const MachineInstr &MI);

  /// If remat alone did not increase occupancy to the target one, rollbacks all
  /// rematerializations and resets live-ins/RP in all regions impacted by the
  /// stage to their pre-stage values.
  void finalizeGCNSchedStage() override;

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  void finalizeGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
        RescheduleRegions(DAG.Regions.size()) {
    const unsigned NumRegions = DAG.Regions.size();
    RPTargets.reserve(NumRegions);
    RegionBB.reserve(NumRegions);
  }
};
| |
/// Initial scheduling stage run with the ILP-maximizing strategy (see
/// GCNSchedStageID::ILPInitialSchedule).
class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
/// Initial scheduling stage run with the memory-clause-maximizing strategy
/// (see GCNSchedStageID::MemoryClauseInitialSchedule).
class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
/// Post-RA scheduling DAG for GCN targets.
class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  // Temporarily stored DAG mutations for the duration of scheduling.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  // Whether the function contains IGLP instructions
  // (SCHED_GROUP_BARRIER or IGLP_OPT).
  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};
| |
| } // End namespace llvm |
| |
| #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |