|  | //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// Pass to temporarily raise the wave priority beginning the start of | 
|  | /// the shader function until its last VMEM instructions to allow younger | 
|  | /// waves to issue their VMEM instructions as well. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "GCNSubtarget.h" | 
|  | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "llvm/ADT/PostOrderIterator.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/InitializePasses.h" | 
|  | #include "llvm/Support/Allocator.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "amdgpu-set-wave-priority" | 
|  |  | 
|  | static cl::opt<unsigned> DefaultVALUInstsThreshold( | 
|  | "amdgpu-set-wave-priority-valu-insts-threshold", | 
|  | cl::desc("VALU instruction count threshold for adjusting wave priority"), | 
|  | cl::init(100), cl::Hidden); | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | struct MBBInfo { | 
|  | MBBInfo() = default; | 
|  | unsigned NumVALUInstsAtStart = 0; | 
|  | bool MayReachVMEMLoad = false; | 
|  | MachineInstr *LastVMEMLoad = nullptr; | 
|  | }; | 
|  |  | 
|  | using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>; | 
|  |  | 
|  | class AMDGPUSetWavePriority : public MachineFunctionPass { | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | AMDGPUSetWavePriority() : MachineFunctionPass(ID) {} | 
|  |  | 
|  | StringRef getPassName() const override { return "Set wave priority"; } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | private: | 
|  | MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator I, | 
|  | unsigned priority) const; | 
|  |  | 
|  | const SIInstrInfo *TII; | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace. | 
|  |  | 
|  | INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false, | 
|  | false) | 
|  |  | 
|  | char AMDGPUSetWavePriority::ID = 0; | 
|  |  | 
|  | FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { | 
|  | return new AMDGPUSetWavePriority(); | 
|  | } | 
|  |  | 
|  | MachineInstr * | 
|  | AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator I, | 
|  | unsigned priority) const { | 
|  | return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO)) | 
|  | .addImm(priority); | 
|  | } | 
|  |  | 
|  | // Checks that for every predecessor Pred that can reach a VMEM load, | 
|  | // none of Pred's successors can reach a VMEM load. | 
|  | static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, | 
|  | MBBInfoSet &MBBInfos) { | 
|  | for (const MachineBasicBlock *Pred : MBB.predecessors()) { | 
|  | if (!MBBInfos[Pred].MayReachVMEMLoad) | 
|  | continue; | 
|  | for (const MachineBasicBlock *Succ : Pred->successors()) { | 
|  | if (MBBInfos[Succ].MayReachVMEMLoad) | 
|  | return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static bool isVMEMLoad(const MachineInstr &MI) { | 
|  | return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); | 
|  | } | 
|  |  | 
|  | bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) { | 
|  | const unsigned HighPriority = 3; | 
|  | const unsigned LowPriority = 0; | 
|  |  | 
|  | Function &F = MF.getFunction(); | 
|  | if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) | 
|  | return false; | 
|  |  | 
|  | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | 
|  | TII = ST.getInstrInfo(); | 
|  |  | 
|  | unsigned VALUInstsThreshold = DefaultVALUInstsThreshold; | 
|  | Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold"); | 
|  | if (A.isValid()) | 
|  | A.getValueAsString().getAsInteger(0, VALUInstsThreshold); | 
|  |  | 
|  | // Find VMEM loads that may be executed before long-enough sequences of | 
|  | // VALU instructions. We currently assume that backedges/loops, branch | 
|  | // probabilities and other details can be ignored, so we essentially | 
|  | // determine the largest number of VALU instructions along every | 
|  | // possible path from the start of the function that may potentially be | 
|  | // executed provided no backedge is ever taken. | 
|  | MBBInfoSet MBBInfos; | 
|  | for (MachineBasicBlock *MBB : post_order(&MF)) { | 
|  | bool AtStart = true; | 
|  | unsigned MaxNumVALUInstsInMiddle = 0; | 
|  | unsigned NumVALUInstsAtEnd = 0; | 
|  | for (MachineInstr &MI : *MBB) { | 
|  | if (isVMEMLoad(MI)) { | 
|  | AtStart = false; | 
|  | MBBInfo &Info = MBBInfos[MBB]; | 
|  | Info.NumVALUInstsAtStart = 0; | 
|  | MaxNumVALUInstsInMiddle = 0; | 
|  | NumVALUInstsAtEnd = 0; | 
|  | Info.LastVMEMLoad = &MI; | 
|  | } else if (SIInstrInfo::isDS(MI)) { | 
|  | AtStart = false; | 
|  | MaxNumVALUInstsInMiddle = | 
|  | std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); | 
|  | NumVALUInstsAtEnd = 0; | 
|  | } else if (SIInstrInfo::isVALU(MI)) { | 
|  | if (AtStart) | 
|  | ++MBBInfos[MBB].NumVALUInstsAtStart; | 
|  | ++NumVALUInstsAtEnd; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool SuccsMayReachVMEMLoad = false; | 
|  | unsigned NumFollowingVALUInsts = 0; | 
|  | for (const MachineBasicBlock *Succ : MBB->successors()) { | 
|  | SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad; | 
|  | NumFollowingVALUInsts = | 
|  | std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart); | 
|  | } | 
|  | MBBInfo &Info = MBBInfos[MBB]; | 
|  | if (AtStart) | 
|  | Info.NumVALUInstsAtStart += NumFollowingVALUInsts; | 
|  | NumVALUInstsAtEnd += NumFollowingVALUInsts; | 
|  |  | 
|  | unsigned MaxNumVALUInsts = | 
|  | std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); | 
|  | Info.MayReachVMEMLoad = | 
|  | SuccsMayReachVMEMLoad || | 
|  | (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold); | 
|  | } | 
|  |  | 
|  | MachineBasicBlock &Entry = MF.front(); | 
|  | if (!MBBInfos[&Entry].MayReachVMEMLoad) | 
|  | return false; | 
|  |  | 
|  | // Raise the priority at the beginning of the shader. | 
|  | MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); | 
|  | while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator()) | 
|  | ++I; | 
|  | BuildSetprioMI(Entry, I, HighPriority); | 
|  |  | 
|  | // Lower the priority on edges where control leaves blocks from which | 
|  | // the VMEM loads are reachable. | 
|  | SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks; | 
|  | for (MachineBasicBlock &MBB : MF) { | 
|  | if (MBBInfos[&MBB].MayReachVMEMLoad) { | 
|  | if (MBB.succ_empty()) | 
|  | PriorityLoweringBlocks.insert(&MBB); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { | 
|  | for (MachineBasicBlock *Pred : MBB.predecessors()) { | 
|  | if (MBBInfos[Pred].MayReachVMEMLoad) | 
|  | PriorityLoweringBlocks.insert(Pred); | 
|  | } | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // Where lowering the priority in predecessors is not possible, the | 
|  | // block receiving control either was not part of a loop in the first | 
|  | // place or the loop simplification/canonicalization pass should have | 
|  | // already tried to split the edge and insert a preheader, and if for | 
|  | // whatever reason it failed to do so, then this leaves us with the | 
|  | // only option of lowering the priority within the loop. | 
|  | PriorityLoweringBlocks.insert(&MBB); | 
|  | } | 
|  |  | 
|  | for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { | 
|  | BuildSetprioMI( | 
|  | *MBB, | 
|  | MBBInfos[MBB].LastVMEMLoad | 
|  | ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad)) | 
|  | : MBB->begin(), | 
|  | LowPriority); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } |