| //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// Insert s_clause instructions to form hard clauses. |
| /// |
| /// Clausing load instructions can give cache coherency benefits. Before gfx10, |
| /// the hardware automatically detected "soft clauses", which were sequences of |
| /// memory instructions of the same type. In gfx10 this detection was removed, |
| /// and the s_clause instruction was introduced to explicitly mark "hard |
| /// clauses". |
| /// |
| /// It's the scheduler's job to form the clauses by putting similar memory |
| /// instructions next to each other. Our job is just to insert an s_clause |
| /// instruction to mark the start of each clause. |
| /// |
| /// Note that hard clauses are very similar to, but logically distinct from, the |
| /// groups of instructions that have to be restartable when XNACK is enabled. |
| /// The rules are slightly different in each case. For example an s_nop |
| /// instruction breaks a restartable group, but can appear in the middle of a |
| /// hard clause. (Before gfx10 there wasn't a distinction, and both were called |
| /// "soft clauses" or just "clauses".) |
| /// |
| /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable |
| /// groups, not hard clauses. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "GCNSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/ADT/SmallVector.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "si-insert-hard-clauses" |
| |
| namespace { |
| |
/// Classification of a machine instruction with respect to hard clauses.
///
/// The numeric order is significant: every enumerator up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of clause, and the pass tests
/// `Type <= LAST_REAL_HARDCLAUSE_TYPE` to recognise them. The enumerators
/// after that marker are bookkeeping categories, not clause kinds.
enum HardClauseType {
  HARDCLAUSE_VMEM = 0, // Texture, buffer, global or scratch memory.
  HARDCLAUSE_FLAT = 1, // Flat memory (neither global nor scratch).
  HARDCLAUSE_LDS = 2,  // Accesses to LDS.
  HARDCLAUSE_SMEM = 3, // Scalar memory.
  HARDCLAUSE_VALU = 4, // VALU instructions.

  // Marker for the highest-valued real clause kind.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. These may sit in the middle of a hard clause;
  // s_waitcnt is the exception and is not allowed there.
  HARDCLAUSE_INTERNAL = 5,

  // Meta instructions, such as KILL, that produce no ISA at all.
  HARDCLAUSE_IGNORE = 6,

  // Everything that may not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt, and any instruction not covered above.
  HARDCLAUSE_ILLEGAL = 7,
};
| |
| class SIInsertHardClauses : public MachineFunctionPass { |
| public: |
| static char ID; |
| const GCNSubtarget *ST = nullptr; |
| |
| SIInsertHardClauses() : MachineFunctionPass(ID) {} |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesCFG(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| HardClauseType getHardClauseType(const MachineInstr &MI) { |
| |
| // On current architectures we only get a benefit from clausing loads. |
| if (MI.mayLoad()) { |
| if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { |
| if (ST->hasNSAClauseBug()) { |
| const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); |
| if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) |
| return HARDCLAUSE_ILLEGAL; |
| } |
| return HARDCLAUSE_VMEM; |
| } |
| if (SIInstrInfo::isFLAT(MI)) |
| return HARDCLAUSE_FLAT; |
| // TODO: LDS |
| if (SIInstrInfo::isSMRD(MI)) |
| return HARDCLAUSE_SMEM; |
| } |
| |
| // Don't form VALU clauses. It's not clear what benefit they give, if any. |
| |
| // In practice s_nop is the only internal instruction we're likely to see. |
| // It's safe to treat the rest as illegal. |
| if (MI.getOpcode() == AMDGPU::S_NOP) |
| return HARDCLAUSE_INTERNAL; |
| if (MI.isMetaInstruction()) |
| return HARDCLAUSE_IGNORE; |
| return HARDCLAUSE_ILLEGAL; |
| } |
| |
| // Track information about a clause as we discover it. |
| struct ClauseInfo { |
| // The type of all (non-internal) instructions in the clause. |
| HardClauseType Type = HARDCLAUSE_ILLEGAL; |
| // The first (necessarily non-internal) instruction in the clause. |
| MachineInstr *First = nullptr; |
| // The last non-internal instruction in the clause. |
| MachineInstr *Last = nullptr; |
| // The length of the clause including any internal instructions in the |
| // middle (but not at the end) of the clause. |
| unsigned Length = 0; |
| // Internal instructions at the and of a clause should not be included in |
| // the clause. Count them in TrailingInternalLength until a new memory |
| // instruction is added. |
| unsigned TrailingInternalLength = 0; |
| // The base operands of *Last. |
| SmallVector<const MachineOperand *, 4> BaseOps; |
| }; |
| |
| bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { |
| if (CI.First == CI.Last) |
| return false; |
| assert(CI.Length <= 64 && "Hard clause is too long!"); |
| |
| auto &MBB = *CI.First->getParent(); |
| auto ClauseMI = |
| BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) |
| .addImm(CI.Length - 1); |
| finalizeBundle(MBB, ClauseMI->getIterator(), |
| std::next(CI.Last->getIterator())); |
| return true; |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override { |
| if (skipFunction(MF.getFunction())) |
| return false; |
| |
| ST = &MF.getSubtarget<GCNSubtarget>(); |
| if (!ST->hasHardClauses()) |
| return false; |
| |
| const SIInstrInfo *SII = ST->getInstrInfo(); |
| const TargetRegisterInfo *TRI = ST->getRegisterInfo(); |
| |
| bool Changed = false; |
| for (auto &MBB : MF) { |
| ClauseInfo CI; |
| for (auto &MI : MBB) { |
| HardClauseType Type = getHardClauseType(MI); |
| |
| int64_t Dummy1; |
| bool Dummy2; |
| unsigned Dummy3; |
| SmallVector<const MachineOperand *, 4> BaseOps; |
| if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { |
| if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, |
| Dummy3, TRI)) { |
| // We failed to get the base operands, so we'll never clause this |
| // instruction with any other, so pretend it's illegal. |
| Type = HARDCLAUSE_ILLEGAL; |
| } |
| } |
| |
| if (CI.Length == 64 || |
| (CI.Length && Type != HARDCLAUSE_INTERNAL && |
| Type != HARDCLAUSE_IGNORE && |
| (Type != CI.Type || |
| // Note that we lie to shouldClusterMemOps about the size of the |
| // cluster. When shouldClusterMemOps is called from the machine |
| // scheduler it limits the size of the cluster to avoid increasing |
| // register pressure too much, but this pass runs after register |
| // allocation so there is no need for that kind of limit. |
| !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { |
| // Finish the current clause. |
| Changed |= emitClause(CI, SII); |
| CI = ClauseInfo(); |
| } |
| |
| if (CI.Length) { |
| // Extend the current clause. |
| if (Type != HARDCLAUSE_IGNORE) { |
| if (Type == HARDCLAUSE_INTERNAL) { |
| ++CI.TrailingInternalLength; |
| } else { |
| ++CI.Length; |
| CI.Length += CI.TrailingInternalLength; |
| CI.TrailingInternalLength = 0; |
| CI.Last = &MI; |
| CI.BaseOps = std::move(BaseOps); |
| } |
| } |
| } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { |
| // Start a new clause. |
| CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; |
| } |
| } |
| |
| // Finish the last clause in the basic block if any. |
| if (CI.Length) |
| Changed |= emitClause(CI, SII); |
| } |
| |
| return Changed; |
| } |
| }; |
| |
| } // namespace |
| |
// Out-of-class definition of the static pass ID member declared inside
// SIInsertHardClauses; it is passed to the MachineFunctionPass constructor.
| char SIInsertHardClauses::ID = 0; |
| |
// Makes the pass ID reachable under the name llvm::SIInsertHardClausesID by
// binding a reference to the class-local ID.
| char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; |
| |
// Registers the pass under the DEBUG_TYPE string ("si-insert-hard-clauses")
// with the display name "SI Insert Hard Clauses".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
| INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", |
| false, false) |