| //===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Load/Store Pairing: It identifies pairs of load or store instructions |
| // operating on consecutive memory locations and merges them into a single |
| // paired instruction, leveraging hardware support for paired memory accesses. |
| // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. |
| // |
| // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as |
| // merging zero store instructions, promoting loads that read directly from a |
| // preceding store, and merging base register updates with load/store |
| // instructions (via pre-/post-indexed addressing). These advanced |
| // transformations are not yet implemented in the RISC-V pass but represent |
| // potential future enhancements for further optimizing RISC-V memory |
| // operations. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "RISCV.h" |
| #include "RISCVTargetMachine.h" |
| #include "llvm/Analysis/AliasAnalysis.h" |
| #include "llvm/CodeGen/Passes.h" |
| #include "llvm/MC/TargetRegistry.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Target/TargetOptions.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "riscv-load-store-opt" |
| #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer" |
| |
| // The LdStLimit limits number of instructions how far we search for load/store |
| // pairs. |
| static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128), |
| cl::Hidden); |
| |
| namespace { |
| |
| struct RISCVLoadStoreOpt : public MachineFunctionPass { |
| static char ID; |
| bool runOnMachineFunction(MachineFunction &Fn) override; |
| |
| RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} |
| |
| MachineFunctionProperties getRequiredProperties() const override { |
| return MachineFunctionProperties().set( |
| MachineFunctionProperties::Property::NoVRegs); |
| } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.addRequired<AAResultsWrapperPass>(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } |
| |
| // Find and pair load/store instructions. |
| bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); |
| |
| // Convert load/store pairs to single instructions. |
| bool tryConvertToLdStPair(MachineBasicBlock::iterator First, |
| MachineBasicBlock::iterator Second); |
| |
| // Scan the instructions looking for a load/store that can be combined |
| // with the current instruction into a load/store pair. |
| // Return the matching instruction if one is found, else MBB->end(). |
| MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, |
| bool &MergeForward); |
| |
| MachineBasicBlock::iterator |
| mergePairedInsns(MachineBasicBlock::iterator I, |
| MachineBasicBlock::iterator Paired, bool MergeForward); |
| |
| private: |
| AliasAnalysis *AA; |
| MachineRegisterInfo *MRI; |
| const RISCVInstrInfo *TII; |
| const RISCVRegisterInfo *TRI; |
| LiveRegUnits ModifiedRegUnits, UsedRegUnits; |
| }; |
| } // end anonymous namespace |
| |
| char RISCVLoadStoreOpt::ID = 0; |
| INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, |
| false) |
| |
| bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { |
| if (skipFunction(Fn.getFunction())) |
| return false; |
| const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); |
| if (!Subtarget.useLoadStorePairs()) |
| return false; |
| |
| bool MadeChange = false; |
| TII = Subtarget.getInstrInfo(); |
| TRI = Subtarget.getRegisterInfo(); |
| MRI = &Fn.getRegInfo(); |
| AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
| ModifiedRegUnits.init(*TRI); |
| UsedRegUnits.init(*TRI); |
| |
| for (MachineBasicBlock &MBB : Fn) { |
| LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); |
| |
| for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
| MBBI != E;) { |
| if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && |
| tryToPairLdStInst(MBBI)) |
| MadeChange = true; |
| else |
| ++MBBI; |
| } |
| } |
| return MadeChange; |
| } |
| |
| // Find loads and stores that can be merged into a single load or store pair |
| // instruction. |
| bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { |
| MachineInstr &MI = *MBBI; |
| |
| // If this is volatile, it is not a candidate. |
| if (MI.hasOrderedMemoryRef()) |
| return false; |
| |
| if (!TII->isLdStSafeToPair(MI, TRI)) |
| return false; |
| |
| // Look ahead for a pairable instruction. |
| MachineBasicBlock::iterator E = MI.getParent()->end(); |
| bool MergeForward; |
| MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward); |
| if (Paired != E) { |
| MBBI = mergePairedInsns(MBBI, Paired, MergeForward); |
| return true; |
| } |
| return false; |
| } |
| |
| // Merge two adjacent load/store instructions into a paired instruction |
| // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of |
| // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the |
| // appropriate paired opcode, verifies that the memory operand is properly |
| // aligned, and checks that the offset is valid. If all conditions are met, it |
| // builds and inserts the paired instruction. |
| bool RISCVLoadStoreOpt::tryConvertToLdStPair( |
| MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { |
| unsigned PairOpc; |
| Align RequiredAlignment; |
| switch (First->getOpcode()) { |
| default: |
| llvm_unreachable("Unsupported load/store instruction for pairing"); |
| case RISCV::SW: |
| PairOpc = RISCV::MIPS_SWP; |
| RequiredAlignment = Align(8); |
| break; |
| case RISCV::LW: |
| PairOpc = RISCV::MIPS_LWP; |
| RequiredAlignment = Align(8); |
| break; |
| case RISCV::SD: |
| PairOpc = RISCV::MIPS_SDP; |
| RequiredAlignment = Align(16); |
| break; |
| case RISCV::LD: |
| PairOpc = RISCV::MIPS_LDP; |
| RequiredAlignment = Align(16); |
| break; |
| } |
| |
| MachineFunction *MF = First->getMF(); |
| const MachineMemOperand *MMO = *First->memoperands_begin(); |
| Align MMOAlign = MMO->getAlign(); |
| |
| if (MMOAlign < RequiredAlignment) |
| return false; |
| |
| int64_t Offset = First->getOperand(2).getImm(); |
| if (!isUInt<7>(Offset)) |
| return false; |
| |
| MachineInstrBuilder MIB = BuildMI( |
| *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(), |
| TII->get(PairOpc)); |
| MIB.add(First->getOperand(0)) |
| .add(Second->getOperand(0)) |
| .add(First->getOperand(1)) |
| .add(First->getOperand(2)) |
| .cloneMergedMemRefs({&*First, &*Second}); |
| |
| First->getParent()->insert(First, MIB); |
| |
| First->removeFromParent(); |
| Second->removeFromParent(); |
| |
| return true; |
| } |
| |
| static bool mayAlias(MachineInstr &MIa, |
| SmallVectorImpl<MachineInstr *> &MemInsns, |
| AliasAnalysis *AA) { |
| for (MachineInstr *MIb : MemInsns) |
| if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) |
| return true; |
| |
| return false; |
| } |
| |
| // Scan the instructions looking for a load/store that can be combined with the |
| // current instruction into a wider equivalent or a load/store pair. |
| // TODO: Extend pairing logic to consider reordering both instructions |
| // to a safe "middle" position rather than only merging forward/backward. |
| // This requires more sophisticated checks for aliasing, register |
| // liveness, and potential scheduling hazards. |
| MachineBasicBlock::iterator |
| RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, |
| bool &MergeForward) { |
| MachineBasicBlock::iterator E = I->getParent()->end(); |
| MachineBasicBlock::iterator MBBI = I; |
| MachineInstr &FirstMI = *I; |
| MBBI = next_nodbg(MBBI, E); |
| |
| bool MayLoad = FirstMI.mayLoad(); |
| Register Reg = FirstMI.getOperand(0).getReg(); |
| Register BaseReg = FirstMI.getOperand(1).getReg(); |
| int64_t Offset = FirstMI.getOperand(2).getImm(); |
| int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); |
| |
| MergeForward = false; |
| |
| // Track which register units have been modified and used between the first |
| // insn (inclusive) and the second insn. |
| ModifiedRegUnits.clear(); |
| UsedRegUnits.clear(); |
| |
| // Remember any instructions that read/write memory between FirstMI and MI. |
| SmallVector<MachineInstr *, 4> MemInsns; |
| |
| for (unsigned Count = 0; MBBI != E && Count < LdStLimit; |
| MBBI = next_nodbg(MBBI, E)) { |
| MachineInstr &MI = *MBBI; |
| |
| // Don't count transient instructions towards the search limit since there |
| // may be different numbers of them if e.g. debug information is present. |
| if (!MI.isTransient()) |
| ++Count; |
| |
| if (MI.getOpcode() == FirstMI.getOpcode() && |
| TII->isLdStSafeToPair(MI, TRI)) { |
| Register MIBaseReg = MI.getOperand(1).getReg(); |
| int64_t MIOffset = MI.getOperand(2).getImm(); |
| |
| if (BaseReg == MIBaseReg) { |
| if ((Offset != MIOffset + OffsetStride) && |
| (Offset + OffsetStride != MIOffset)) { |
| LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| TRI); |
| MemInsns.push_back(&MI); |
| continue; |
| } |
| |
| // If the destination register of one load is the same register or a |
| // sub/super register of the other load, bail and keep looking. |
| if (MayLoad && |
| TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) { |
| LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
| TRI); |
| MemInsns.push_back(&MI); |
| continue; |
| } |
| |
| // If the BaseReg has been modified, then we cannot do the optimization. |
| if (!ModifiedRegUnits.available(BaseReg)) |
| return E; |
| |
| // If the Rt of the second instruction was not modified or used between |
| // the two instructions and none of the instructions between the second |
| // and first alias with the second, we can combine the second into the |
| // first. |
| if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) && |
| !(MI.mayLoad() && |
| !UsedRegUnits.available(MI.getOperand(0).getReg())) && |
| !mayAlias(MI, MemInsns, AA)) { |
| |
| MergeForward = false; |
| return MBBI; |
| } |
| |
| // Likewise, if the Rt of the first instruction is not modified or used |
| // between the two instructions and none of the instructions between the |
| // first and the second alias with the first, we can combine the first |
| // into the second. |
| if (!(MayLoad && |
| !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) && |
| !mayAlias(FirstMI, MemInsns, AA)) { |
| |
| if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) { |
| MergeForward = true; |
| return MBBI; |
| } |
| } |
| // Unable to combine these instructions due to interference in between. |
| // Keep looking. |
| } |
| } |
| |
| // If the instruction wasn't a matching load or store. Stop searching if we |
| // encounter a call instruction that might modify memory. |
| if (MI.isCall()) |
| return E; |
| |
| // Update modified / uses register units. |
| LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
| |
| // Otherwise, if the base register is modified, we have no match, so |
| // return early. |
| if (!ModifiedRegUnits.available(BaseReg)) |
| return E; |
| |
| // Update list of instructions that read/write memory. |
| if (MI.mayLoadOrStore()) |
| MemInsns.push_back(&MI); |
| } |
| return E; |
| } |
| |
| MachineBasicBlock::iterator |
| RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, |
| MachineBasicBlock::iterator Paired, |
| bool MergeForward) { |
| MachineBasicBlock::iterator E = I->getParent()->end(); |
| MachineBasicBlock::iterator NextI = next_nodbg(I, E); |
| // If NextI is the second of the two instructions to be merged, we need |
| // to skip one further. Either way we merge will invalidate the iterator, |
| // and we don't need to scan the new instruction, as it's a pairwise |
| // instruction, which we're not considering for further action anyway. |
| if (NextI == Paired) |
| NextI = next_nodbg(NextI, E); |
| |
| // Insert our new paired instruction after whichever of the paired |
| // instructions MergeForward indicates. |
| MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; |
| MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; |
| int Offset = I->getOperand(2).getImm(); |
| int PairedOffset = Paired->getOperand(2).getImm(); |
| bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; |
| |
| if (!MergeForward) |
| Paired->getOperand(1).setIsKill(false); |
| |
| // Kill flags may become invalid when moving stores for pairing. |
| if (I->getOperand(0).isUse()) { |
| if (!MergeForward) { |
| // Check if the Paired store's source register has a kill flag and clear |
| // it only if there are intermediate uses between I and Paired. |
| MachineOperand &PairedRegOp = Paired->getOperand(0); |
| if (PairedRegOp.isKill()) { |
| for (auto It = std::next(I); It != Paired; ++It) { |
| if (It->readsRegister(PairedRegOp.getReg(), TRI)) { |
| PairedRegOp.setIsKill(false); |
| break; |
| } |
| } |
| } |
| } else { |
| // Clear kill flags of the first store's register in the forward |
| // direction. |
| Register Reg = I->getOperand(0).getReg(); |
| for (MachineInstr &MI : make_range(std::next(I), std::next(Paired))) |
| MI.clearRegisterKills(Reg, TRI); |
| } |
| } |
| |
| MachineInstr *ToInsert = DeletionPoint->removeFromParent(); |
| MachineBasicBlock &MBB = *InsertionPoint->getParent(); |
| MachineBasicBlock::iterator First, Second; |
| |
| if (!InsertAfter) { |
| First = MBB.insert(InsertionPoint, ToInsert); |
| Second = InsertionPoint; |
| } else { |
| Second = MBB.insertAfter(InsertionPoint, ToInsert); |
| First = InsertionPoint; |
| } |
| |
| if (tryConvertToLdStPair(First, Second)) { |
| LLVM_DEBUG(dbgs() << "Pairing load/store:\n "); |
| LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); |
| } |
| |
| return NextI; |
| } |
| |
| // Returns an instance of the Load / Store Optimization pass. |
| FunctionPass *llvm::createRISCVLoadStoreOptPass() { |
| return new RISCVLoadStoreOpt(); |
| } |