//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Load/Store Pairing: This pass identifies pairs of load or store
// instructions operating on consecutive memory locations and merges them
// into a single paired instruction, leveraging hardware support for paired
// memory accesses.
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
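//
// For example (illustrative only; the exact paired opcode and operand order
// depend on the enabled vendor extension), two adjacent word stores off the
// same base, such as
//   SW X10, X8, 0
//   SW X11, X8, 4
// can be merged into a single paired store (e.g. MIPS_SWP or QC_SWMI).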
//
// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
// register allocation didn't provide suitable consecutive registers.
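//
// For example (illustrative), a pseudo such as
//   X10, X13 = PseudoLD_RV32_OPT X12, 0
// has no valid even/odd destination pair and is split back into
//   X10 = LW X12, 0
//   X13 = LW X12, 4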
//
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
// merging zero store instructions, promoting loads that read directly from a
// preceding store, and merging base register updates with load/store
// instructions (via pre-/post-indexed addressing). These advanced
// transformations are not yet implemented in the RISC-V pass but represent
// potential future enhancements for further optimizing RISC-V memory
// operations.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-load-store-opt"
#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
// LdStLimit limits how many instructions we scan ahead when searching for a
// load/store pairing candidate.
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
cl::Hidden);
STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
namespace {
struct RISCVLoadStoreOpt : public MachineFunctionPass {
static char ID;
bool runOnMachineFunction(MachineFunction &Fn) override;
RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().setNoVRegs();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
// Find and pair load/store instructions.
bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
// Convert load/store pairs to single instructions.
bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second);
bool tryConvertToXqcilsmLdStPair(MachineFunction *MF,
MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second);
bool tryConvertToXqcilsmMultiLdSt(MachineBasicBlock::iterator &First);
bool tryConvertToMIPSLdStPair(MachineFunction *MF,
MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second);
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
bool &MergeForward);
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool MergeForward);
// Post register-allocation Zilsd fix-up
bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
bool isValidZilsdRegPair(Register First, Register Second);
void splitLdSdIntoTwo(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, bool IsLoad);
private:
AliasAnalysis *AA;
MachineRegisterInfo *MRI;
const RISCVInstrInfo *TII;
const RISCVRegisterInfo *TRI;
const RISCVSubtarget *STI = nullptr;
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
};
} // end anonymous namespace
char RISCVLoadStoreOpt::ID = 0;
INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
false)
bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
bool MadeChange = false;
STI = &Fn.getSubtarget<RISCVSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
if (STI->useMIPSLoadStorePairs() || STI->hasVendorXqcilsm()) {
for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
tryToPairLdStInst(MBBI))
MadeChange = true;
else
++MBBI;
}
}
}
if (!STI->is64Bit() && STI->hasStdExtZilsd()) {
for (auto &MBB : Fn) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
if (fixInvalidRegPairOp(MBB, MBBI)) {
MadeChange = true;
// Iterator was updated by fixInvalidRegPairOp
} else {
++MBBI;
}
}
}
}
return MadeChange;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
// If this is volatile, it is not a candidate.
if (MI.hasOrderedMemoryRef())
return false;
if (!TII->isLdStSafeToPair(MI, TRI))
return false;
// If Xqcilsm is available, first try to form a multi-instruction group (>2).
if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
if (tryConvertToXqcilsmMultiLdSt(MBBI))
return true;
}
// Look ahead for a pairable instruction.
MachineBasicBlock::iterator E = MI.getParent()->end();
bool MergeForward;
MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
if (Paired != E) {
MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
return true;
}
return false;
}
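// Returns true if the single memory operand of MI is at least as aligned as
// RequiredAlignment. Callers are expected to have checked hasOneMemOperand()
// beforehand.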
static bool isMemOpAligned(MachineInstr &MI, Align RequiredAlignment) {
const MachineMemOperand *MMO = *MI.memoperands_begin();
Align MMOAlign = MMO->getAlign();
return MMOAlign >= RequiredAlignment;
}
// Convert set of 3 or more LW/SW instructions to QC_LWMI/QC_SWMI/QC_SETWMI.
// For now this only handles consecutive loads and stores traversing the basic
// block top-down.
// TODO: Traverse the basic block bottom-up as well.
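// For example (illustrative, matching the operand order built below):
//   X10 = LW X8, 8
//   X11 = LW X8, 12
//   X12 = LW X8, 16
// becomes
//   X10 = QC_LWMI X8, 3, 8, implicit-def X11, implicit-def X12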
bool RISCVLoadStoreOpt::tryConvertToXqcilsmMultiLdSt(
MachineBasicBlock::iterator &FirstIt) {
MachineInstr &FirstMI = *FirstIt;
MachineFunction *MF = FirstMI.getMF();
if (STI->is64Bit() || !STI->hasVendorXqcilsm())
return false;
unsigned Opc = FirstMI.getOpcode();
if (Opc != RISCV::LW && Opc != RISCV::SW)
return false;
if (!FirstMI.hasOneMemOperand())
return false;
if (!isMemOpAligned(FirstMI, Align(4)))
return false;
// Require simple reg+imm addressing.
const MachineOperand &BaseOp = FirstMI.getOperand(1);
const MachineOperand &OffOp = FirstMI.getOperand(2);
if (!BaseOp.isReg() || !OffOp.isImm())
return false;
Register Base = BaseOp.getReg();
int64_t BaseOff = OffOp.getImm();
if (!isShiftedUInt<5, 2>(BaseOff))
return false;
Register StartReg = FirstMI.getOperand(0).getReg();
bool IsLoad = (Opc == RISCV::LW);
// Load rd cannot be x0 and must not clobber the base register.
if (IsLoad) {
if (StartReg == RISCV::X0)
return false;
if (StartReg == Base)
return false;
}
// Collect a set of consecutive matching instructions.
SmallVector<MachineInstr *, 8> Group;
Group.push_back(&FirstMI);
MachineBasicBlock::iterator E = FirstIt->getParent()->end();
MachineBasicBlock::iterator It = next_nodbg(FirstIt, E);
int64_t ExpectedOff = BaseOff + 4;
unsigned Index = 1;
enum class StoreMode { Unknown, Setwmi, Swmi };
StoreMode SMode = StoreMode::Unknown;
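// QC_SETWMI stores the same source register to each consecutive word,
// whereas QC_SWMI stores consecutive source registers; the second store in
// the group determines which form we are building.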
while (It != E) {
MachineInstr &MI = *It;
if (!TII->isPairableLdStInstOpc(MI.getOpcode()))
break;
if (MI.getOpcode() != Opc)
break;
if (!TII->isLdStSafeToPair(MI, TRI))
break;
if (!MI.hasOneMemOperand())
break;
if (!isMemOpAligned(MI, Align(4)))
break;
const MachineOperand &BaseMIOp = MI.getOperand(1);
const MachineOperand &OffsetMIOp = MI.getOperand(2);
if (!BaseMIOp.isReg() || !OffsetMIOp.isImm())
break;
if (BaseMIOp.getReg() != Base)
break;
int64_t Off = OffsetMIOp.getImm();
if (Off != ExpectedOff)
break;
Register Reg = MI.getOperand(0).getReg();
if (IsLoad) {
// For loads, require consecutive destination registers.
if (Reg != StartReg + Index)
break;
if (Reg == Base)
break;
} else {
// For stores, decide mode based on the second instruction and then
// enforce the same for the rest.
if (SMode == StoreMode::Unknown) {
if (Reg == StartReg)
SMode = StoreMode::Setwmi;
else if (Reg == StartReg + 1)
SMode = StoreMode::Swmi;
else
break;
} else if (SMode == StoreMode::Setwmi) {
if (Reg != StartReg)
break;
} else {
if (Reg != StartReg + Index)
break;
}
}
// Passed checks, extend the group.
Group.push_back(&MI);
++Index;
ExpectedOff += 4;
It = next_nodbg(It, E);
}
// We only handle groups of three or more here. Pairs are handled in
// tryConvertToXqcilsmLdStPair.
unsigned Len = Group.size();
if (Len < 3 || Len > 31)
return false;
unsigned NewOpc;
unsigned StartRegState;
bool AddImplicitRegs = true;
if (IsLoad) {
NewOpc = RISCV::QC_LWMI;
StartRegState = static_cast<unsigned>(RegState::Define);
} else {
assert(SMode != StoreMode::Unknown &&
"Group should be large enough to know the store mode");
if (SMode == StoreMode::Setwmi) {
NewOpc = RISCV::QC_SETWMI;
// Kill if any of the individual stores killed the reg.
bool StartKill = false;
for (MachineInstr *MI : Group)
StartKill |= MI->getOperand(0).isKill();
StartRegState = getKillRegState(StartKill);
AddImplicitRegs = false;
} else {
// SWMI requires consecutive source regs and rd != x0.
if (StartReg == RISCV::X0)
return false;
NewOpc = RISCV::QC_SWMI;
StartRegState = getKillRegState(Group.front()->getOperand(0).isKill());
}
}
// Aggregate kill on base.
bool BaseKill = false;
for (MachineInstr *MI : Group)
BaseKill |= MI->getOperand(1).isKill();
// Build the new instruction.
DebugLoc DL = FirstMI.getDebugLoc();
if (!DL)
DL = Group.back()->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(NewOpc));
MIB.addReg(StartReg, StartRegState)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Len)
.addImm(BaseOff);
// Merge memory references.
MIB.cloneMergedMemRefs(Group);
if (AddImplicitRegs) {
// Add implicit operands for the additional registers.
for (unsigned i = 1; i < Len; ++i) {
Register R = StartReg + i;
unsigned State = 0;
if (IsLoad)
State = static_cast<unsigned>(RegState::ImplicitDefine);
else
State = RegState::Implicit |
getKillRegState(Group[i]->getOperand(0).isKill());
MIB.addReg(R, State);
}
}
// Insert before the first instruction and remove all in the group.
MachineBasicBlock *MBB = FirstIt->getParent();
MachineBasicBlock::iterator NewIt = MBB->insert(FirstIt, MIB);
for (MachineInstr *MI : Group)
MI->removeFromParent();
// Advance the cursor to the next non-debug instruction after the group.
FirstIt = next_nodbg(NewIt, MBB->end());
return true;
}
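// Convert an adjacent LW/SW pair into QC_LWMI/QC_SWMI/QC_SETWMI with a count
// of 2. For example (illustrative):
//   SW X10, X8, 0
//   SW X11, X8, 4
// becomes
//   QC_SWMI X10, X8, 2, 0, implicit X11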
bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair(
MachineFunction *MF, MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second) {
unsigned Opc = First->getOpcode();
if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc)
return false;
const auto &FirstOp1 = First->getOperand(1);
const auto &SecondOp1 = Second->getOperand(1);
const auto &FirstOp2 = First->getOperand(2);
const auto &SecondOp2 = Second->getOperand(2);
// Require simple reg+imm addressing for both.
if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() ||
!SecondOp2.isImm())
return false;
Register Base1 = FirstOp1.getReg();
Register Base2 = SecondOp1.getReg();
if (Base1 != Base2)
return false;
if (!First->hasOneMemOperand() || !Second->hasOneMemOperand())
return false;
if (!isMemOpAligned(*First, Align(4)) || !isMemOpAligned(*Second, Align(4)))
return false;
auto &FirstOp0 = First->getOperand(0);
auto &SecondOp0 = Second->getOperand(0);
int64_t Off1 = FirstOp2.getImm();
int64_t Off2 = SecondOp2.getImm();
if (Off2 < Off1) {
std::swap(FirstOp0, SecondOp0);
std::swap(Off1, Off2);
}
if (!isShiftedUInt<5, 2>(Off1) || (Off2 - Off1 != 4))
return false;
Register StartReg = FirstOp0.getReg();
Register NextReg = SecondOp0.getReg();
unsigned XqciOpc;
unsigned StartRegState;
unsigned NextRegState = 0;
bool AddNextReg = true;
if (Opc == RISCV::LW) {
if (StartReg == RISCV::X0)
return false;
// If the base reg gets overwritten by one of the loads, bail out.
if (StartReg == Base1 || NextReg == Base1)
return false;
// The registers need to be consecutive.
if (NextReg != StartReg + 1)
return false;
XqciOpc = RISCV::QC_LWMI;
StartRegState = static_cast<unsigned>(RegState::Define);
NextRegState = static_cast<unsigned>(RegState::ImplicitDefine);
} else {
assert(Opc == RISCV::SW && "Expected a SW instruction");
if (StartReg == NextReg) {
XqciOpc = RISCV::QC_SETWMI;
StartRegState = getKillRegState(FirstOp0.isKill() || SecondOp0.isKill());
AddNextReg = false;
} else if (NextReg == StartReg + 1 && StartReg != RISCV::X0) {
XqciOpc = RISCV::QC_SWMI;
StartRegState = getKillRegState(FirstOp0.isKill());
NextRegState = RegState::Implicit | getKillRegState(SecondOp0.isKill());
} else {
return false;
}
}
DebugLoc DL =
First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc));
MIB.addReg(StartReg, StartRegState)
.addReg(Base1, getKillRegState(FirstOp1.isKill() || SecondOp1.isKill()))
.addImm(2)
.addImm(Off1)
.cloneMergedMemRefs({&*First, &*Second});
if (AddNextReg)
MIB.addReg(NextReg, NextRegState);
First->getParent()->insert(First, MIB);
First->removeFromParent();
Second->removeFromParent();
return true;
}
bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair(
MachineFunction *MF, MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second) {
// Try converting to SWP/LWP/LDP/SDP.
// SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment.
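// For example (illustrative; assuming the access is suitably aligned), two
// adjacent word loads
//   X10 = LW X8, 0
//   X11 = LW X8, 4
// become
//   X10, X11 = MIPS_LWP X8, 0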
unsigned PairOpc;
Align RequiredAlignment;
switch (First->getOpcode()) {
default:
llvm_unreachable("Unsupported load/store instruction for pairing");
case RISCV::SW:
PairOpc = RISCV::MIPS_SWP;
RequiredAlignment = Align(8);
break;
case RISCV::LW:
PairOpc = RISCV::MIPS_LWP;
RequiredAlignment = Align(8);
break;
case RISCV::SD:
PairOpc = RISCV::MIPS_SDP;
RequiredAlignment = Align(16);
break;
case RISCV::LD:
PairOpc = RISCV::MIPS_LDP;
RequiredAlignment = Align(16);
break;
}
if (!First->hasOneMemOperand())
return false;
if (!isMemOpAligned(*First, RequiredAlignment))
return false;
int64_t Offset = First->getOperand(2).getImm();
if (!isUInt<7>(Offset))
return false;
MachineInstrBuilder MIB = BuildMI(
*MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
TII->get(PairOpc));
MIB.add(First->getOperand(0))
.add(Second->getOperand(0))
.add(First->getOperand(1))
.add(First->getOperand(2))
.cloneMergedMemRefs({&*First, &*Second});
First->getParent()->insert(First, MIB);
First->removeFromParent();
Second->removeFromParent();
return true;
}
// Merge two adjacent load/store instructions into a paired instruction.
// This function calls the vendor-specific implementation that selects the
// appropriate paired opcode, verifies that the memory operand is properly
// aligned, and checks that the offset is valid. If all conditions are met, it
// builds and inserts the paired instruction.
bool RISCVLoadStoreOpt::tryConvertToLdStPair(
MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
MachineFunction *MF = First->getMF();
// Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled.
if (!STI->is64Bit() && STI->hasVendorXqcilsm())
return tryConvertToXqcilsmLdStPair(MF, First, Second);
// Else try to convert them into MIPS Paired Loads/Stores.
return tryConvertToMIPSLdStPair(MF, First, Second);
}
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
return true;
return false;
}
// Scan the instructions looking for a load/store that can be combined with the
// current instruction into a wider equivalent or a load/store pair.
// TODO: Extend pairing logic to consider reordering both instructions
// to a safe "middle" position rather than only merging forward/backward.
// This requires more sophisticated checks for aliasing, register
// liveness, and potential scheduling hazards.
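// On success, MergeForward tells the caller where the merged instruction
// ends up: true means it is placed at the position of the matching (later)
// instruction, i.e. the first instruction is effectively moved forward;
// false keeps the merge at the position of the first instruction.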
MachineBasicBlock::iterator
RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool &MergeForward) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr &FirstMI = *I;
MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
Register Reg = FirstMI.getOperand(0).getReg();
Register BaseReg = FirstMI.getOperand(1).getReg();
int64_t Offset = FirstMI.getOperand(2).getImm();
int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
MergeForward = false;
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())
++Count;
if (MI.getOpcode() == FirstMI.getOpcode() &&
TII->isLdStSafeToPair(MI, TRI)) {
Register MIBaseReg = MI.getOperand(1).getReg();
int64_t MIOffset = MI.getOperand(2).getImm();
if (BaseReg == MIBaseReg) {
if ((Offset != MIOffset + OffsetStride) &&
(Offset + OffsetStride != MIOffset)) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
MemInsns.push_back(&MI);
continue;
}
// If the destination register of one load is the same register or a
// sub/super register of the other load, bail and keep looking.
if (MayLoad &&
TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
MemInsns.push_back(&MI);
continue;
}
// If the BaseReg has been modified, then we cannot do the optimization.
if (!ModifiedRegUnits.available(BaseReg))
return E;
// If the Rt of the second instruction was not modified or used between
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
!(MI.mayLoad() &&
!UsedRegUnits.available(MI.getOperand(0).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
MergeForward = false;
return MBBI;
}
// Likewise, if the Rt of the first instruction is not modified or used
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
if (!(MayLoad &&
!UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
!mayAlias(FirstMI, MemInsns, AA)) {
if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
MergeForward = true;
return MBBI;
}
}
// Unable to combine these instructions due to interference in between.
// Keep looking.
}
}
// The instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
if (MI.isCall())
return E;
// Update modified / used register units.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
if (!ModifiedRegUnits.available(BaseReg))
return E;
// Update list of instructions that read/write memory.
if (MI.mayLoadOrStore())
MemInsns.push_back(&MI);
}
return E;
}
MachineBasicBlock::iterator
RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
bool MergeForward) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, skip one
// further for now. For the MIPS load/store, the merge will invalidate the
// iterator, and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway. For the
// Xqcilsm load/store, we may not want to do this as the second instruction
// could possibly be the first in another pair if we do not merge here. This
// is handled in the else block after the call to tryConvertToLdStPair below.
if (NextI == Paired)
NextI = next_nodbg(NextI, E);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
int Offset = I->getOperand(2).getImm();
int PairedOffset = Paired->getOperand(2).getImm();
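// Choose the insertion side so that, after the move, First always refers to
// the access with the lower offset and Second to the higher one.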
bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
if (!MergeForward)
Paired->getOperand(1).setIsKill(false);
// Kill flags may become invalid when moving stores for pairing.
if (I->getOperand(0).isUse()) {
if (!MergeForward) {
// Check if the Paired store's source register has a kill flag and clear
// it only if there are intermediate uses between I and Paired.
MachineOperand &PairedRegOp = Paired->getOperand(0);
if (PairedRegOp.isKill()) {
for (auto It = std::next(I); It != Paired; ++It) {
if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
PairedRegOp.setIsKill(false);
break;
}
}
}
} else {
// Clear kill flags of the first store's register in the forward
// direction.
Register Reg = I->getOperand(0).getReg();
for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
MI.clearRegisterKills(Reg, TRI);
}
}
MachineInstr *ToInsert = DeletionPoint->removeFromParent();
MachineBasicBlock &MBB = *InsertionPoint->getParent();
MachineBasicBlock::iterator First, Second;
if (!InsertAfter) {
First = MBB.insert(InsertionPoint, ToInsert);
Second = InsertionPoint;
} else {
Second = MBB.insertAfter(InsertionPoint, ToInsert);
First = InsertionPoint;
}
if (tryConvertToLdStPair(First, Second)) {
LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
} else if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
// We were unable to form the pair, so use the next non-debug instruction
// after the first instruction we had wanted to merge.
NextI = next_nodbg(I, E);
}
return NextI;
}
//===----------------------------------------------------------------------===//
// Post register-allocation Zilsd fix-up implementation
//===----------------------------------------------------------------------===//
bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
// Special case: the first register cannot be x0 unless both registers are x0.
// The spec says: LD instructions with destination x0 are processed as any
// other load, but the result is discarded entirely and x1 is not written. If
// x0 is used as the source of SD, the entire 64-bit operand is zero, i.e.
// register x1 is not accessed.
if (First == RISCV::X0)
return Second == RISCV::X0;
// Check if registers form a valid even/odd pair for Zilsd
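// e.g. (X10, X11) is a valid pair, while (X11, X12) or (X10, X12) is not.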
unsigned FirstNum = TRI->getEncodingValue(First);
unsigned SecondNum = TRI->getEncodingValue(Second);
// Must be consecutive and first must be even
return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
}
void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
bool IsLoad) {
MachineInstr *MI = &*MBBI;
DebugLoc DL = MI->getDebugLoc();
const MachineOperand &FirstOp = MI->getOperand(0);
const MachineOperand &SecondOp = MI->getOperand(1);
const MachineOperand &BaseOp = MI->getOperand(2);
Register FirstReg = FirstOp.getReg();
Register SecondReg = SecondOp.getReg();
Register BaseReg = BaseOp.getReg();
// Handle both immediate and symbolic operands for offset
const MachineOperand &OffsetOp = MI->getOperand(3);
int BaseOffset;
if (OffsetOp.isImm())
BaseOffset = OffsetOp.getImm();
else
// For symbolic operands, extract the embedded offset
BaseOffset = OffsetOp.getOffset();
unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
MachineInstrBuilder MIB1, MIB2;
// Create two separate instructions
if (IsLoad) {
// The first register may be the same as the base register. A naive split
// would then be incorrect because the base register is overwritten by the
// first load, e.g.
// X10, X13 = PseudoLD_RV32_OPT killed X10, 0
// =>
// X10 = LW X10, 0
// X13 = LW killed X10, 4
// We can simply switch the order of the two loads to resolve that:
// X13 = LW X10, 4
// X10 = LW killed X10, 0
if (FirstReg == BaseReg) {
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(SecondReg,
RegState::Define | getDeadRegState(SecondOp.isDead()))
.addReg(BaseReg);
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(FirstReg,
RegState::Define | getDeadRegState(FirstOp.isDead()))
.addReg(BaseReg, getKillRegState(BaseOp.isKill()));
} else {
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(FirstReg,
RegState::Define | getDeadRegState(FirstOp.isDead()))
.addReg(BaseReg);
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(SecondReg,
RegState::Define | getDeadRegState(SecondOp.isDead()))
.addReg(BaseReg, getKillRegState(BaseOp.isKill()));
}
++NumLD2LW;
LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
} else {
assert(FirstReg != SecondReg &&
"The first and second source registers cannot be the same register");
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(FirstReg, getKillRegState(FirstOp.isKill()))
.addReg(BaseReg);
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(SecondReg, getKillRegState(SecondOp.isKill()))
.addReg(BaseReg, getKillRegState(BaseOp.isKill()));
++NumSD2SW;
LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
}
// Add offset operands - preserve symbolic references
MIB1.add(OffsetOp);
if (OffsetOp.isImm())
MIB2.addImm(BaseOffset + 4);
else if (OffsetOp.isGlobal())
MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
OffsetOp.getTargetFlags());
else if (OffsetOp.isCPI())
MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
OffsetOp.getTargetFlags());
else if (OffsetOp.isBlockAddress())
MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
OffsetOp.getTargetFlags());
// Copy memory operands if the original instruction had them
// FIXME: This is overly conservative; the new instruction accesses 4 bytes,
// not 8.
MIB1.cloneMemRefs(*MI);
MIB2.cloneMemRefs(*MI);
// Remove the original paired instruction and update iterator
MBBI = MBB.erase(MBBI);
}
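// Rewrite PseudoLD_RV32_OPT/PseudoSD_RV32_OPT after register allocation: if
// the registers do not form a valid even/odd pair, split the pseudo back into
// two word accesses (see splitLdSdIntoTwo above); otherwise convert it to the
// real Zilsd instruction using the matching GPR pair register, e.g.
// (illustrative)
//   X10, X11 = PseudoLD_RV32_OPT X12, 0
// becomes
//   X10_X11 = LD_RV32 X12, 0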
bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();
// Check if this is a Zilsd pseudo that needs fixing
if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
return false;
bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
const MachineOperand &FirstOp = MI->getOperand(0);
const MachineOperand &SecondOp = MI->getOperand(1);
Register FirstReg = FirstOp.getReg();
Register SecondReg = SecondOp.getReg();
if (!isValidZilsdRegPair(FirstReg, SecondReg)) {
// Need to split back into two instructions
splitLdSdIntoTwo(MBB, MBBI, IsLoad);
return true;
}
// Registers are valid, convert to real LD/SD instruction
const MachineOperand &BaseOp = MI->getOperand(2);
Register BaseReg = BaseOp.getReg();
DebugLoc DL = MI->getDebugLoc();
// Handle both immediate and symbolic operands for offset
const MachineOperand &OffsetOp = MI->getOperand(3);
unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
// Create register pair from the two individual registers
unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
&RISCV::GPRPairRegClass);
// Create the real LD/SD instruction with register pair
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));
if (IsLoad) {
// For LD, the register pair is the destination
MIB.addReg(RegPair, RegState::Define | getDeadRegState(FirstOp.isDead() &&
SecondOp.isDead()));
} else {
// For SD, the register pair is the source
MIB.addReg(RegPair, getKillRegState(FirstOp.isKill() && SecondOp.isKill()));
}
MIB.addReg(BaseReg, getKillRegState(BaseOp.isKill()))
.add(OffsetOp)
.cloneMemRefs(*MI);
LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
<< "\n");
// Remove the pseudo instruction and update iterator
MBBI = MBB.erase(MBBI);
return true;
}
// Returns an instance of the Load / Store Optimization pass.
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
return new RISCVLoadStoreOpt();
}