//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
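// A hidden command-line flag controlling the option below; a usage sketch
// (assuming the usual llc plumbing for hidden cl::opt flags):
//   llc -stack-tagging-unchecked-ld-st=never ...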
cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
"stack-tagging-unchecked-ld-st", cl::Hidden,
cl::init(UncheckedSafe),
cl::desc(
"Unconditionally apply unchecked-ld-st optimization (even for large "
"stack frames, or in the presence of variable sized allocas)."),
cl::values(
clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
clEnumValN(
UncheckedSafe, "safe",
"apply unchecked-ld-st when the target is definitely within range"),
clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
static cl::opt<bool>
ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
cl::ZeroOrMore,
cl::desc("Apply first slot optimization for stack tagging "
"(eliminate ADDG Rt, Rn, 0, 0)."));
namespace {
class AArch64StackTaggingPreRA : public MachineFunctionPass {
MachineFunction *MF;
AArch64FunctionInfo *AFI;
MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
const AArch64RegisterInfo *TRI;
const AArch64InstrInfo *TII;
SmallVector<MachineInstr*, 16> ReTags;
public:
static char ID;
AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
}
bool mayUseUncheckedLoadStore();
void uncheckUsesOf(unsigned TaggedReg, int FI);
void uncheckLoadsAndStores();
Optional<int> findFirstSlotCandidate();
bool runOnMachineFunction(MachineFunction &Func) override;
StringRef getPassName() const override {
return "AArch64 Stack Tagging PreRA";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // end anonymous namespace
char AArch64StackTaggingPreRA::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
"AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
"AArch64 Stack Tagging PreRA Pass", false, false)
FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
return new AArch64StackTaggingPreRA();
}
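// Returns true for load/store opcodes with an unsigned scaled immediate
// offset whose base operand can be replaced by a frame index. Such accesses
// become SP-based after frame lowering, and MTE does not tag-check SP-based
// accesses, which is what makes them "unchecked".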
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
switch (Opcode) {
case AArch64::LDRBBui:
case AArch64::LDRHHui:
case AArch64::LDRWui:
case AArch64::LDRXui:
case AArch64::LDRBui:
case AArch64::LDRHui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRSHWui:
case AArch64::LDRSHXui:
case AArch64::LDRSBWui:
case AArch64::LDRSBXui:
case AArch64::LDRSWui:
case AArch64::STRBBui:
case AArch64::STRHHui:
case AArch64::STRWui:
case AArch64::STRXui:
case AArch64::STRBui:
case AArch64::STRHui:
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::LDPWi:
case AArch64::LDPXi:
case AArch64::LDPSi:
case AArch64::LDPDi:
case AArch64::LDPQi:
case AArch64::LDPSWi:
case AArch64::STPWi:
case AArch64::STPXi:
case AArch64::STPSi:
case AArch64::STPDi:
case AArch64::STPQi:
return true;
default:
return false;
}
}
bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
if (ClUncheckedLdSt == UncheckedNever)
return false;
else if (ClUncheckedLdSt == UncheckedAlways)
return true;
// This estimate could be improved if we had stronger guarantees about stack
// frame layout. With LocalStackAllocation we could estimate the SP offset of
// any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
// tagged objects ahead of non-tagged ones, but that's not always desirable.
//
// Underestimating the SP offset here may force the use of LDG to materialize
// the tagged address of a stack slot, along with a scratch register
// allocation (post-regalloc!).
//
// For now we do the safe thing and require that the entire stack frame is
// within range of the shortest of the unchecked instructions.
unsigned FrameSize = 0;
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
FrameSize += MFI->getObjectSize(i);
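// Heuristic margin: 0xf00 (3840) keeps the entire frame within the
// 0..4095-byte unsigned immediate range of the shortest-range (byte-sized)
// forms above, with some slack left over for alignment padding.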
bool EntireFrameReachableFromSP = FrameSize < 0xf00;
return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
}
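// Rewrite every unchecked load/store that uses TaggedReg as its base address,
// directly or through a chain of virtual-register COPYs: the register operand
// becomes the frame index itself, flagged MO_TAGGED. An illustrative MIR
// sketch (register and slot names are made up):
//   %tagged = TAGPstack %stack.0, 0, %base, 2
//   LDRXui %v, %tagged, 1
// becomes
//   LDRXui %v, %stack.0 (tagged), 1
// after which %tagged may be left with no uses at all.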
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
for (MachineInstr &UseI :
llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
// FI operand is always the one before the immediate offset.
unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
if (UseI.getOperand(OpIdx).isReg() &&
UseI.getOperand(OpIdx).getReg() == TaggedReg) {
UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
}
} else if (UseI.isCopy() &&
Register::isVirtualRegister(UseI.getOperand(0).getReg())) {
uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
}
}
}
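// For each collected TAGPstack, rewrite the unchecked uses of its tagged
// address (see uncheckUsesOf above).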
void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
for (auto *I : ReTags) {
unsigned TaggedReg = I->getOperand(0).getReg();
int FI = I->getOperand(1).getIndex();
uncheckUsesOf(TaggedReg, FI);
}
}
namespace {
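// A stack slot (frame index) together with the tag offset that its TAGPstack
// instruction assigns to it.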
struct SlotWithTag {
int FI;
int Tag;
SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
explicit SlotWithTag(const MachineInstr &MI)
: FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
bool operator==(const SlotWithTag &Other) const {
return FI == Other.FI && Tag == Other.Tag;
}
};
} // namespace
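// Let SlotWithTag serve as a DenseMap key. Tagged slots are never fixed
// objects, so the negative frame indices -2 and -3 cannot collide with a real
// entry and are free to act as the empty and tombstone keys.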
namespace llvm {
template <> struct DenseMapInfo<SlotWithTag> {
static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
static unsigned getHashValue(const SlotWithTag &V) {
return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
DenseMapInfo<int>::getHashValue(V.Tag));
}
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
return A == B;
}
};
} // namespace llvm
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
return MFI->getUseLocalStackAllocationBlock() &&
MFI->isObjectPreAllocated(FI);
}
// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This would make its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
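//
// For example, if the tagged base pointer is defined by an IRG, a slot pinned
// to offset 0 whose tag offset is also 0 needs no ADDG at all (illustrative
// MIR; see the rewrite loop in runOnMachineFunction):
//   %tagged = TAGPstack %stack.N, 0, %base, 0  ; becomes: %tagged = COPY %base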
Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
// Find the best (FI, Tag) pair to pin to offset 0.
// Looking at the possible uses of a tagged address, pinning pays off (or not)
// as follows:
// - COPY to a physical register.
//   Does not matter; this would only trade a MOV instruction for an ADDG.
// - ST*G instructions matter, but those mostly appear near the function
//   prologue, where all the tagged addresses have to be materialized anyway;
//   also, counting ST*G uses would overweight large allocas that require more
//   than one ST*G instruction.
// - Uses in the address operand of a load/store do not require a tagged
//   pointer, so they do not benefit either. Those operands have already been
//   rewritten (see uncheckLoadsAndStores), so all remaining load/store
//   instructions count.
// - Any other instruction may benefit from being pinned to offset 0.
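//
// For example, a slot whose tagged address feeds two arithmetic or call uses
// scores 2, while a slot whose address is consumed only by ST*G stores and
// COPYs to physical registers scores 0 and will never be picked.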
LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
if (!ClFirstSlot)
return None;
DenseMap<SlotWithTag, int> RetagScore;
SlotWithTag MaxScoreST{-1, -1};
int MaxScore = -1;
for (auto *I : ReTags) {
SlotWithTag ST{*I};
if (isSlotPreAllocated(MFI, ST.FI))
continue;
Register RetagReg = I->getOperand(0).getReg();
if (!Register::isVirtualRegister(RetagReg))
continue;
int Score = 0;
SmallVector<Register, 8> WorkList;
WorkList.push_back(RetagReg);
while (!WorkList.empty()) {
Register UseReg = WorkList.pop_back_val();
for (auto &UseI : MRI->use_instructions(UseReg)) {
unsigned Opcode = UseI.getOpcode();
if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
Opcode == AArch64::STZGloop_wback)
continue;
if (UseI.isCopy()) {
Register DstReg = UseI.getOperand(0).getReg();
if (Register::isVirtualRegister(DstReg))
WorkList.push_back(DstReg);
continue;
}
LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
<< Register::virtReg2Index(UseReg) << " in " << UseI
<< "\n");
Score++;
}
}
int TotalScore = RetagScore[ST] += Score;
if (TotalScore > MaxScore ||
(TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
MaxScore = TotalScore;
MaxScoreST = ST;
}
}
if (MaxScoreST.FI < 0)
return None;
// If FI's tag is already 0, we are done.
if (MaxScoreST.Tag == 0)
return MaxScoreST.FI;
// Otherwise, find an arbitrary victim pair (FI, Tag) where Tag == 0.
SlotWithTag SwapST{-1, -1};
for (auto *I : ReTags) {
SlotWithTag ST{*I};
if (ST.Tag == 0) {
SwapST = ST;
break;
}
}
// Swap tags between the victim and the highest scoring pair.
// If SwapST is still (-1, -1), that's fine, too - we'll simply take tag 0 for
// the highest scoring slot without changing anything else.
for (auto *&I : ReTags) {
SlotWithTag ST{*I};
MachineOperand &TagOp = I->getOperand(4);
if (ST == MaxScoreST) {
TagOp.setImm(0);
} else if (ST == SwapST) {
TagOp.setImm(MaxScoreST.Tag);
}
}
return MaxScoreST.FI;
}
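// Entry point: collect all TAGPstack instructions, rewrite unchecked loads
// and stores to reference their frame index directly where that is safe, pick
// a slot to pin at offset 0 from the tagged base pointer, and replace that
// slot's TAGPstack with a plain COPY of the base pointer.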
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
MRI = &MF->getRegInfo();
AFI = MF->getInfo<AArch64FunctionInfo>();
TII = static_cast<const AArch64InstrInfo *>(
MF->getSubtarget().getInstrInfo());
TRI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
MFI = &MF->getFrameInfo();
ReTags.clear();
assert(MRI->isSSA() && "Expected to be run on SSA-form MIR");
LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
<< "********** Function: " << MF->getName() << '\n');
SmallSetVector<int, 8> TaggedSlots;
for (auto &BB : *MF) {
for (auto &I : BB) {
if (I.getOpcode() == AArch64::TAGPstack) {
ReTags.push_back(&I);
int FI = I.getOperand(1).getIndex();
TaggedSlots.insert(FI);
// There should be no offsets in TAGPstack yet.
assert(I.getOperand(2).getImm() == 0);
}
}
}
// Take over from SSP. It does nothing for tagged slots, and should not really
// have been enabled in the first place.
for (int FI : TaggedSlots)
MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
if (ReTags.empty())
return false;
if (mayUseUncheckedLoadStore())
uncheckLoadsAndStores();
// Find a slot that is used with a zero tag offset, i.e. as ADDG #fi, #0.
// If the tagged base pointer is set up to the address of this slot,
// that ADDG instruction can be eliminated.
Optional<int> BaseSlot = findFirstSlotCandidate();
if (BaseSlot)
AFI->setTaggedBasePointerIndex(*BaseSlot);
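// The chosen slot now carries tag offset 0 and will be laid out at offset 0
// from the tagged base pointer, so its TAGPstack degenerates into a COPY of
// that pointer; frame lowering then has no ADDG to emit for it.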
for (auto *I : ReTags) {
int FI = I->getOperand(1).getIndex();
int Tag = I->getOperand(4).getImm();
Register Base = I->getOperand(3).getReg();
if (Tag == 0 && FI == BaseSlot) {
BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
I->getOperand(0).getReg())
.addReg(Base);
I->eraseFromParent();
}
}
return true;
}