blob: 92ebd8592f85481c7fb7a6e4c151c2cef7d22d0c [file] [edit]
//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass runs after instruction scheduling and employs code layout
// optimizations for certain patterns.
//
// Option -aarch64-code-layout-opt-enable selects instruction pairs to optimize:
// cmp-csel: Enable CMP/CMN-CSEL code layout optimization
// fcmp-fcsel: Enable FCMP-FCSEL code layout optimization
//
// The initial implementation induces function alignment when a supported
// pattern is detected, and possibly instruction-alignment when a pair would
// straddle cache-lines.
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-code-layout-opt"
#define DBG(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
/// Instruction-pair kinds this pass can align. The enumerators are used as the
/// values of the -aarch64-code-layout-opt-enable bit-set option.
enum CodeLayoutOpt {
CmpCsel, // Align CMP/CMN-CSEL pairs (selected with "cmp-csel")
FcmpFcsel, // Align FCMP-FCSEL pairs (selected with "fcmp-fcsel")
};
/// Hidden, comma-separated option selecting which instruction-pair kinds are
/// aligned. When no value is given, runOnMachineFunction() falls back to the
/// subtarget features to decide what to enable.
static cl::bits<CodeLayoutOpt> EnableCodeAlignment(
"aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated,
cl::desc("Enable code alignment optimization for instruction pairs"),
cl::values(
clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"),
clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")));
/// Alignment (in bytes) requested for any function in which at least one
/// layout-sensitive pair was aligned. Defaults to 64. The callback rejects
/// values that are not a power of two, because the value is later used to
/// construct an llvm::Align.
static cl::opt<unsigned> FunctionAlignBytes(
    "aarch64-code-layout-opt-align-functions", cl::Hidden,
    cl::desc("Function alignment in bytes for code layout optimization "
             "(must be a power of 2)"),
    cl::init(64), cl::callback([](const unsigned &Val) {
      // Name the option exactly as it is spelled on the command line so the
      // diagnostic points the user at the right flag.
      if (!isPowerOf2_32(Val))
        report_fatal_error(
            "aarch64-code-layout-opt-align-functions must be a power of 2");
    }));
// Counters reported under -stats for this pass.
STATISTIC(NumFunctionsAligned,
          "Number of functions aligned (to 64 bytes by default)");
STATISTIC(NumCmpCselPairsDetected,
          "Number of CMP/CMN-CSEL pairs detected for alignment");
STATISTIC(NumFcmpFcselPairsDetected,
          "Number of FCMP-FCSEL pairs detected for alignment");
namespace {

/// Machine-function pass that looks for compare/conditional-select pairs and
/// requests block and function alignment for them.
class AArch64CodeLayoutOpt : public MachineFunctionPass {
public:
  /// Pass identification; only its address is meaningful.
  static char ID;

  AArch64CodeLayoutOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return AARCH64_CODE_LAYOUT_OPT_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  /// Scan every block of \p MF for supported pairs and, if any pair was
  /// aligned, raise the function alignment when it is currently lower than
  /// the configured value.
  /// \returns true iff at least one pair was aligned.
  bool optimizeForCodeLayout(MachineFunction &MF);

  /// Find each CMP/CMN-CSEL or FCMP-FCSEL pair in \p MBB and request
  /// alignment (.p2align) in front of the pair's first instruction, splitting
  /// the block when that instruction is not already at the block start.
  /// \returns true iff at least one pair was found and aligned.
  bool alignLayoutSensitivePatterns(MachineBasicBlock *MBB);

  /// Request \p DesiredAlign for the block beginning at \p MI, allowing at
  /// most \p MaxSkipBytes of padding. If \p MI is not the first non-debug
  /// instruction of its block, the block is split so that it becomes one.
  void emitP2Align(MachineInstr &MI, Align DesiredAlign,
                   unsigned MaxSkipBytes = 4);

  /// Instruction info of the current function's subtarget; assigned at the
  /// start of runOnMachineFunction().
  const AArch64InstrInfo *TII = nullptr;
};

} // end anonymous namespace
// Definition of the pass ID declared in the class, followed by registration of
// the pass under the command-line name "aarch64-code-layout-opt". The two
// trailing 'false' arguments are INITIALIZE_PASS's cfg-only and is-analysis
// flags.
char AArch64CodeLayoutOpt::ID = 0;
INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
/// Declare the analyses this pass needs and preserves.
///
/// Nothing is marked as preserved: emitP2Align() may split basic blocks, which
/// adds blocks and edges to the CFG, so neither setPreservesAll() nor
/// setPreservesCFG() would be a truthful claim. Leaving analyses unpreserved
/// only costs a recomputation if a later pass asks for them.
void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}
/// Factory for the pass: heap-allocates a new AArch64CodeLayoutOpt instance
/// and returns it as a FunctionPass pointer.
FunctionPass *llvm::createAArch64CodeLayoutOptPass() {
return new AArch64CodeLayoutOpt();
}
/// \returns true iff \p Opc is one of the register-register FCMP or FCMPE
/// opcodes (half, single or double precision).
static bool isFloatingPointCompare(unsigned Opc) {
  const bool IsFCmp = Opc == AArch64::FCMPHrr || Opc == AArch64::FCMPSrr ||
                      Opc == AArch64::FCMPDrr;
  const bool IsFCmpE = Opc == AArch64::FCMPEHrr || Opc == AArch64::FCMPESrr ||
                       Opc == AArch64::FCMPEDrr;
  return IsFCmp || IsFCmpE;
}
/// \returns true iff \p Opc is an FCSEL opcode (half, single or double
/// precision).
static bool isFloatingPointConditionalSelect(unsigned Opc) {
  return Opc == AArch64::FCSELHrrr || Opc == AArch64::FCSELSrrr ||
         Opc == AArch64::FCSELDrrr;
}
/// Decide whether \p MI is a 32-bit CMP/CMN that may lead an alignable pair.
///
/// CMP is SUBS and CMN is ADDS writing WZR, so every accepted form must define
/// WZR. On top of that:
///  - register-register forms qualify as-is;
///  - immediate forms need a zero shift operand and an immediate <= 15;
///  - shifted-register forms must not actually shift;
///  - the extended-register SUBS form must not actually extend.
/// NOTE(review): ADDSWrx is not accepted although SUBSWrx is — confirm that
/// this asymmetry is intentional.
/// \returns true iff \p MI satisfies the rules above.
static bool isQualifyingIntCompare(const MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();

  // Reject everything that is not one of the candidate opcodes up front.
  switch (Opc) {
  case AArch64::SUBSWrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrs:
  case AArch64::ADDSWrs:
  case AArch64::SUBSWrx:
    break;
  default:
    return false;
  }

  // All candidates must discard their result into WZR to be a CMP/CMN. This
  // is checked before any operand is inspected.
  if (!MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr))
    return false;

  // Per-form restrictions.
  switch (Opc) {
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
    if (MI.getOperand(3).getImm() != 0)
      return false;
    return MI.getOperand(2).getImm() <= 15;
  case AArch64::SUBSWrs:
  case AArch64::ADDSWrs:
    return !AArch64InstrInfo::hasShiftedReg(MI);
  case AArch64::SUBSWrx:
    return !AArch64InstrInfo::hasExtendedReg(MI);
  default:
    // Plain register-register forms have no further constraints.
    return true;
  }
}
bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
const Function &F = MF.getFunction();
// hasOptSize() returns true for both -Os and -Oz.
if (F.hasOptSize())
return false;
const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
TII = Subtarget->getInstrInfo();
// Default: enable when the subtarget opts in via FeatureAlignCmpCSelPairs.
if (!EnableCodeAlignment.getBits() && Subtarget->hasAlignCmpCSelPairs()) {
if (Subtarget->hasFuseCmpCSel())
EnableCodeAlignment.addValue(CmpCsel);
if (Subtarget->hasFuseFCmpFCSel())
EnableCodeAlignment.addValue(FcmpFcsel);
}
if (!(EnableCodeAlignment.isSet(CmpCsel) && Subtarget->hasFuseCmpCSel()) &&
!(EnableCodeAlignment.isSet(FcmpFcsel) && Subtarget->hasFuseFCmpFCSel()))
return false;
return optimizeForCodeLayout(MF);
}
void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &MI, Align DesiredAlign,
unsigned MaxSkipBytes) {
MachineBasicBlock *MBB = MI.getParent();
auto FirstReal =
skipDebugInstructionsForward(MBB->instr_begin(), MBB->instr_end());
if (&*FirstReal != &MI) {
auto PrevIt = prev_nodbg(MI.getIterator(), MBB->instr_begin());
MBB = MBB->splitAt(*PrevIt, /*UpdateLiveIns=*/true);
}
MBB->setAlignment(DesiredAlign);
MBB->setMaxBytesForAlignment(MaxSkipBytes);
}
// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in MBB by emitting
// .p2align before the lead instruction (splitting the block if needed).
// A pair is: a qualifying lead instruction immediately followed by its
// consumer (CMP/CMN→CSEL or FCMP→FCSEL), with no intervening instructions.
// Returns true iff at least one pair was found and aligned.
bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
MachineBasicBlock *MBB) {
auto End = MBB->instr_end();
SmallVector<std::pair<MachineInstr *, bool>, 4> Pairs;
for (auto &MI : instructionsWithoutDebug(MBB->begin(), MBB->end())) {
auto NextIt =
skipDebugInstructionsForward(std::next(MI.getIterator()), End);
if (NextIt == End)
break;
// --- CMP/CMN-CSEL detection ---
if (EnableCodeAlignment.isSet(CmpCsel) && isQualifyingIntCompare(MI) &&
NextIt->getOpcode() == AArch64::CSELWr) {
Pairs.push_back({&MI, true});
continue;
}
// --- FCMP-FCSEL detection ---
if (EnableCodeAlignment.isSet(FcmpFcsel) &&
isFloatingPointCompare(MI.getOpcode()) &&
isFloatingPointConditionalSelect(NextIt->getOpcode())) {
Pairs.push_back({&MI, false});
continue;
}
}
for (auto &[MI, IsCmpCsel] : Pairs) {
emitP2Align(*MI, Align(64));
DBG(".p2align 6, , 4 before " << *MI);
++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
}
return !Pairs.empty();
}
/// Align every supported pair in \p MF and, if any was aligned, raise the
/// function alignment to FunctionAlignBytes when it is currently lower.
/// \returns true iff at least one pair was aligned.
bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
  DBG("optimizeForCodeLayout: " << MF.getName() << "\n");

  // Snapshot the blocks before processing. Aligning a pair may split its
  // block, and the new tail block is inserted right after the current one;
  // walking MF directly would visit that tail next and detect (and count) its
  // leading pair a second time.
  SmallVector<MachineBasicBlock *, 16> Worklist;
  for (MachineBasicBlock &MBB : MF)
    Worklist.push_back(&MBB);

  bool Changed = false;
  for (MachineBasicBlock *MBB : Worklist)
    Changed |= alignLayoutSensitivePatterns(MBB);
  if (!Changed)
    return false;

  if (MF.getAlignment() < Align(FunctionAlignBytes)) {
    MF.setAlignment(Align(FunctionAlignBytes));
    ++NumFunctionsAligned;
    DBG("Set " << FunctionAlignBytes << "-byte alignment for function "
               << MF.getName() << "\n");
  } else {
    DBG("Function " << MF.getName() << " already has sufficient alignment\n");
  }
  return true;
}