blob: 38eeee287b94e4599e8a82610c2a2c29c5863b56 [file] [log] [blame] [edit]
//===- AllocToken.cpp - Allocation token instrumentation ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AllocToken, an instrumentation pass that
// replaces allocation calls with token-enabled versions.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/AllocToken.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Analysis.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/SipHash.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
using namespace llvm;
using TokenMode = AllocTokenMode;
#define DEBUG_TYPE "alloc-token"
namespace {
//===--- Command-line options ---------------------------------------------===//
cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
cl::desc("The allocation function prefix"),
cl::Hidden, cl::init("__alloc_token_"));
cl::opt<uint64_t>
ClMaxTokens("alloc-token-max",
cl::desc("Maximum number of tokens (0 = target SIZE_MAX)"),
cl::Hidden, cl::init(0));
cl::opt<bool>
ClFastABI("alloc-token-fast-abi",
cl::desc("The token ID is encoded in the function name"),
cl::Hidden, cl::init(false));
// Instrument libcalls only by default - compatible allocators only need to take
// care of providing standard allocation functions. With extended coverage, also
// instrument non-libcall allocation function calls with !alloc_token
// metadata.
cl::opt<bool>
ClExtended("alloc-token-extended",
cl::desc("Extend coverage to custom allocation functions"),
cl::Hidden, cl::init(false));
// C++ defines ::operator new (and variants) as replaceable (vs. standard
// library versions), which are nobuiltin, and are therefore not covered by
// isAllocationFn(). Cover by default, as users of AllocToken are already
// required to provide token-aware allocation functions (no defaults).
cl::opt<bool> ClCoverReplaceableNew("alloc-token-cover-replaceable-new",
cl::desc("Cover replaceable operator new"),
cl::Hidden, cl::init(true));
cl::opt<uint64_t> ClFallbackToken(
"alloc-token-fallback",
cl::desc("The default fallback token where none could be determined"),
cl::Hidden, cl::init(0));
//===--- Statistics -------------------------------------------------------===//
STATISTIC(NumFunctionsModified, "Functions modified");
STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
//===----------------------------------------------------------------------===//
/// Returns the !alloc_token metadata if available.
///
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenMetadata(const CallBase &CB) {
MDNode *Ret = nullptr;
if (auto *II = dyn_cast<IntrinsicInst>(&CB);
II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
Ret = cast<MDNode>(MDV->getMetadata());
// If the intrinsic has an empty MDNode, type inference failed.
if (Ret->getNumOperands() == 0)
return nullptr;
} else {
Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
if (!Ret)
return nullptr;
}
assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
return Ret;
}
bool containsPointer(const MDNode *MD) {
ConstantAsMetadata *C = cast<ConstantAsMetadata>(MD->getOperand(1));
auto *CI = cast<ConstantInt>(C->getValue());
return CI->getValue().getBoolValue();
}
class ModeBase {
public:
explicit ModeBase(const IntegerType &TokenTy, uint64_t MaxTokens)
: MaxTokens(MaxTokens ? MaxTokens : TokenTy.getBitMask()) {
assert(MaxTokens <= TokenTy.getBitMask());
}
protected:
uint64_t boundedToken(uint64_t Val) const {
assert(MaxTokens != 0);
return Val % MaxTokens;
}
const uint64_t MaxTokens;
};
/// Implementation for TokenMode::Increment.
class IncrementMode : public ModeBase {
public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
return boundedToken(Counter++);
}
private:
uint64_t Counter = 0;
};
/// Implementation for TokenMode::Random.
class RandomMode : public ModeBase {
public:
RandomMode(const IntegerType &TokenTy, uint64_t MaxTokens,
std::unique_ptr<RandomNumberGenerator> RNG)
: ModeBase(TokenTy, MaxTokens), RNG(std::move(RNG)) {}
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
return boundedToken((*RNG)());
}
private:
std::unique_ptr<RandomNumberGenerator> RNG;
};
/// Implementation for TokenMode::TypeHash. The implementation ensures
/// hashes are stable across different compiler invocations. Uses SipHash as the
/// hash function.
class TypeHashMode : public ModeBase {
public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens))
return *Token;
}
// Fallback.
remarkNoMetadata(CB, ORE);
return ClFallbackToken;
}
protected:
/// Remark that there was no precise type information.
static void remarkNoMetadata(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
ORE.emit([&] {
ore::NV FuncNV("Function", CB.getParent()->getParent());
const Function *Callee = CB.getCalledFunction();
ore::NV CalleeNV("Callee", Callee ? Callee->getName() : "<unknown>");
return OptimizationRemark(DEBUG_TYPE, "NoAllocToken", &CB)
<< "Call to '" << CalleeNV << "' in '" << FuncNV
<< "' without source-level type token";
});
}
};
/// Implementation for TokenMode::TypeHashPointerSplit.
class TypeHashPointerSplitMode : public TypeHashMode {
public:
using TypeHashMode::TypeHashMode;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata,
MaxTokens))
return *Token;
}
// Pick the fallback token (ClFallbackToken), which by default is 0, meaning
// it'll fall into the pointer-less bucket. Override by setting
// -alloc-token-fallback if that is the wrong choice.
remarkNoMetadata(CB, ORE);
return ClFallbackToken;
}
};
// Apply opt overrides and module flags.
static AllocTokenOptions resolveOptions(AllocTokenOptions Opts,
const Module &M) {
auto IntModuleFlagOrNull = [&](StringRef Key) {
return mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Key));
};
if (auto *S = dyn_cast_or_null<MDString>(M.getModuleFlag("alloc-token-mode")))
if (auto Mode = getAllocTokenModeFromString(S->getString()))
Opts.Mode = *Mode;
if (auto *Val = IntModuleFlagOrNull("alloc-token-max"))
Opts.MaxTokens = Val->getZExtValue();
if (auto *Val = IntModuleFlagOrNull("alloc-token-fast-abi"))
Opts.FastABI |= Val->isOne();
if (auto *Val = IntModuleFlagOrNull("alloc-token-extended"))
Opts.Extended |= Val->isOne();
// Allow overriding options from command line options.
if (ClMaxTokens.getNumOccurrences())
Opts.MaxTokens = ClMaxTokens;
if (ClFastABI.getNumOccurrences())
Opts.FastABI = ClFastABI;
if (ClExtended.getNumOccurrences())
Opts.Extended = ClExtended;
return Opts;
}
class AllocToken {
public:
explicit AllocToken(AllocTokenOptions Opts, Module &M,
ModuleAnalysisManager &MAM)
: Options(resolveOptions(std::move(Opts), M)), Mod(M),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
Mode(IncrementMode(*IntPtrTy, Options.MaxTokens)) {
switch (Options.Mode) {
case TokenMode::Increment:
break;
case TokenMode::Random:
Mode.emplace<RandomMode>(*IntPtrTy, Options.MaxTokens,
M.createRNG(DEBUG_TYPE));
break;
case TokenMode::TypeHash:
Mode.emplace<TypeHashMode>(*IntPtrTy, Options.MaxTokens);
break;
case TokenMode::TypeHashPointerSplit:
Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, Options.MaxTokens);
break;
}
}
bool instrumentFunction(Function &F);
private:
/// Returns the LibFunc (or NotLibFunc) if this call should be instrumented.
std::optional<LibFunc>
shouldInstrumentCall(const CallBase &CB, const TargetLibraryInfo &TLI) const;
/// Returns true for functions that are eligible for instrumentation.
static bool isInstrumentableLibFunc(LibFunc Func, const CallBase &CB,
const TargetLibraryInfo &TLI);
/// Returns true for isAllocationFn() functions that we should ignore.
static bool ignoreInstrumentableLibFunc(LibFunc Func);
/// Replace a call/invoke with a call/invoke to the allocation function
/// with token ID.
bool replaceAllocationCall(CallBase *CB, LibFunc Func,
OptimizationRemarkEmitter &ORE,
const TargetLibraryInfo &TLI);
/// Return replacement function for a LibFunc that takes a token ID.
FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
LibFunc OriginalFunc);
/// Lower alloc_token_* intrinsics.
void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
/// Return the token ID from metadata in the call.
uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
}
const AllocTokenOptions Options;
Module &Mod;
IntegerType *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext());
FunctionAnalysisManager &FAM;
// Cache for replacement functions.
DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions;
// Selected mode.
std::variant<IncrementMode, RandomMode, TypeHashMode,
TypeHashPointerSplitMode>
Mode;
};
bool AllocToken::instrumentFunction(Function &F) {
// Do not apply any instrumentation for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
return false;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return false;
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
// Only instrument functions that have the sanitize_alloc_token attribute.
const bool InstrumentFunction =
F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
!F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
// Get TLI only when required.
const TargetLibraryInfo *TLI =
InstrumentFunction ? &FAM.getResult<TargetLibraryAnalysis>(F) : nullptr;
// Collect all allocation calls to avoid iterator invalidation.
for (Instruction &I : instructions(F)) {
// Collect all alloc_token_* intrinsics.
if (auto *II = dyn_cast<IntrinsicInst>(&I);
II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
IntrinsicInsts.emplace_back(II);
continue;
}
if (!InstrumentFunction)
continue;
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, *TLI))
AllocCalls.emplace_back(CB, Func.value());
}
// Return early to avoid unnecessarily instantiating the ORE.
if (AllocCalls.empty() && IntrinsicInsts.empty())
return false;
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Modified = false;
for (auto &[CB, Func] : AllocCalls)
Modified |= replaceAllocationCall(CB, Func, ORE, *TLI);
for (auto *II : IntrinsicInsts) {
replaceIntrinsicInst(II, ORE);
Modified = true;
}
if (Modified)
NumFunctionsModified++;
return Modified;
}
std::optional<LibFunc>
AllocToken::shouldInstrumentCall(const CallBase &CB,
const TargetLibraryInfo &TLI) const {
const Function *Callee = CB.getCalledFunction();
if (!Callee)
return std::nullopt;
// Ignore nobuiltin of the CallBase, so that we can cover nobuiltin libcalls
// if requested via isInstrumentableLibFunc(). Note that isAllocationFn() is
// returning false for nobuiltin calls.
LibFunc Func;
if (TLI.getLibFunc(*Callee, Func)) {
if (isInstrumentableLibFunc(Func, CB, TLI))
return Func;
} else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
return NotLibFunc;
}
return std::nullopt;
}
bool AllocToken::isInstrumentableLibFunc(LibFunc Func, const CallBase &CB,
const TargetLibraryInfo &TLI) {
if (ignoreInstrumentableLibFunc(Func))
return false;
if (isAllocationFn(&CB, &TLI))
return true;
switch (Func) {
// These libfuncs don't return normal pointers, and are therefore not handled
// by isAllocationFn().
case LibFunc_posix_memalign:
case LibFunc_size_returning_new:
case LibFunc_size_returning_new_hot_cold:
case LibFunc_size_returning_new_aligned:
case LibFunc_size_returning_new_aligned_hot_cold:
return true;
// See comment above ClCoverReplaceableNew.
case LibFunc_Znwj:
case LibFunc_ZnwjRKSt9nothrow_t:
case LibFunc_ZnwjSt11align_val_t:
case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t:
case LibFunc_Znwm:
case LibFunc_Znwm12__hot_cold_t:
case LibFunc_ZnwmRKSt9nothrow_t:
case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
case LibFunc_ZnwmSt11align_val_t:
case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
case LibFunc_Znaj:
case LibFunc_ZnajRKSt9nothrow_t:
case LibFunc_ZnajSt11align_val_t:
case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t:
case LibFunc_Znam:
case LibFunc_Znam12__hot_cold_t:
case LibFunc_ZnamRKSt9nothrow_t:
case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
case LibFunc_ZnamSt11align_val_t:
case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
return ClCoverReplaceableNew;
default:
return false;
}
}
bool AllocToken::ignoreInstrumentableLibFunc(LibFunc Func) {
switch (Func) {
case LibFunc_strdup:
case LibFunc_dunder_strdup:
case LibFunc_strndup:
case LibFunc_dunder_strndup:
return true;
default:
return false;
}
}
bool AllocToken::replaceAllocationCall(CallBase *CB, LibFunc Func,
OptimizationRemarkEmitter &ORE,
const TargetLibraryInfo &TLI) {
uint64_t TokenID = getToken(*CB, ORE);
FunctionCallee TokenAlloc = getTokenAllocFunction(*CB, TokenID, Func);
if (!TokenAlloc)
return false;
NumAllocationsInstrumented++;
if (Options.FastABI) {
assert(TokenAlloc.getFunctionType()->getNumParams() == CB->arg_size());
CB->setCalledFunction(TokenAlloc);
return true;
}
IRBuilder<> IRB(CB);
// Original args.
SmallVector<Value *, 4> NewArgs{CB->args()};
// Add token ID, truncated to IntPtrTy width.
NewArgs.push_back(ConstantInt::get(IntPtrTy, TokenID));
assert(TokenAlloc.getFunctionType()->getNumParams() == NewArgs.size());
// Preserve invoke vs call semantics for exception handling.
CallBase *NewCall;
if (auto *II = dyn_cast<InvokeInst>(CB)) {
NewCall = IRB.CreateInvoke(TokenAlloc, II->getNormalDest(),
II->getUnwindDest(), NewArgs);
} else {
NewCall = IRB.CreateCall(TokenAlloc, NewArgs);
cast<CallInst>(NewCall)->setTailCall(CB->isTailCall());
}
NewCall->setCallingConv(CB->getCallingConv());
NewCall->copyMetadata(*CB);
NewCall->setAttributes(CB->getAttributes());
// Replace all uses and delete the old call.
CB->replaceAllUsesWith(NewCall);
CB->eraseFromParent();
return true;
}
FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
uint64_t TokenID,
LibFunc OriginalFunc) {
std::optional<std::pair<LibFunc, uint64_t>> Key;
if (OriginalFunc != NotLibFunc) {
Key = std::make_pair(OriginalFunc, Options.FastABI ? TokenID : 0);
auto It = TokenAllocFunctions.find(*Key);
if (It != TokenAllocFunctions.end())
return It->second;
}
const Function *Callee = CB.getCalledFunction();
if (!Callee)
return FunctionCallee();
const FunctionType *OldFTy = Callee->getFunctionType();
if (OldFTy->isVarArg())
return FunctionCallee();
// Copy params, and append token ID type.
Type *RetTy = OldFTy->getReturnType();
SmallVector<Type *, 4> NewParams{OldFTy->params()};
std::string TokenAllocName = ClFuncPrefix;
if (Options.FastABI)
TokenAllocName += utostr(TokenID) + "_";
else
NewParams.push_back(IntPtrTy); // token ID
TokenAllocName += Callee->getName();
FunctionType *NewFTy = FunctionType::get(RetTy, NewParams, false);
FunctionCallee TokenAlloc = Mod.getOrInsertFunction(TokenAllocName, NewFTy);
if (Function *F = dyn_cast<Function>(TokenAlloc.getCallee()))
F->copyAttributesFrom(Callee); // preserve attrs
if (Key.has_value())
TokenAllocFunctions[*Key] = TokenAlloc;
return TokenAlloc;
}
void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
OptimizationRemarkEmitter &ORE) {
assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
uint64_t TokenID = getToken(*II, ORE);
Value *V = ConstantInt::get(IntPtrTy, TokenID);
II->replaceAllUsesWith(V);
II->eraseFromParent();
}
} // namespace
AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
: Options(std::move(Opts)) {}
PreservedAnalyses AllocTokenPass::run(Module &M, ModuleAnalysisManager &MAM) {
AllocToken Pass(Options, M, MAM);
bool Modified = false;
for (Function &F : M) {
if (F.empty())
continue; // declaration
Modified |= Pass.instrumentFunction(F);
}
return Modified ? PreservedAnalyses::none().preserveSet<CFGAnalyses>()
: PreservedAnalyses::all();
}