blob: 8b034137e4702d6ffe2f65d91c8988ecf9db51b3 [file] [log] [blame]
//===-- RandomIRBuilder.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/FuzzMutate/OpDescriptor.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace fuzzerop;
/// Return a vector of Blocks that dominates this block, excluding current
/// block.
static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
std::vector<BasicBlock *> ret;
DominatorTree DT(*BB->getParent());
DomTreeNode *Node = DT.getNode(BB);
// It's possible that an orphan block is not in the dom tree. In that case we
// just return nothing.
if (!Node)
return ret;
Node = Node->getIDom();
while (Node && Node->getBlock()) {
ret.push_back(Node->getBlock());
// Get parent block.
Node = Node->getIDom();
}
return ret;
}
/// Return a vector of Blocks that is dominated by this block, excluding current
/// block
static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
DominatorTree DT(*BB->getParent());
std::vector<BasicBlock *> ret;
DomTreeNode *Parent = DT.getNode(BB);
// It's possible that an orphan block is not in the dom tree. In that case we
// just return nothing.
if (!Parent)
return ret;
for (DomTreeNode *Child : Parent->children())
ret.push_back(Child->getBlock());
uint64_t Idx = 0;
while (Idx < ret.size()) {
DomTreeNode *Node = DT[ret[Idx]];
Idx++;
for (DomTreeNode *Child : Node->children())
ret.push_back(Child->getBlock());
}
return ret;
}
AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
Value *Init) {
/// TODO: For all Allocas, maybe allocate an array.
BasicBlock *EntryBB = &F->getEntryBlock();
const DataLayout &DL = F->getDataLayout();
AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
EntryBB->getFirstInsertionPt());
if (Init)
new StoreInst(Init, Alloca, std::next(Alloca->getIterator()));
return Alloca;
}
std::pair<GlobalVariable *, bool>
RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
fuzzerop::SourcePred Pred) {
auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
// Can't directly compare GV's type, as it would be a pointer to the actual
// type.
return Pred.matches(Srcs, PoisonValue::get(GV->getValueType()));
};
bool DidCreate = false;
SmallVector<GlobalVariable *, 4> GlobalVars(
llvm::make_pointer_range(M->globals()));
auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
RS.sample(nullptr, 1);
GlobalVariable *GV = RS.getSelection();
if (!GV) {
DidCreate = true;
using LinkageTypes = GlobalVariable::LinkageTypes;
auto TRS = makeSampler<Constant *>(Rand);
TRS.sample(Pred.generate(Srcs, KnownTypes));
Constant *Init = TRS.getSelection();
Type *Ty = Init->getType();
GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
"G", nullptr,
GlobalValue::ThreadLocalMode::NotThreadLocal,
M->getDataLayout().getDefaultGlobalsAddressSpace());
}
return {GV, DidCreate};
}
Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
ArrayRef<Instruction *> Insts) {
return findOrCreateSource(BB, Insts, {}, anyType());
}
// Adapts the current pointer for a legal mem operation on the target arch.
static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
const Twine &Name,
SmallVector<Instruction *> *NewInsts) {
if (M && M->getTargetTriple().isAMDGCN()) {
// Check if we should perform an address space cast
PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType());
if (pointerType && pointerType->getAddressSpace() == 8) {
// Perform address space cast from address space 8 to address space 7
auto NewPtr = new AddrSpaceCastInst(
Ptr, PointerType::get(M->getContext(), 7), Name + ".ASC", IP);
if (NewInsts)
NewInsts->push_back(NewPtr);
return NewPtr;
}
}
return Ptr;
}
// Stores a value to memory, considering the target triple's restrictions.
static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
InsertPosition IP, Module *M) {
Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, "", nullptr);
Instruction *Store = new StoreInst(Val, StorePtr, IP);
return Store;
}
// Loads a value from memory, considering the target triple's restrictions.
static std::pair<Instruction *, SmallVector<Instruction *>>
buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
const Twine &LoadName) {
SmallVector<Instruction *> NewInsts;
Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, LoadName, &NewInsts);
Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
NewInsts.push_back(Load);
return std::make_pair(Load, NewInsts);
}
static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
// Remove in reverse order (uses before defs)
for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
(*it)->eraseFromParent();
}
}
Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
ArrayRef<Instruction *> Insts,
ArrayRef<Value *> Srcs,
SourcePred Pred,
bool allowConstant) {
auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
SmallVector<uint64_t, 8> SrcTys;
for (uint64_t i = 0; i < EndOfValueSource; i++)
SrcTys.push_back(i);
std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
for (uint64_t SrcTy : SrcTys) {
switch (SrcTy) {
case SrcFromInstInCurBlock: {
auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
if (!RS.isEmpty()) {
return RS.getSelection();
}
break;
}
case FunctionArgument: {
Function *F = BB.getParent();
SmallVector<Argument *, 8> Args;
for (uint64_t i = 0; i < F->arg_size(); i++) {
Args.push_back(F->getArg(i));
}
auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
if (!RS.isEmpty()) {
return RS.getSelection();
}
break;
}
case InstInDominator: {
auto Dominators = getDominators(&BB);
std::shuffle(Dominators.begin(), Dominators.end(), Rand);
for (BasicBlock *Dom : Dominators) {
SmallVector<Instruction *, 16> Instructions(
llvm::make_pointer_range(*Dom));
auto RS =
makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
// Also consider choosing no source, meaning we want a new one.
if (!RS.isEmpty()) {
return RS.getSelection();
}
}
break;
}
case SrcFromGlobalVariable: {
Module *M = BB.getParent()->getParent();
auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
Type *Ty = GV->getValueType();
InsertPosition IP = BB.getTerminator()
? InsertPosition(BB.getFirstInsertionPt())
: InsertPosition(&BB);
// Build a legal load and track new instructions in case a rollback is
// needed.
auto [LoadGV, NewInsts] = buildTargetLegalLoad(Ty, GV, IP, M, "LGV");
// Because we might be generating new values, we have to check if it
// matches again.
if (DidCreate) {
if (Pred.matches(Srcs, LoadGV)) {
return LoadGV;
}
// Remove newly inserted instructions
eraseNewInstructions(NewInsts);
// If no one is using this GlobalVariable, delete it too.
if (GV->use_empty()) {
GV->eraseFromParent();
}
}
break;
}
case NewConstOrStack: {
return newSource(BB, Insts, Srcs, Pred, allowConstant);
}
default:
case EndOfValueSource: {
llvm_unreachable("EndOfValueSource executed");
}
}
}
llvm_unreachable("Can't find a source");
}
Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
ArrayRef<Value *> Srcs, SourcePred Pred,
bool allowConstant) {
// Generate some constants to choose from.
auto RS = makeSampler<Value *>(Rand);
RS.sample(Pred.generate(Srcs, KnownTypes));
// If we can find a pointer to load from, use it half the time.
Value *Ptr = findPointer(BB, Insts);
if (Ptr) {
// Create load from the chosen pointer
auto IP = BB.getFirstInsertionPt();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
IP = ++I->getIterator();
assert(IP != BB.end() && "guaranteed by the findPointer");
}
// Pick the type independently.
Type *AccessTy = RS.getSelection()->getType();
// Build a legal load and track new instructions in case a rollback is
// needed.
auto [NewLoad, NewInsts] =
buildTargetLegalLoad(AccessTy, Ptr, IP, BB.getModule(), "L");
// Only sample this load if it really matches the descriptor
if (Pred.matches(Srcs, NewLoad))
RS.sample(NewLoad, RS.totalWeight());
else {
// Remove newly inserted instructions
eraseNewInstructions(NewInsts);
}
}
Value *newSrc = RS.getSelection();
// Generate a stack alloca and store the constant to it if constant is not
// allowed, our hope is that later mutations can generate some values and
// store to this placeholder.
if (!allowConstant && isa<Constant>(newSrc)) {
Type *Ty = newSrc->getType();
Function *F = BB.getParent();
AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
if (BB.getTerminator()) {
newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
BB.getTerminator()->getIterator());
} else {
newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
}
}
return newSrc;
}
static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
const Value *Replacement) {
unsigned int OperandNo = Operand.getOperandNo();
if (Operand->getType() != Replacement->getType())
return false;
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
case Instruction::ExtractElement:
case Instruction::ExtractValue:
// TODO: We could potentially validate these, but for now just leave indices
// alone.
if (OperandNo >= 1)
return false;
break;
case Instruction::InsertValue:
case Instruction::InsertElement:
case Instruction::ShuffleVector:
if (OperandNo >= 2)
return false;
break;
// For Br/Switch, we only try to modify the 1st Operand (condition).
// Modify other operands, like switch case may accidently change case from
// ConstantInt to a register, which is illegal.
case Instruction::Switch:
case Instruction::Br:
if (OperandNo >= 1)
return false;
break;
case Instruction::Call:
case Instruction::Invoke:
case Instruction::CallBr: {
const Function *Callee = cast<CallBase>(I)->getCalledFunction();
// If it's an indirect call, give up.
if (!Callee)
return false;
// If callee is not an intrinsic, operand 0 is the function to be called.
// Since we cannot assume that the replacement is a function pointer,
// we give up.
if (!Callee->getIntrinsicID() && OperandNo == 0)
return false;
return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
}
default:
break;
}
return true;
}
Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
ArrayRef<Instruction *> Insts,
Value *V) {
SmallVector<uint64_t, 8> SinkTys;
for (uint64_t i = 0; i < EndOfValueSink; i++)
SinkTys.push_back(i);
std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
auto findSinkAndConnect =
[this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
auto RS = makeSampler<Use *>(Rand);
for (auto &I : Instructions) {
for (Use &U : I->operands())
if (isCompatibleReplacement(I, U, V))
RS.sample(&U, 1);
}
if (!RS.isEmpty()) {
Use *Sink = RS.getSelection();
User *U = Sink->getUser();
unsigned OpNo = Sink->getOperandNo();
U->setOperand(OpNo, V);
return cast<Instruction>(U);
}
return nullptr;
};
Instruction *Sink = nullptr;
for (uint64_t SinkTy : SinkTys) {
switch (SinkTy) {
case SinkToInstInCurBlock:
Sink = findSinkAndConnect(Insts);
if (Sink)
return Sink;
break;
case PointersInDominator: {
auto Dominators = getDominators(&BB);
std::shuffle(Dominators.begin(), Dominators.end(), Rand);
for (BasicBlock *Dom : Dominators) {
for (Instruction &I : *Dom) {
if (isa<PointerType>(I.getType())) {
return buildTargetLegalStore(V, &I, Insts.back()->getIterator(),
I.getModule());
}
}
}
break;
}
case InstInDominatee: {
auto Dominatees = getDominatees(&BB);
std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
for (BasicBlock *Dominee : Dominatees) {
std::vector<Instruction *> Instructions;
for (Instruction &I : *Dominee)
Instructions.push_back(&I);
Sink = findSinkAndConnect(Instructions);
if (Sink) {
return Sink;
}
}
break;
}
case NewStore:
/// TODO: allocate a new stack memory.
return newSink(BB, Insts, V);
case SinkToGlobalVariable: {
Module *M = BB.getModule();
auto [GV, DidCreate] =
findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
return buildTargetLegalStore(V, GV, Insts.back()->getIterator(), M);
}
case EndOfValueSink:
default:
llvm_unreachable("EndOfValueSink executed");
}
}
llvm_unreachable("Can't find a sink");
}
Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
ArrayRef<Instruction *> Insts, Value *V) {
Value *Ptr = findPointer(BB, Insts);
if (!Ptr) {
if (uniform(Rand, 0, 1)) {
Type *Ty = V->getType();
Ptr = createStackMemory(BB.getParent(), Ty, PoisonValue::get(Ty));
} else {
Ptr = PoisonValue::get(PointerType::get(V->getContext(), 0));
}
}
return buildTargetLegalStore(V, Ptr, Insts.back()->getIterator(),
BB.getModule());
}
Value *RandomIRBuilder::findPointer(BasicBlock &BB,
ArrayRef<Instruction *> Insts) {
auto IsMatchingPtr = [](Instruction *Inst) {
// Invoke instructions sometimes produce valid pointers but currently
// we can't insert loads or stores from them
if (Inst->isTerminator())
return false;
return Inst->getType()->isPointerTy();
};
if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
return RS.getSelection();
return nullptr;
}
Type *RandomIRBuilder::randomType() {
uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
return KnownTypes[TyIdx];
}
Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
uint64_t ArgNum) {
Type *RetType = randomType();
SmallVector<Type *, 2> Args;
for (uint64_t i = 0; i < ArgNum; i++) {
Args.push_back(randomType());
}
Function *F = Function::Create(FunctionType::get(RetType, Args,
/*isVarArg=*/false),
GlobalValue::ExternalLinkage, "f", &M);
return F;
}
Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
return createFunctionDeclaration(
M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
}
Function *RandomIRBuilder::createFunctionDefinition(Module &M,
uint64_t ArgNum) {
Function *F = this->createFunctionDeclaration(M, ArgNum);
// TODO: Some arguments and a return value would probably be more
// interesting.
LLVMContext &Context = M.getContext();
const DataLayout &DL = M.getDataLayout();
BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
Type *RetTy = F->getReturnType();
if (RetTy != Type::getVoidTy(Context)) {
Instruction *RetAlloca =
new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
ReturnInst::Create(Context, RetLoad, BB);
} else {
ReturnInst::Create(Context, BB);
}
return F;
}
Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
return createFunctionDefinition(
M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
}