| //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm-c/Target.h" |
| #include "llvm/MC/SubtargetFeature.h" |
| #include "llvm/MC/MCAsmBackend.h" |
| #include "llvm/MC/MCAsmInfo.h" |
| #include "llvm/MC/MCCodeEmitter.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCInstPrinter.h" |
| #include "llvm/MC/MCInstrInfo.h" |
| #include "llvm/MC/MCObjectFileInfo.h" |
| #include "llvm/MC/MCObjectWriter.h" |
| #include "llvm/MC/MCParser/AsmLexer.h" |
| #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSectionMachO.h" |
| #include "llvm/MC/MCStreamer.h" |
| #include "llvm/MC/MCSubtargetInfo.h" |
| #include "llvm/MC/MCTargetOptionsCommandFlags.inc" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/FileUtilities.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Support/SourceMgr.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/Support/TargetRegistry.h" |
| #include "llvm/Support/ToolOutputFile.h" |
| |
| using namespace llvm; |
| |
| static cl::opt<std::string> |
| TripleName("triple", cl::desc("Target triple to assemble for, " |
| "see -version for available targets")); |
| |
| static cl::opt<std::string> |
| MCPU("mcpu", |
| cl::desc("Target a specific cpu type (-mcpu=help for details)"), |
| cl::value_desc("cpu-name"), cl::init("")); |
| |
| // This is useful for variable-length instruction sets. |
| static cl::opt<unsigned> InsnLimit( |
| "insn-limit", |
| cl::desc("Limit the number of instructions to process (0 for no limit)"), |
| cl::value_desc("count"), cl::init(0)); |
| |
| static cl::list<std::string> |
| MAttrs("mattr", cl::CommaSeparated, |
| cl::desc("Target specific attributes (-mattr=help for details)"), |
| cl::value_desc("a1,+a2,-a3,...")); |
| // The feature string derived from -mattr's values. |
| std::string FeaturesStr; |
| |
| static cl::list<std::string> |
| FuzzerArgs("fuzzer-args", cl::Positional, |
| cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, |
| cl::PositionalEatsArgs); |
| static std::vector<char *> ModifiedArgv; |
| |
| enum OutputFileType { |
| OFT_Null, |
| OFT_AssemblyFile, |
| OFT_ObjectFile |
| }; |
| static cl::opt<OutputFileType> |
| FileType("filetype", cl::init(OFT_AssemblyFile), |
| cl::desc("Choose an output file type:"), |
| cl::values( |
| clEnumValN(OFT_AssemblyFile, "asm", |
| "Emit an assembly ('.s') file"), |
| clEnumValN(OFT_Null, "null", |
| "Don't emit anything (for timing purposes)"), |
| clEnumValN(OFT_ObjectFile, "obj", |
| "Emit a native object ('.o') file"))); |
| |
| |
| class LLVMFuzzerInputBuffer : public MemoryBuffer |
| { |
| public: |
| LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_) |
| : Data(reinterpret_cast<const char *>(data_)), |
| Size(size_) { |
| init(Data, Data+Size, false); |
| } |
| |
| |
| virtual BufferKind getBufferKind() const { |
| return MemoryBuffer_Malloc; // it's not disk-backed so I think that's |
| // the intent ... though AFAIK it |
| // probably came from an mmap or sbrk |
| } |
| |
| private: |
| const char *Data; |
| size_t Size; |
| }; |
| |
| static int AssembleInput(const char *ProgName, const Target *TheTarget, |
| SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, |
| MCAsmInfo &MAI, MCSubtargetInfo &STI, |
| MCInstrInfo &MCII, MCTargetOptions &MCOptions) { |
| static const bool NoInitialTextSection = false; |
| |
| std::unique_ptr<MCAsmParser> Parser( |
| createMCAsmParser(SrcMgr, Ctx, Str, MAI)); |
| |
| std::unique_ptr<MCTargetAsmParser> TAP( |
| TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); |
| |
| if (!TAP) { |
| errs() << ProgName |
| << ": error: this target '" << TripleName |
| << "', does not support assembly parsing.\n"; |
| abort(); |
| } |
| |
| Parser->setTargetParser(*TAP); |
| |
| return Parser->Run(NoInitialTextSection); |
| } |
| |
| |
| int AssembleOneInput(const uint8_t *Data, size_t Size) { |
| const bool ShowInst = false; |
| const bool AsmVerbose = false; |
| const bool UseDwarfDirectory = true; |
| |
| Triple TheTriple(Triple::normalize(TripleName)); |
| |
| SourceMgr SrcMgr; |
| |
| std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size)); |
| |
| // Tell SrcMgr about this buffer, which is what the parser will pick up. |
| SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); |
| |
| static const std::vector<std::string> NoIncludeDirs; |
| SrcMgr.setIncludeDirs(NoIncludeDirs); |
| |
| static std::string ArchName; |
| std::string Error; |
| const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, |
| Error); |
| if (!TheTarget) { |
| errs() << "error: this target '" << TheTriple.normalize() |
| << "/" << ArchName << "', was not found: '" << Error << "'\n"; |
| |
| abort(); |
| } |
| |
| std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); |
| if (!MRI) { |
| errs() << "Unable to create target register info!"; |
| abort(); |
| } |
| |
| std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); |
| if (!MAI) { |
| errs() << "Unable to create target asm info!"; |
| abort(); |
| } |
| |
| |
| MCObjectFileInfo MOFI; |
| MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); |
| |
| static const bool UsePIC = false; |
| MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx); |
| |
| const unsigned OutputAsmVariant = 0; |
| std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); |
| MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, |
| *MAI, *MCII, *MRI); |
| if (!IP) { |
| errs() |
| << "error: unable to create instruction printer for target triple '" |
| << TheTriple.normalize() << "' with assembly variant " |
| << OutputAsmVariant << ".\n"; |
| |
| abort(); |
| } |
| |
| const char *ProgName = "llvm-mc-fuzzer"; |
| std::unique_ptr<MCSubtargetInfo> STI( |
| TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); |
| std::unique_ptr<MCCodeEmitter> CE = nullptr; |
| std::unique_ptr<MCAsmBackend> MAB = nullptr; |
| |
| MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); |
| |
| std::string OutputString; |
| raw_string_ostream Out(OutputString); |
| auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); |
| |
| std::unique_ptr<MCStreamer> Str; |
| |
| if (FileType == OFT_AssemblyFile) { |
| Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose, |
| UseDwarfDirectory, IP, std::move(CE), |
| std::move(MAB), ShowInst)); |
| } else { |
| assert(FileType == OFT_ObjectFile && "Invalid file type!"); |
| |
| std::error_code EC; |
| const std::string OutputFilename = "-"; |
| auto Out = |
| llvm::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::F_None); |
| if (EC) { |
| errs() << EC.message() << '\n'; |
| abort(); |
| } |
| |
| // Don't waste memory on names of temp labels. |
| Ctx.setUseNamesOnTempLabels(false); |
| |
| std::unique_ptr<buffer_ostream> BOS; |
| raw_pwrite_stream *OS = &Out->os(); |
| if (!Out->os().supportsSeeking()) { |
| BOS = make_unique<buffer_ostream>(Out->os()); |
| OS = BOS.get(); |
| } |
| |
| MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); |
| MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); |
| Str.reset(TheTarget->createMCObjectStreamer( |
| TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), |
| MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI, |
| MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, |
| /*DWARFMustBeAtTheEnd*/ false)); |
| } |
| const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, |
| *MCII, MCOptions); |
| |
| (void) Res; |
| |
| return 0; |
| } |
| |
| extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
| return AssembleOneInput(Data, Size); |
| } |
| |
| extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, |
| char ***argv) { |
| // The command line is unusual compared to other fuzzers due to the need to |
| // specify the target. Options like -triple, -mcpu, and -mattr work like |
| // their counterparts in llvm-mc, while -fuzzer-args collects options for the |
| // fuzzer itself. |
| // |
| // Examples: |
| // |
| // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to |
| // 4-bytes each and use the contents of ./corpus as the test corpus: |
| // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ |
| // -fuzzer-args -max_len=4 -runs=100000 ./corpus |
| // |
| // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA |
| // feature enabled using up to 64-byte inputs: |
| // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ |
| // -disassemble -fuzzer-args ./corpus |
| // |
| // If your aim is to find instructions that are not tested, then it is |
| // advisable to constrain the maximum input size to a single instruction |
| // using -max_len as in the first example. This results in a test corpus of |
| // individual instructions that test unique paths. Without this constraint, |
| // there will be considerable redundancy in the corpus. |
| |
| char **OriginalArgv = *argv; |
| |
| LLVMInitializeAllTargetInfos(); |
| LLVMInitializeAllTargetMCs(); |
| LLVMInitializeAllAsmParsers(); |
| |
| cl::ParseCommandLineOptions(*argc, OriginalArgv); |
| |
| // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that |
| // the driver can parse its arguments. |
| // |
| // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. |
| // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a |
| // non-const buffer to avoid the need to clean up when the fuzzer terminates. |
| ModifiedArgv.push_back(OriginalArgv[0]); |
| for (const auto &FuzzerArg : FuzzerArgs) { |
| for (int i = 1; i < *argc; ++i) { |
| if (FuzzerArg == OriginalArgv[i]) |
| ModifiedArgv.push_back(OriginalArgv[i]); |
| } |
| } |
| *argc = ModifiedArgv.size(); |
| *argv = ModifiedArgv.data(); |
| |
| // Package up features to be passed to target/subtarget |
| // We have to pass it via a global since the callback doesn't |
| // permit any user data. |
| if (MAttrs.size()) { |
| SubtargetFeatures Features; |
| for (unsigned i = 0; i != MAttrs.size(); ++i) |
| Features.AddFeature(MAttrs[i]); |
| FeaturesStr = Features.getString(); |
| } |
| |
| if (TripleName.empty()) |
| TripleName = sys::getDefaultTargetTriple(); |
| |
| return 0; |
| } |