|  | //===- llvm-extract.cpp - LLVM function extraction utility ----------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This utility changes the input module to only contain a single function, | 
|  | // which is primarily used for debugging transformations. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/ADT/SetVector.h" | 
|  | #include "llvm/ADT/SmallPtrSet.h" | 
|  | #include "llvm/Bitcode/BitcodeWriterPass.h" | 
|  | #include "llvm/IR/DataLayout.h" | 
|  | #include "llvm/IR/IRPrintingPasses.h" | 
|  | #include "llvm/IR/Instructions.h" | 
|  | #include "llvm/IR/LLVMContext.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/IRPrinter/IRPrintingPasses.h" | 
|  | #include "llvm/IRReader/IRReader.h" | 
|  | #include "llvm/Passes/PassBuilder.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Error.h" | 
|  | #include "llvm/Support/FileSystem.h" | 
|  | #include "llvm/Support/InitLLVM.h" | 
|  | #include "llvm/Support/Regex.h" | 
|  | #include "llvm/Support/SourceMgr.h" | 
|  | #include "llvm/Support/SystemUtils.h" | 
|  | #include "llvm/Support/ToolOutputFile.h" | 
|  | #include "llvm/Transforms/IPO.h" | 
|  | #include "llvm/Transforms/IPO/BlockExtractor.h" | 
|  | #include "llvm/Transforms/IPO/ExtractGV.h" | 
|  | #include "llvm/Transforms/IPO/GlobalDCE.h" | 
|  | #include "llvm/Transforms/IPO/StripDeadPrototypes.h" | 
|  | #include "llvm/Transforms/IPO/StripSymbols.h" | 
|  | #include <memory> | 
|  | #include <utility> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
// Category that groups every option specific to this tool. main() passes it
// to cl::HideUnrelatedOptions before parsing the command line.
static cl::OptionCategory ExtractCat("llvm-extract Options");

// InputFilename - The filename to read from. Positional; defaults to "-".
static cl::opt<std::string> InputFilename(cl::Positional,
                                          cl::desc("<input bitcode file>"),
                                          cl::init("-"),
                                          cl::value_desc("filename"));

// OutputFilename - The filename to write the result to. Defaults to "-".
static cl::opt<std::string> OutputFilename("o",
                                           cl::desc("Specify output filename"),
                                           cl::value_desc("filename"),
                                           cl::init("-"), cl::cat(ExtractCat));

// Force - When set, main() skips the CheckBitcodeOutputToConsole check and
// always adds the bitcode writer pass.
static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
                           cl::cat(ExtractCat));

// DeleteFn - Invert the selection: the specified globals are deleted instead
// of kept. Forwarded to ExtractGVPass; main() also skips GlobalDCE when set.
static cl::opt<bool> DeleteFn("delete",
                              cl::desc("Delete specified Globals from Module"),
                              cl::cat(ExtractCat));

// KeepConstInit - Forwarded to ExtractGVPass as its third argument.
static cl::opt<bool> KeepConstInit("keep-const-init",
                                   cl::desc("Keep initializers of constants"),
                                   cl::cat(ExtractCat));

// Recursive - When set, main() also selects every defined function that is
// directly called (transitively) from the selected functions.
static cl::opt<bool>
    Recursive("recursive", cl::desc("Recursively extract all called functions"),
              cl::cat(ExtractCat));

// ExtractFuncs - The functions, given by exact name, to extract from the
// module.
static cl::list<std::string>
    ExtractFuncs("func", cl::desc("Specify function to extract"),
                 cl::value_desc("function"), cl::cat(ExtractCat));

// ExtractRegExpFuncs - The functions, matched via regular expression, to
// extract from the module.
static cl::list<std::string>
    ExtractRegExpFuncs("rfunc",
                       cl::desc("Specify function(s) to extract using a "
                                "regular expression"),
                       cl::value_desc("rfunction"), cl::cat(ExtractCat));

// ExtractBlocks - The blocks to extract from the module. Each value has the
// form "function:bb1[;bb2...]"; main() splits it on ':' and then on ';'.
static cl::list<std::string> ExtractBlocks(
    "bb",
    cl::desc(
        "Specify <function, basic block1[;basic block2...]> pairs to extract.\n"
        "Each pair will create a function.\n"
        "If multiple basic blocks are specified in one pair,\n"
        "the first block in the sequence should dominate the rest.\n"
        "If an unnamed basic block is to be extracted,\n"
        "'%' should be added before the basic block variable names.\n"
        "eg:\n"
        "  --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n"
        "  --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one "
        "with bb2.\n"
        "  --bb=f:%1 will extract one function with basic block 1;"),
    cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat));

// ExtractAliases - The aliases, given by exact name, to extract from the
// module.
static cl::list<std::string>
    ExtractAliases("alias", cl::desc("Specify alias to extract"),
                   cl::value_desc("alias"), cl::cat(ExtractCat));

// ExtractRegExpAliases - The aliases, matched via regular expression, to
// extract from the module.
static cl::list<std::string>
    ExtractRegExpAliases("ralias",
                         cl::desc("Specify alias(es) to extract using a "
                                  "regular expression"),
                         cl::value_desc("ralias"), cl::cat(ExtractCat));

// ExtractGlobals - The globals, given by exact name, to extract from the
// module.
static cl::list<std::string>
    ExtractGlobals("glob", cl::desc("Specify global to extract"),
                   cl::value_desc("global"), cl::cat(ExtractCat));

// ExtractRegExpGlobals - The globals, matched via regular expression, to
// extract from the module.
static cl::list<std::string>
    ExtractRegExpGlobals("rglob",
                         cl::desc("Specify global(s) to extract using a "
                                  "regular expression"),
                         cl::value_desc("rglobal"), cl::cat(ExtractCat));

// OutputAssembly - When set, main() emits the module with PrintModulePass
// instead of BitcodeWriterPass. Hidden from the default help output.
static cl::opt<bool> OutputAssembly("S",
                                    cl::desc("Write output as LLVM assembly"),
                                    cl::Hidden, cl::cat(ExtractCat));

// PreserveBitcodeUseListOrder - Forwarded to BitcodeWriterPass. On by
// default.
static cl::opt<bool> PreserveBitcodeUseListOrder(
    "preserve-bc-uselistorder",
    cl::desc("Preserve use-list order when writing LLVM bitcode."),
    cl::init(true), cl::Hidden, cl::cat(ExtractCat));

// PreserveAssemblyUseListOrder - Forwarded to PrintModulePass. Off by
// default.
static cl::opt<bool> PreserveAssemblyUseListOrder(
    "preserve-ll-uselistorder",
    cl::desc("Preserve use-list order when writing LLVM assembly."),
    cl::init(false), cl::Hidden, cl::cat(ExtractCat));
|  |  | 
|  | int main(int argc, char **argv) { | 
|  | InitLLVM X(argc, argv); | 
|  |  | 
|  | LLVMContext Context; | 
|  | cl::HideUnrelatedOptions(ExtractCat); | 
|  | cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); | 
|  |  | 
|  | // Use lazy loading, since we only care about selected global values. | 
|  | SMDiagnostic Err; | 
|  | std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); | 
|  |  | 
|  | if (!M) { | 
|  | Err.print(argv[0], errs()); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | // Use SetVector to avoid duplicates. | 
|  | SetVector<GlobalValue *> GVs; | 
|  |  | 
|  | // Figure out which aliases we should extract. | 
|  | for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { | 
|  | GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); | 
|  | if (!GA) { | 
|  | errs() << argv[0] << ": program doesn't contain alias named '" | 
|  | << ExtractAliases[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | GVs.insert(GA); | 
|  | } | 
|  |  | 
|  | // Extract aliases via regular expression matching. | 
|  | for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { | 
|  | std::string Error; | 
|  | Regex RegEx(ExtractRegExpAliases[i]); | 
|  | if (!RegEx.isValid(Error)) { | 
|  | errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " | 
|  | "invalid regex: " << Error; | 
|  | } | 
|  | bool match = false; | 
|  | for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); | 
|  | GA != E; GA++) { | 
|  | if (RegEx.match(GA->getName())) { | 
|  | GVs.insert(&*GA); | 
|  | match = true; | 
|  | } | 
|  | } | 
|  | if (!match) { | 
|  | errs() << argv[0] << ": program doesn't contain global named '" | 
|  | << ExtractRegExpAliases[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Figure out which globals we should extract. | 
|  | for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { | 
|  | GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); | 
|  | if (!GV) { | 
|  | errs() << argv[0] << ": program doesn't contain global named '" | 
|  | << ExtractGlobals[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | GVs.insert(GV); | 
|  | } | 
|  |  | 
|  | // Extract globals via regular expression matching. | 
|  | for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { | 
|  | std::string Error; | 
|  | Regex RegEx(ExtractRegExpGlobals[i]); | 
|  | if (!RegEx.isValid(Error)) { | 
|  | errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " | 
|  | "invalid regex: " << Error; | 
|  | } | 
|  | bool match = false; | 
|  | for (auto &GV : M->globals()) { | 
|  | if (RegEx.match(GV.getName())) { | 
|  | GVs.insert(&GV); | 
|  | match = true; | 
|  | } | 
|  | } | 
|  | if (!match) { | 
|  | errs() << argv[0] << ": program doesn't contain global named '" | 
|  | << ExtractRegExpGlobals[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Figure out which functions we should extract. | 
|  | for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { | 
|  | GlobalValue *GV = M->getFunction(ExtractFuncs[i]); | 
|  | if (!GV) { | 
|  | errs() << argv[0] << ": program doesn't contain function named '" | 
|  | << ExtractFuncs[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | GVs.insert(GV); | 
|  | } | 
|  | // Extract functions via regular expression matching. | 
|  | for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { | 
|  | std::string Error; | 
|  | StringRef RegExStr = ExtractRegExpFuncs[i]; | 
|  | Regex RegEx(RegExStr); | 
|  | if (!RegEx.isValid(Error)) { | 
|  | errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " | 
|  | "invalid regex: " << Error; | 
|  | } | 
|  | bool match = false; | 
|  | for (Module::iterator F = M->begin(), E = M->end(); F != E; | 
|  | F++) { | 
|  | if (RegEx.match(F->getName())) { | 
|  | GVs.insert(&*F); | 
|  | match = true; | 
|  | } | 
|  | } | 
|  | if (!match) { | 
|  | errs() << argv[0] << ": program doesn't contain global named '" | 
|  | << ExtractRegExpFuncs[i] << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Figure out which BasicBlocks we should extract. | 
|  | SmallVector<std::pair<Function *, SmallVector<StringRef, 16>>, 2> BBMap; | 
|  | for (StringRef StrPair : ExtractBlocks) { | 
|  | SmallVector<StringRef, 16> BBNames; | 
|  | auto BBInfo = StrPair.split(':'); | 
|  | // Get the function. | 
|  | Function *F = M->getFunction(BBInfo.first); | 
|  | if (!F) { | 
|  | errs() << argv[0] << ": program doesn't contain a function named '" | 
|  | << BBInfo.first << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | // Add the function to the materialize list, and store the basic block names | 
|  | // to check after materialization. | 
|  | GVs.insert(F); | 
|  | BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); | 
|  | BBMap.push_back({F, std::move(BBNames)}); | 
|  | } | 
|  |  | 
|  | // Use *argv instead of argv[0] to work around a wrong GCC warning. | 
|  | ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); | 
|  |  | 
|  | if (Recursive) { | 
|  | std::vector<llvm::Function *> Workqueue; | 
|  | for (GlobalValue *GV : GVs) { | 
|  | if (auto *F = dyn_cast<Function>(GV)) { | 
|  | Workqueue.push_back(F); | 
|  | } | 
|  | } | 
|  | while (!Workqueue.empty()) { | 
|  | Function *F = &*Workqueue.back(); | 
|  | Workqueue.pop_back(); | 
|  | ExitOnErr(F->materialize()); | 
|  | for (auto &BB : *F) { | 
|  | for (auto &I : BB) { | 
|  | CallBase *CB = dyn_cast<CallBase>(&I); | 
|  | if (!CB) | 
|  | continue; | 
|  | Function *CF = CB->getCalledFunction(); | 
|  | if (!CF) | 
|  | continue; | 
|  | if (CF->isDeclaration() || !GVs.insert(CF)) | 
|  | continue; | 
|  | Workqueue.push_back(CF); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; | 
|  |  | 
|  | // Materialize requisite global values. | 
|  | if (!DeleteFn) { | 
|  | for (size_t i = 0, e = GVs.size(); i != e; ++i) | 
|  | Materialize(*GVs[i]); | 
|  | } else { | 
|  | // Deleting. Materialize every GV that's *not* in GVs. | 
|  | SmallPtrSet<GlobalValue *, 8> GVSet(llvm::from_range, GVs); | 
|  | for (auto &F : *M) { | 
|  | if (!GVSet.count(&F)) | 
|  | Materialize(F); | 
|  | } | 
|  | } | 
|  |  | 
|  | { | 
|  | std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); | 
|  | LoopAnalysisManager LAM; | 
|  | FunctionAnalysisManager FAM; | 
|  | CGSCCAnalysisManager CGAM; | 
|  | ModuleAnalysisManager MAM; | 
|  |  | 
|  | PassBuilder PB; | 
|  |  | 
|  | PB.registerModuleAnalyses(MAM); | 
|  | PB.registerCGSCCAnalyses(CGAM); | 
|  | PB.registerFunctionAnalyses(FAM); | 
|  | PB.registerLoopAnalyses(LAM); | 
|  | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); | 
|  |  | 
|  | ModulePassManager PM; | 
|  | PM.addPass(ExtractGVPass(Gvs, DeleteFn, KeepConstInit)); | 
|  | PM.run(*M, MAM); | 
|  |  | 
|  | // Now that we have all the GVs we want, mark the module as fully | 
|  | // materialized. | 
|  | // FIXME: should the GVExtractionPass handle this? | 
|  | ExitOnErr(M->materializeAll()); | 
|  | } | 
|  |  | 
|  | // Extract the specified basic blocks from the module and erase the existing | 
|  | // functions. | 
|  | if (!ExtractBlocks.empty()) { | 
|  | // Figure out which BasicBlocks we should extract. | 
|  | std::vector<std::vector<BasicBlock *>> GroupOfBBs; | 
|  | for (auto &P : BBMap) { | 
|  | std::vector<BasicBlock *> BBs; | 
|  | for (StringRef BBName : P.second) { | 
|  | // The function has been materialized, so add its matching basic blocks | 
|  | // to the block extractor list, or fail if a name is not found. | 
|  | auto Res = llvm::find_if(*P.first, [&](const BasicBlock &BB) { | 
|  | return BB.getNameOrAsOperand() == BBName; | 
|  | }); | 
|  | if (Res == P.first->end()) { | 
|  | errs() << argv[0] << ": function " << P.first->getName() | 
|  | << " doesn't contain a basic block named '" << BBName | 
|  | << "'!\n"; | 
|  | return 1; | 
|  | } | 
|  | BBs.push_back(&*Res); | 
|  | } | 
|  | GroupOfBBs.push_back(BBs); | 
|  | } | 
|  |  | 
|  | LoopAnalysisManager LAM; | 
|  | FunctionAnalysisManager FAM; | 
|  | CGSCCAnalysisManager CGAM; | 
|  | ModuleAnalysisManager MAM; | 
|  |  | 
|  | PassBuilder PB; | 
|  |  | 
|  | PB.registerModuleAnalyses(MAM); | 
|  | PB.registerCGSCCAnalyses(CGAM); | 
|  | PB.registerFunctionAnalyses(FAM); | 
|  | PB.registerLoopAnalyses(LAM); | 
|  | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); | 
|  |  | 
|  | ModulePassManager PM; | 
|  | PM.addPass(BlockExtractorPass(std::move(GroupOfBBs), true)); | 
|  | PM.run(*M, MAM); | 
|  | } | 
|  |  | 
|  | // In addition to deleting all other functions, we also want to spiff it | 
|  | // up a little bit.  Do this now. | 
|  |  | 
|  | LoopAnalysisManager LAM; | 
|  | FunctionAnalysisManager FAM; | 
|  | CGSCCAnalysisManager CGAM; | 
|  | ModuleAnalysisManager MAM; | 
|  |  | 
|  | PassBuilder PB; | 
|  |  | 
|  | PB.registerModuleAnalyses(MAM); | 
|  | PB.registerCGSCCAnalyses(CGAM); | 
|  | PB.registerFunctionAnalyses(FAM); | 
|  | PB.registerLoopAnalyses(LAM); | 
|  | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); | 
|  |  | 
|  | ModulePassManager PM; | 
|  | if (!DeleteFn) | 
|  | PM.addPass(GlobalDCEPass()); | 
|  | PM.addPass(StripDeadDebugInfoPass()); | 
|  | PM.addPass(StripDeadPrototypesPass()); | 
|  | PM.addPass(StripDeadCGProfilePass()); | 
|  |  | 
|  | std::error_code EC; | 
|  | ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); | 
|  | if (EC) { | 
|  | errs() << EC.message() << '\n'; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | if (OutputAssembly) | 
|  | PM.addPass(PrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); | 
|  | else if (Force || !CheckBitcodeOutputToConsole(Out.os())) | 
|  | PM.addPass(BitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); | 
|  |  | 
|  | PM.run(*M, MAM); | 
|  |  | 
|  | // Declare success. | 
|  | Out.keep(); | 
|  |  | 
|  | return 0; | 
|  | } |