IR: Allow metadata attachments on declarations, and fix lazy-loaded metadata issue with globals. This change is motivated by an upcoming change to the metadata representation used for CFI. The indirect function call checker needs type information for external function declarations in order to correctly generate jump table entries for such declarations. We currently associate such type information with declarations using a global metadata node, but I plan [1] to move all such metadata to global object attachments. In bitcode, metadata attachments for function declarations appear in the global metadata block. This seems reasonable to me because I expect metadata attachments on declarations to be uncommon. In the long term I'd also expect this to be the case for CFI, because we'd want to use some specialized bitcode format for this metadata that could be read as part of the ThinLTO thin-link phase, which would mean that it would not appear in the global metadata block. To solve the lazy-loaded metadata issue I was seeing with D20147, I use the same bitcode representation for metadata attachments for global variables as I do for function declarations. Since there's a use case for metadata attachments in the global metadata block, we might as well use that representation for global variables as well, at least until we have a mechanism for lazy loading global variables. In the assembly format, the metadata attachments appear after the "declare" keyword in order to avoid a parsing ambiguity. [1] http://lists.llvm.org/pipermail/llvm-dev/2016-June/100462.html Differential Revision: http://reviews.llvm.org/D21052 llvm-svn: 273336 GitOrigin-RevId: 21521891a2e3492f22455af6c1c1a903a723115c
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index f998993..ffa2176 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst
@@ -862,16 +862,6 @@ ``gc`` attributes within the module. These records can be referenced by 1-based index in the *gc* fields of ``FUNCTION`` records. -MODULE_CODE_GLOBALVAR_ATTACHMENT Record -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -``[GLOBALVAR_ATTACHMENT, valueid, n x [id, mdnode]]`` - -The ``GLOBALVAR_ATTACHMENT`` record (code 19) describes the metadata -attachments for a global variable. The ``valueid`` is the value index for -the global variable, and the remaining fields are pairs of metadata name -indices and metadata node indices. - .. _PARAMATTR_BLOCK: PARAMATTR_BLOCK Contents
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index ce07f12..d2a6a19 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -113,9 +113,6 @@ // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] MODULE_CODE_IFUNC = 18, - - // GLOBALVAR_ATTACHMENT: [valueid, n x [id, mdnode]] - MODULE_CODE_GLOBALVAR_ATTACHMENT = 19, }; /// PARAMATTR blocks have code for defining a parameter attribute set. @@ -260,6 +257,7 @@ METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) + METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 2725386..6916252 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp
@@ -397,8 +397,21 @@ assert(Lex.getKind() == lltok::kw_declare); Lex.Lex(); + std::vector<std::pair<unsigned, MDNode *>> MDs; + while (Lex.getKind() == lltok::MetadataVar) { + unsigned MDK; + MDNode *N; + if (ParseMetadataAttachment(MDK, N)) + return true; + MDs.push_back({MDK, N}); + } + Function *F; - return ParseFunctionHeader(F, false); + if (ParseFunctionHeader(F, false)) + return true; + for (auto &MD : MDs) + F->addMetadata(MD.first, *MD.second); + return false; } /// toplevelentity
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index ac534f3..a89bb07 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2692,6 +2692,16 @@ parseMetadataStrings(Record, Blob, NextMetadataNo)) return EC; break; + case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: { + if (Record.size() % 2 == 0) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return error("Invalid record"); + if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID])) + parseGlobalObjectAttachment(*GO, ArrayRef<uint64_t>(Record).slice(1)); + break; + } case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. @@ -3840,16 +3850,6 @@ break; } - case bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT: { - if (Record.size() % 2 == 0) - return error("Invalid record"); - unsigned ValueID = Record[0]; - if (ValueID >= ValueList.size()) - return error("Invalid record"); - if (auto *GV = dyn_cast<GlobalVariable>(ValueList[ValueID])) - parseGlobalObjectAttachment(*GV, ArrayRef<uint64_t>(Record).slice(1)); - break; - } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, // alignment, section, visibility, gc, unnamed_addr, // prologuedata, dllstorageclass, comdat, prefixdata]
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 4699e7d..20cbc2b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -227,7 +227,7 @@ void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV); void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record, const GlobalObject &GO); - void writeModuleMetadataStore(); + void writeModuleMetadataKinds(); void writeOperandBundleTags(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); @@ -1832,6 +1832,22 @@ writeMetadataStrings(VE.getMDStrings(), Record); writeMetadataRecords(VE.getNonMDStrings(), Record); writeNamedMetadata(Record); + + auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { + SmallVector<uint64_t, 4> Record; + Record.push_back(VE.getValueID(&GO)); + pushGlobalMetadataAttachment(Record, GO); + Stream.EmitRecord(bitc::METADATA_GLOBAL_DECL_ATTACHMENT, Record); + }; + for (const Function &F : M) + if (F.isDeclaration() && F.hasMetadata()) + AddDeclAttachedMetadata(F); + // FIXME: Only store metadata for declarations here, and move data for global + // variable definitions to a separate block (PR28134). + for (const GlobalVariable &GV : M.globals()) + if (GV.hasMetadata()) + AddDeclAttachedMetadata(GV); + Stream.ExitBlock(); } @@ -1892,7 +1908,7 @@ Stream.ExitBlock(); } -void ModuleBitcodeWriter::writeModuleMetadataStore() { +void ModuleBitcodeWriter::writeModuleMetadataKinds() { SmallVector<uint64_t, 64> Record; // Write metadata kinds @@ -3593,11 +3609,11 @@ // Emit constants. writeModuleConstants(); - // Emit metadata. - writeModuleMetadata(); + // Emit metadata kind names. + writeModuleMetadataKinds(); // Emit metadata. - writeModuleMetadataStore(); + writeModuleMetadata(); // Emit module-level use-lists. 
if (VE.shouldPreserveUseListOrder()) @@ -3619,14 +3635,6 @@ writeValueSymbolTable(M.getValueSymbolTable(), /* IsModuleLevel */ true, &FunctionToBitcodeIndex); - for (const GlobalVariable &GV : M.globals()) - if (GV.hasMetadata()) { - SmallVector<uint64_t, 4> Record; - Record.push_back(VE.getValueID(&GV)); - pushGlobalMetadataAttachment(Record, GV); - Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT, Record); - } - if (GenerateHash) { writeModuleHash(BlockStartPos); }
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 98eb7fa..398f7d7 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -348,7 +348,10 @@ MDs.clear(); GV.getAllMetadata(MDs); for (const auto &I : MDs) - EnumerateMetadata(&GV, I.second); + // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer + // to write metadata to the global variable's own metadata block + // (PR28134). + EnumerateMetadata(nullptr, I.second); } // Enumerate types used by function bodies and argument lists. @@ -360,7 +363,7 @@ MDs.clear(); F.getAllMetadata(MDs); for (const auto &I : MDs) - EnumerateMetadata(&F, I.second); + EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second); for (const BasicBlock &BB : F) for (const Instruction &I : BB) { @@ -530,18 +533,17 @@ EnumerateMetadata(nullptr, MD->getOperand(i)); } -unsigned ValueEnumerator::getMetadataGlobalID(const GlobalObject *GO) const { - return GO ? getValueID(GO) + 1 : 0; +unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { + return F ? getValueID(F) + 1 : 0; } -void ValueEnumerator::EnumerateMetadata(const GlobalObject *GO, - const Metadata *MD) { - EnumerateMetadata(getMetadataGlobalID(GO), MD); +void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) { + EnumerateMetadata(getMetadataFunctionID(F), MD); } void ValueEnumerator::EnumerateFunctionLocalMetadata( const Function &F, const LocalAsMetadata *Local) { - EnumerateFunctionLocalMetadata(getMetadataGlobalID(&F), Local); + EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local); } void ValueEnumerator::dropFunctionFromMetadata(
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 34d33fc..bff2de7 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -255,7 +255,7 @@ /// it's an \a MDNode. const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD); - unsigned getMetadataGlobalID(const GlobalObject *GO) const; + unsigned getMetadataFunctionID(const Function *F) const; /// Enumerate reachable metadata in (almost) post-order. /// @@ -272,7 +272,7 @@ /// \a organizeMetadata() will later partition distinct nodes ahead of /// uniqued ones. ///{ - void EnumerateMetadata(const GlobalObject *GO, const Metadata *MD); + void EnumerateMetadata(const Function *F, const Metadata *MD); void EnumerateMetadata(unsigned F, const Metadata *MD); ///}
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 3404ae7..17b6224 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp
@@ -2616,9 +2616,15 @@ Out << "; Function Attrs: " << AttrStr << '\n'; } - if (F->isDeclaration()) - Out << "declare "; - else + Machine.incorporateFunction(F); + + if (F->isDeclaration()) { + Out << "declare"; + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); + Out << ' '; + } else Out << "define "; Out << getLinkagePrintName(F->getLinkage()); @@ -2638,7 +2644,6 @@ Out << ' '; WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent()); Out << '('; - Machine.incorporateFunction(F); // Loop over the arguments, printing them... if (F->isDeclaration() && !IsForDebug) { @@ -2698,13 +2703,13 @@ writeOperand(F->getPersonalityFn(), /*PrintType=*/true); } - SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - F->getAllMetadata(MDs); - printMetadataAttachments(MDs, " "); - if (F->isDeclaration()) { Out << '\n'; } else { + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); + Out << " {"; // Output all of the function's basic blocks. for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index ecba8be..7297d65 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp
@@ -1956,8 +1956,15 @@ Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F, MDs.empty() ? nullptr : MDs.front().second); } else if (F.isDeclaration()) { - Assert(MDs.empty(), "function without a body cannot have metadata", &F, - MDs.empty() ? nullptr : MDs.front().second); + for (const auto &I : MDs) { + AssertDI(I.first != LLVMContext::MD_dbg, + "function declaration may not have a !dbg attachment", &F); + Assert(I.first != LLVMContext::MD_prof, + "function declaration may not have a !prof attachment", &F); + + // Verify the metadata itself. + visitMDNode(*I.second); + } Assert(!F.hasPersonalityFn(), "Function declaration shouldn't have a personality routine", &F); } else {
diff --git a/test/Assembler/metadata-decl.ll b/test/Assembler/metadata-decl.ll new file mode 100644 index 0000000..4f28638f --- /dev/null +++ b/test/Assembler/metadata-decl.ll
@@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s +; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck %s + +; CHECK: @foo = external global i32, !foo !0 +@foo = external global i32, !foo !0 + +; CHECK: declare !bar !1 void @bar() +declare !bar !1 void @bar() + +!0 = distinct !{} +!1 = distinct !{}
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll index a4b9c8a..5b62bfa 100644 --- a/test/Assembler/metadata.ll +++ b/test/Assembler/metadata.ll
@@ -1,7 +1,8 @@ -; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck --check-prefix=CHECK --check-prefix=CHECK-UNMAT %s +; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck --check-prefix=CHECK-UNMAT %s ; RUN: verify-uselistorder %s -; CHECK: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]] +; CHECK-UNMAT: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]] @global = global i32 0, !foo !2, !foo !3, !baz !3 ; CHECK-LABEL: @test @@ -32,8 +33,8 @@ unreachable, !\34\32abc !4 } -; CHECK: [[M2]] = distinct !{} -; CHECK: [[M3]] = distinct !{} +; CHECK-UNMAT: [[M2]] = distinct !{} +; CHECK-UNMAT: [[M3]] = distinct !{} ; CHECK: [[M0]] = !DILocation ; CHECK: [[M1]] = distinct !DISubprogram ; CHECK: [[M4]] = distinct !{}
diff --git a/test/Verifier/metadata-function-dbg.ll b/test/Verifier/metadata-function-dbg.ll index 77f7de2..24989ed 100644 --- a/test/Verifier/metadata-function-dbg.ll +++ b/test/Verifier/metadata-function-dbg.ll
@@ -1,11 +1,14 @@ ; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s -define void @foo() !dbg !4 { +; CHECK: function declaration may not have a !dbg attachment +declare !dbg !4 void @f1() + +define void @f2() !dbg !4 { unreachable } ; CHECK: function must have a single !dbg attachment -define void @foo2() !dbg !4 !dbg !4 { +define void @f3() !dbg !4 !dbg !4 { unreachable }
diff --git a/test/Verifier/metadata-function-prof.ll b/test/Verifier/metadata-function-prof.ll index ca0628f..d84a7fe 100644 --- a/test/Verifier/metadata-function-prof.ll +++ b/test/Verifier/metadata-function-prof.ll
@@ -1,11 +1,14 @@ ; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s -define void @foo() !prof !0 { +; CHECK: function declaration may not have a !prof attachment +declare !prof !0 void @f1() + +define void @f2() !prof !0 { unreachable } ; CHECK: function must have a single !prof attachment -define void @foo2() !prof !0 !prof !0 { +define void @f3() !prof !0 !prof !0 { unreachable }
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 46892b6..88333ae 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp
@@ -27,6 +27,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataStream.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/ManagedStatic.h" @@ -59,6 +60,11 @@ cl::desc("Preserve use-list order when writing LLVM assembly."), cl::init(false), cl::Hidden); +static cl::opt<bool> + MaterializeMetadata("materialize-metadata", + cl::desc("Load module without materializing metadata, " + "then materialize only the metadata")); + namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -132,6 +138,37 @@ exit(1); } +static Expected<std::unique_ptr<Module>> openInputFile(LLVMContext &Context) { + if (MaterializeMetadata) { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (!MBOrErr) + return errorCodeToError(MBOrErr.getError()); + ErrorOr<std::unique_ptr<Module>> MOrErr = + getLazyBitcodeModule(std::move(*MBOrErr), Context, + /*ShouldLazyLoadMetadata=*/true); + if (!MOrErr) + return errorCodeToError(MOrErr.getError()); + (*MOrErr)->materializeMetadata(); + return std::move(*MOrErr); + } else { + std::string ErrorMessage; + std::unique_ptr<DataStreamer> Streamer = + getDataFileStreamer(InputFilename, &ErrorMessage); + if (!Streamer) + return make_error<StringError>(ErrorMessage, inconvertibleErrorCode()); + std::string DisplayFilename; + if (InputFilename == "-") + DisplayFilename = "<stdin>"; + else + DisplayFilename = InputFilename; + ErrorOr<std::unique_ptr<Module>> MOrErr = + getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context); + (*MOrErr)->materializeAll(); + return std::move(*MOrErr); + } +} + int main(int argc, char **argv) { // Print a stack trace if we signal out. 
sys::PrintStackTraceOnErrorSignal(argv[0]); @@ -144,26 +181,16 @@ cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); - std::string ErrorMessage; - std::unique_ptr<Module> M; - - // Use the bitcode streaming interface - std::unique_ptr<DataStreamer> Streamer = - getDataFileStreamer(InputFilename, &ErrorMessage); - if (Streamer) { - std::string DisplayFilename; - if (InputFilename == "-") - DisplayFilename = "<stdin>"; - else - DisplayFilename = InputFilename; - ErrorOr<std::unique_ptr<Module>> MOrErr = - getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context); - M = std::move(*MOrErr); - M->materializeAll(); - } else { - errs() << argv[0] << ": " << ErrorMessage << '\n'; + Expected<std::unique_ptr<Module>> MOrErr = openInputFile(Context); + if (!MOrErr) { + handleAllErrors(MOrErr.takeError(), [&](ErrorInfoBase &EIB) { + errs() << argv[0] << ": "; + EIB.log(errs()); + errs() << '\n'; + }); return 1; } + std::unique_ptr<Module> M = std::move(*MOrErr); // Just use stdout. We won't actually print anything on it. if (DontPrint)
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp index a7a28e0..b6cf7e4e 100644 --- a/unittests/IR/MetadataTest.cpp +++ b/unittests/IR/MetadataTest.cpp
@@ -2260,20 +2260,20 @@ TEST_F(FunctionAttachmentTest, Verifier) { Function *F = getFunction("foo"); F->setMetadata("attach", getTuple()); - - // Confirm this has no body. - ASSERT_TRUE(F->empty()); - - // Functions without a body cannot have metadata attachments (they also can't - // be verified directly, so check that the module fails to verify). - EXPECT_TRUE(verifyModule(*F->getParent())); - - // Nor can materializable functions. F->setIsMaterializable(true); - EXPECT_TRUE(verifyModule(*F->getParent())); - // Functions with a body can. + // Confirm this is materializable. + ASSERT_TRUE(F->isMaterializable()); + + // Materializable functions cannot have metadata attachments. + EXPECT_TRUE(verifyFunction(*F)); + + // Function declarations can. F->setIsMaterializable(false); + EXPECT_FALSE(verifyModule(*F->getParent())); + EXPECT_FALSE(verifyFunction(*F)); + + // So can definitions. (void)new UnreachableInst(Context, BasicBlock::Create(Context, "bb", F)); EXPECT_FALSE(verifyModule(*F->getParent())); EXPECT_FALSE(verifyFunction(*F));