| //===-- Serialization.cpp - Binary serialization of index data ------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "Serialization.h" |
| #include "Headers.h" |
| #include "RIFF.h" |
| #include "SymbolLocation.h" |
| #include "SymbolOrigin.h" |
| #include "dex/Dex.h" |
| #include "support/Logger.h" |
| #include "support/Trace.h" |
| #include "clang/Tooling/CompilationDatabase.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Compression.h" |
| #include "llvm/Support/Endian.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <cstdint> |
| #include <vector> |
| |
| namespace clang { |
| namespace clangd { |
| namespace { |
| |
| // IO PRIMITIVES |
| // We use little-endian 32 bit ints, sometimes with variable-length encoding. |
| // |
| // Variable-length int encoding (varint) uses the bottom 7 bits of each byte |
| // to encode the number, and the top bit to indicate whether more bytes follow. |
| // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop]. |
| // This represents 0x1a | 0x2f<<7 = 6042. |
| // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact. |
| |
| // Reads binary data from a StringRef, and keeps track of position. |
| class Reader { |
| const char *Begin, *End; |
| bool Err = false; |
| |
| public: |
| Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {} |
| // The "error" bit is set by reading past EOF or reading invalid data. |
| // When in an error state, reads may return zero values: callers should check. |
| bool err() const { return Err; } |
| // Did we read all the data, or encounter an error? |
| bool eof() const { return Begin == End || Err; } |
| // All the data we didn't read yet. |
| llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); } |
| |
| uint8_t consume8() { |
| if (LLVM_UNLIKELY(Begin == End)) { |
| Err = true; |
| return 0; |
| } |
| return *Begin++; |
| } |
| |
| uint32_t consume32() { |
| if (LLVM_UNLIKELY(Begin + 4 > End)) { |
| Err = true; |
| return 0; |
| } |
| auto Ret = llvm::support::endian::read32le(Begin); |
| Begin += 4; |
| return Ret; |
| } |
| |
| llvm::StringRef consume(int N) { |
| if (LLVM_UNLIKELY(Begin + N > End)) { |
| Err = true; |
| return llvm::StringRef(); |
| } |
| llvm::StringRef Ret(Begin, N); |
| Begin += N; |
| return Ret; |
| } |
| |
| uint32_t consumeVar() { |
| constexpr static uint8_t More = 1 << 7; |
| |
| // Use a 32 bit unsigned here to prevent promotion to signed int (unless int |
| // is wider than 32 bits). |
| uint32_t B = consume8(); |
| if (LLVM_LIKELY(!(B & More))) |
| return B; |
| uint32_t Val = B & ~More; |
| for (int Shift = 7; B & More && Shift < 32; Shift += 7) { |
| B = consume8(); |
| // 5th byte of a varint can only have lowest 4 bits set. |
| assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding"); |
| Val |= (B & ~More) << Shift; |
| } |
| return Val; |
| } |
| |
| llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) { |
| auto StringIndex = consumeVar(); |
| if (LLVM_UNLIKELY(StringIndex >= Strings.size())) { |
| Err = true; |
| return llvm::StringRef(); |
| } |
| return Strings[StringIndex]; |
| } |
| |
| SymbolID consumeID() { |
| llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated. |
| return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw); |
| } |
| |
| // Read a varint (as consumeVar) and resize the container accordingly. |
| // If the size is invalid, return false and mark an error. |
| // (The caller should abort in this case). |
| template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) { |
| auto Size = consumeVar(); |
| // Conservatively assume each element is at least one byte. |
| if (Size > (size_t)(End - Begin)) { |
| Err = true; |
| return false; |
| } |
| Container.resize(Size); |
| return true; |
| } |
| }; |
| |
| void write32(uint32_t I, llvm::raw_ostream &OS) { |
| char Buf[4]; |
| llvm::support::endian::write32le(Buf, I); |
| OS.write(Buf, sizeof(Buf)); |
| } |
| |
| void writeVar(uint32_t I, llvm::raw_ostream &OS) { |
| constexpr static uint8_t More = 1 << 7; |
| if (LLVM_LIKELY(I < 1 << 7)) { |
| OS.write(I); |
| return; |
| } |
| for (;;) { |
| OS.write(I | More); |
| I >>= 7; |
| if (I < 1 << 7) { |
| OS.write(I); |
| return; |
| } |
| } |
| } |
| |
| // STRING TABLE ENCODING |
| // Index data has many string fields, and many strings are identical. |
| // We store each string once, and refer to them by index. |
| // |
| // The string table's format is: |
| // - UncompressedSize : uint32 (or 0 for no compression) |
| // - CompressedData : byte[CompressedSize] |
| // |
| // CompressedData is a zlib-compressed byte[UncompressedSize]. |
| // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0". |
| // These are sorted to improve compression. |
| |
| // Maps each string to a canonical representation. |
| // Strings remain owned externally (e.g. by SymbolSlab). |
| class StringTableOut { |
| llvm::DenseSet<llvm::StringRef> Unique; |
| std::vector<llvm::StringRef> Sorted; |
| // Since strings are interned, look up can be by pointer. |
| llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index; |
| |
| public: |
| StringTableOut() { |
| // Ensure there's at least one string in the table. |
| // Table size zero is reserved to indicate no compression. |
| Unique.insert(""); |
| } |
| // Add a string to the table. Overwrites S if an identical string exists. |
| void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; }; |
| // Finalize the table and write it to OS. No more strings may be added. |
| void finalize(llvm::raw_ostream &OS) { |
| Sorted = {Unique.begin(), Unique.end()}; |
| llvm::sort(Sorted); |
| for (unsigned I = 0; I < Sorted.size(); ++I) |
| Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I); |
| |
| std::string RawTable; |
| for (llvm::StringRef S : Sorted) { |
| RawTable.append(std::string(S)); |
| RawTable.push_back(0); |
| } |
| if (llvm::zlib::isAvailable()) { |
| llvm::SmallString<1> Compressed; |
| llvm::cantFail(llvm::zlib::compress(RawTable, Compressed)); |
| write32(RawTable.size(), OS); |
| OS << Compressed; |
| } else { |
| write32(0, OS); // No compression. |
| OS << RawTable; |
| } |
| } |
| // Get the ID of an string, which must be interned. Table must be finalized. |
| unsigned index(llvm::StringRef S) const { |
| assert(!Sorted.empty() && "table not finalized"); |
| assert(Index.count({S.data(), S.size()}) && "string not interned"); |
| return Index.find({S.data(), S.size()})->second; |
| } |
| }; |
| |
| struct StringTableIn { |
| llvm::BumpPtrAllocator Arena; |
| std::vector<llvm::StringRef> Strings; |
| }; |
| |
| llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { |
| Reader R(Data); |
| size_t UncompressedSize = R.consume32(); |
| if (R.err()) |
| return error("Truncated string table"); |
| |
| llvm::StringRef Uncompressed; |
| llvm::SmallString<1> UncompressedStorage; |
| if (UncompressedSize == 0) // No compression |
| Uncompressed = R.rest(); |
| else if (llvm::zlib::isAvailable()) { |
| // Don't allocate a massive buffer if UncompressedSize was corrupted |
| // This is effective for sharded index, but not big monolithic ones, as |
| // once compressed size reaches 4MB nothing can be ruled out. |
| // Theoretical max ratio from https://zlib.net/zlib_tech.html |
| constexpr int MaxCompressionRatio = 1032; |
| if (UncompressedSize / MaxCompressionRatio > R.rest().size()) |
| return error("Bad stri table: uncompress {0} -> {1} bytes is implausible", |
| R.rest().size(), UncompressedSize); |
| |
| if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage, |
| UncompressedSize)) |
| return std::move(E); |
| Uncompressed = UncompressedStorage; |
| } else |
| return error("Compressed string table, but zlib is unavailable"); |
| |
| StringTableIn Table; |
| llvm::StringSaver Saver(Table.Arena); |
| R = Reader(Uncompressed); |
| for (Reader R(Uncompressed); !R.eof();) { |
| auto Len = R.rest().find(0); |
| if (Len == llvm::StringRef::npos) |
| return error("Bad string table: not null terminated"); |
| Table.Strings.push_back(Saver.save(R.consume(Len))); |
| R.consume8(); |
| } |
| if (R.err()) |
| return error("Truncated string table"); |
| return std::move(Table); |
| } |
| |
| // SYMBOL ENCODING |
| // Each field of clangd::Symbol is encoded in turn (see implementation). |
| // - StringRef fields encode as varint (index into the string table) |
| // - enums encode as the underlying type |
| // - most numbers encode as varint |
| |
| void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings, |
| llvm::raw_ostream &OS) { |
| writeVar(Strings.index(Loc.FileURI), OS); |
| for (const auto &Endpoint : {Loc.Start, Loc.End}) { |
| writeVar(Endpoint.line(), OS); |
| writeVar(Endpoint.column(), OS); |
| } |
| } |
| |
| SymbolLocation readLocation(Reader &Data, |
| llvm::ArrayRef<llvm::StringRef> Strings) { |
| SymbolLocation Loc; |
| Loc.FileURI = Data.consumeString(Strings).data(); |
| for (auto *Endpoint : {&Loc.Start, &Loc.End}) { |
| Endpoint->setLine(Data.consumeVar()); |
| Endpoint->setColumn(Data.consumeVar()); |
| } |
| return Loc; |
| } |
| |
| IncludeGraphNode readIncludeGraphNode(Reader &Data, |
| llvm::ArrayRef<llvm::StringRef> Strings) { |
| IncludeGraphNode IGN; |
| IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8()); |
| IGN.URI = Data.consumeString(Strings); |
| llvm::StringRef Digest = Data.consume(IGN.Digest.size()); |
| std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin()); |
| if (!Data.consumeSize(IGN.DirectIncludes)) |
| return IGN; |
| for (llvm::StringRef &Include : IGN.DirectIncludes) |
| Include = Data.consumeString(Strings); |
| return IGN; |
| } |
| |
| void writeIncludeGraphNode(const IncludeGraphNode &IGN, |
| const StringTableOut &Strings, |
| llvm::raw_ostream &OS) { |
| OS.write(static_cast<uint8_t>(IGN.Flags)); |
| writeVar(Strings.index(IGN.URI), OS); |
| llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()), |
| IGN.Digest.size()); |
| OS << Hash; |
| writeVar(IGN.DirectIncludes.size(), OS); |
| for (llvm::StringRef Include : IGN.DirectIncludes) |
| writeVar(Strings.index(Include), OS); |
| } |
| |
| void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, |
| llvm::raw_ostream &OS) { |
| OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, |
| // symbol IDs should probably be in a string table. |
| OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind)); |
| OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang)); |
| writeVar(Strings.index(Sym.Name), OS); |
| writeVar(Strings.index(Sym.Scope), OS); |
| writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS); |
| writeLocation(Sym.Definition, Strings, OS); |
| writeLocation(Sym.CanonicalDeclaration, Strings, OS); |
| writeVar(Sym.References, OS); |
| OS.write(static_cast<uint8_t>(Sym.Flags)); |
| OS.write(static_cast<uint8_t>(Sym.Origin)); |
| writeVar(Strings.index(Sym.Signature), OS); |
| writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS); |
| writeVar(Strings.index(Sym.Documentation), OS); |
| writeVar(Strings.index(Sym.ReturnType), OS); |
| writeVar(Strings.index(Sym.Type), OS); |
| |
| auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) { |
| writeVar(Strings.index(Include.IncludeHeader), OS); |
| writeVar(Include.References, OS); |
| }; |
| writeVar(Sym.IncludeHeaders.size(), OS); |
| for (const auto &Include : Sym.IncludeHeaders) |
| WriteInclude(Include); |
| } |
| |
| Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) { |
| Symbol Sym; |
| Sym.ID = Data.consumeID(); |
| Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8()); |
| Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8()); |
| Sym.Name = Data.consumeString(Strings); |
| Sym.Scope = Data.consumeString(Strings); |
| Sym.TemplateSpecializationArgs = Data.consumeString(Strings); |
| Sym.Definition = readLocation(Data, Strings); |
| Sym.CanonicalDeclaration = readLocation(Data, Strings); |
| Sym.References = Data.consumeVar(); |
| Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8()); |
| Sym.Origin = static_cast<SymbolOrigin>(Data.consume8()); |
| Sym.Signature = Data.consumeString(Strings); |
| Sym.CompletionSnippetSuffix = Data.consumeString(Strings); |
| Sym.Documentation = Data.consumeString(Strings); |
| Sym.ReturnType = Data.consumeString(Strings); |
| Sym.Type = Data.consumeString(Strings); |
| if (!Data.consumeSize(Sym.IncludeHeaders)) |
| return Sym; |
| for (auto &I : Sym.IncludeHeaders) { |
| I.IncludeHeader = Data.consumeString(Strings); |
| I.References = Data.consumeVar(); |
| } |
| return Sym; |
| } |
| |
| // REFS ENCODING |
| // A refs section has data grouped by Symbol. Each symbol has: |
| // - SymbolID: 8 bytes |
| // - NumRefs: varint |
| // - Ref[NumRefs] |
| // Fields of Ref are encoded in turn, see implementation. |
| |
| void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs, |
| const StringTableOut &Strings, llvm::raw_ostream &OS) { |
| OS << ID.raw(); |
| writeVar(Refs.size(), OS); |
| for (const auto &Ref : Refs) { |
| OS.write(static_cast<unsigned char>(Ref.Kind)); |
| writeLocation(Ref.Location, Strings, OS); |
| OS << Ref.Container.raw(); |
| } |
| } |
| |
| std::pair<SymbolID, std::vector<Ref>> |
| readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) { |
| std::pair<SymbolID, std::vector<Ref>> Result; |
| Result.first = Data.consumeID(); |
| if (!Data.consumeSize(Result.second)) |
| return Result; |
| for (auto &Ref : Result.second) { |
| Ref.Kind = static_cast<RefKind>(Data.consume8()); |
| Ref.Location = readLocation(Data, Strings); |
| Ref.Container = Data.consumeID(); |
| } |
| return Result; |
| } |
| |
| // RELATIONS ENCODING |
| // A relations section is a flat list of relations. Each relation has: |
| // - SymbolID (subject): 8 bytes |
| // - relation kind (predicate): 1 byte |
| // - SymbolID (object): 8 bytes |
| // In the future, we might prefer a packed representation if the need arises. |
| |
| void writeRelation(const Relation &R, llvm::raw_ostream &OS) { |
| OS << R.Subject.raw(); |
| OS.write(static_cast<uint8_t>(R.Predicate)); |
| OS << R.Object.raw(); |
| } |
| |
| Relation readRelation(Reader &Data) { |
| SymbolID Subject = Data.consumeID(); |
| RelationKind Predicate = static_cast<RelationKind>(Data.consume8()); |
| SymbolID Object = Data.consumeID(); |
| return {Subject, Predicate, Object}; |
| } |
| |
| struct InternedCompileCommand { |
| llvm::StringRef Directory; |
| std::vector<llvm::StringRef> CommandLine; |
| }; |
| |
| void writeCompileCommand(const InternedCompileCommand &Cmd, |
| const StringTableOut &Strings, |
| llvm::raw_ostream &CmdOS) { |
| writeVar(Strings.index(Cmd.Directory), CmdOS); |
| writeVar(Cmd.CommandLine.size(), CmdOS); |
| for (llvm::StringRef C : Cmd.CommandLine) |
| writeVar(Strings.index(C), CmdOS); |
| } |
| |
| InternedCompileCommand |
| readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) { |
| InternedCompileCommand Cmd; |
| Cmd.Directory = CmdReader.consumeString(Strings); |
| if (!CmdReader.consumeSize(Cmd.CommandLine)) |
| return Cmd; |
| for (llvm::StringRef &C : Cmd.CommandLine) |
| C = CmdReader.consumeString(Strings); |
| return Cmd; |
| } |
| |
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
//   - meta: version number
//   - srcs: information related to include graph
//   - stri: string table
//   - symb: symbols
//   - refs: references to symbols
//   - rela: relations between symbols
//   - cmdl: compile command (working directory and command line)
| |
// Format version stored in the "meta" chunk.
// The versioning scheme is deliberately simple: a file whose version differs
// from this one is rejected. Bump the number whenever the encoding changes
// incompatibly, which invalidates previously stored data. Some backward
// compatibility may be added later.
static constexpr uint32_t Version = 16;
| |
| llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) { |
| auto RIFF = riff::readFile(Data); |
| if (!RIFF) |
| return RIFF.takeError(); |
| if (RIFF->Type != riff::fourCC("CdIx")) |
| return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type)); |
| llvm::StringMap<llvm::StringRef> Chunks; |
| for (const auto &Chunk : RIFF->Chunks) |
| Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()), |
| Chunk.Data); |
| |
| if (!Chunks.count("meta")) |
| return error("missing meta chunk"); |
| Reader Meta(Chunks.lookup("meta")); |
| auto SeenVersion = Meta.consume32(); |
| if (SeenVersion != Version) |
| return error("wrong version: want {0}, got {1}", Version, SeenVersion); |
| |
| // meta chunk is checked above, as we prefer the "version mismatch" error. |
| for (llvm::StringRef RequiredChunk : {"stri"}) |
| if (!Chunks.count(RequiredChunk)) |
| return error("missing required chunk {0}", RequiredChunk); |
| |
| auto Strings = readStringTable(Chunks.lookup("stri")); |
| if (!Strings) |
| return Strings.takeError(); |
| |
| IndexFileIn Result; |
| if (Chunks.count("srcs")) { |
| Reader SrcsReader(Chunks.lookup("srcs")); |
| Result.Sources.emplace(); |
| while (!SrcsReader.eof()) { |
| auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings); |
| auto Entry = Result.Sources->try_emplace(IGN.URI).first; |
| Entry->getValue() = std::move(IGN); |
| // We change all the strings inside the structure to point at the keys in |
| // the map, since it is the only copy of the string that's going to live. |
| Entry->getValue().URI = Entry->getKey(); |
| for (auto &Include : Entry->getValue().DirectIncludes) |
| Include = Result.Sources->try_emplace(Include).first->getKey(); |
| } |
| if (SrcsReader.err()) |
| return error("malformed or truncated include uri"); |
| } |
| |
| if (Chunks.count("symb")) { |
| Reader SymbolReader(Chunks.lookup("symb")); |
| SymbolSlab::Builder Symbols; |
| while (!SymbolReader.eof()) |
| Symbols.insert(readSymbol(SymbolReader, Strings->Strings)); |
| if (SymbolReader.err()) |
| return error("malformed or truncated symbol"); |
| Result.Symbols = std::move(Symbols).build(); |
| } |
| if (Chunks.count("refs")) { |
| Reader RefsReader(Chunks.lookup("refs")); |
| RefSlab::Builder Refs; |
| while (!RefsReader.eof()) { |
| auto RefsBundle = readRefs(RefsReader, Strings->Strings); |
| for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert? |
| Refs.insert(RefsBundle.first, Ref); |
| } |
| if (RefsReader.err()) |
| return error("malformed or truncated refs"); |
| Result.Refs = std::move(Refs).build(); |
| } |
| if (Chunks.count("rela")) { |
| Reader RelationsReader(Chunks.lookup("rela")); |
| RelationSlab::Builder Relations; |
| while (!RelationsReader.eof()) |
| Relations.insert(readRelation(RelationsReader)); |
| if (RelationsReader.err()) |
| return error("malformed or truncated relations"); |
| Result.Relations = std::move(Relations).build(); |
| } |
| if (Chunks.count("cmdl")) { |
| Reader CmdReader(Chunks.lookup("cmdl")); |
| InternedCompileCommand Cmd = |
| readCompileCommand(CmdReader, Strings->Strings); |
| if (CmdReader.err()) |
| return error("malformed or truncated commandline section"); |
| Result.Cmd.emplace(); |
| Result.Cmd->Directory = std::string(Cmd.Directory); |
| Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size()); |
| for (llvm::StringRef C : Cmd.CommandLine) |
| Result.Cmd->CommandLine.emplace_back(C); |
| } |
| return std::move(Result); |
| } |
| |
| template <class Callback> |
| void visitStrings(IncludeGraphNode &IGN, const Callback &CB) { |
| CB(IGN.URI); |
| for (llvm::StringRef &Include : IGN.DirectIncludes) |
| CB(Include); |
| } |
| |
| void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) { |
| assert(Data.Symbols && "An index file without symbols makes no sense!"); |
| riff::File RIFF; |
| RIFF.Type = riff::fourCC("CdIx"); |
| |
| llvm::SmallString<4> Meta; |
| { |
| llvm::raw_svector_ostream MetaOS(Meta); |
| write32(Version, MetaOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); |
| |
| StringTableOut Strings; |
| std::vector<Symbol> Symbols; |
| for (const auto &Sym : *Data.Symbols) { |
| Symbols.emplace_back(Sym); |
| visitStrings(Symbols.back(), |
| [&](llvm::StringRef &S) { Strings.intern(S); }); |
| } |
| std::vector<IncludeGraphNode> Sources; |
| if (Data.Sources) |
| for (const auto &Source : *Data.Sources) { |
| Sources.push_back(Source.getValue()); |
| visitStrings(Sources.back(), |
| [&](llvm::StringRef &S) { Strings.intern(S); }); |
| } |
| |
| std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs; |
| if (Data.Refs) { |
| for (const auto &Sym : *Data.Refs) { |
| Refs.emplace_back(Sym); |
| for (auto &Ref : Refs.back().second) { |
| llvm::StringRef File = Ref.Location.FileURI; |
| Strings.intern(File); |
| Ref.Location.FileURI = File.data(); |
| } |
| } |
| } |
| |
| std::vector<Relation> Relations; |
| if (Data.Relations) { |
| for (const auto &Relation : *Data.Relations) { |
| Relations.emplace_back(Relation); |
| // No strings to be interned in relations. |
| } |
| } |
| |
| InternedCompileCommand InternedCmd; |
| if (Data.Cmd) { |
| InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size()); |
| InternedCmd.Directory = Data.Cmd->Directory; |
| Strings.intern(InternedCmd.Directory); |
| for (llvm::StringRef C : Data.Cmd->CommandLine) { |
| InternedCmd.CommandLine.emplace_back(C); |
| Strings.intern(InternedCmd.CommandLine.back()); |
| } |
| } |
| |
| std::string StringSection; |
| { |
| llvm::raw_string_ostream StringOS(StringSection); |
| Strings.finalize(StringOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection}); |
| |
| std::string SymbolSection; |
| { |
| llvm::raw_string_ostream SymbolOS(SymbolSection); |
| for (const auto &Sym : Symbols) |
| writeSymbol(Sym, Strings, SymbolOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection}); |
| |
| std::string RefsSection; |
| if (Data.Refs) { |
| { |
| llvm::raw_string_ostream RefsOS(RefsSection); |
| for (const auto &Sym : Refs) |
| writeRefs(Sym.first, Sym.second, Strings, RefsOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); |
| } |
| |
| std::string RelationSection; |
| if (Data.Relations) { |
| { |
| llvm::raw_string_ostream RelationOS{RelationSection}; |
| for (const auto &Relation : Relations) |
| writeRelation(Relation, RelationOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection}); |
| } |
| |
| std::string SrcsSection; |
| { |
| { |
| llvm::raw_string_ostream SrcsOS(SrcsSection); |
| for (const auto &SF : Sources) |
| writeIncludeGraphNode(SF, Strings, SrcsOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); |
| } |
| |
| std::string CmdlSection; |
| if (Data.Cmd) { |
| { |
| llvm::raw_string_ostream CmdOS(CmdlSection); |
| writeCompileCommand(InternedCmd, Strings, CmdOS); |
| } |
| RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection}); |
| } |
| |
| OS << RIFF; |
| } |
| |
| } // namespace |
| |
| // Defined in YAMLSerialization.cpp. |
| void writeYAML(const IndexFileOut &, llvm::raw_ostream &); |
| llvm::Expected<IndexFileIn> readYAML(llvm::StringRef); |
| |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { |
| switch (O.Format) { |
| case IndexFileFormat::RIFF: |
| writeRIFF(O, OS); |
| break; |
| case IndexFileFormat::YAML: |
| writeYAML(O, OS); |
| break; |
| } |
| return OS; |
| } |
| |
| llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) { |
| if (Data.startswith("RIFF")) { |
| return readRIFF(Data); |
| } |
| if (auto YAMLContents = readYAML(Data)) { |
| return std::move(*YAMLContents); |
| } else { |
| return error("Not a RIFF file and failed to parse as YAML: {0}", |
| YAMLContents.takeError()); |
| } |
| } |
| |
| std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename, |
| bool UseDex) { |
| trace::Span OverallTracer("LoadIndex"); |
| auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); |
| if (!Buffer) { |
| elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message()); |
| return nullptr; |
| } |
| |
| SymbolSlab Symbols; |
| RefSlab Refs; |
| RelationSlab Relations; |
| { |
| trace::Span Tracer("ParseIndex"); |
| if (auto I = readIndexFile(Buffer->get()->getBuffer())) { |
| if (I->Symbols) |
| Symbols = std::move(*I->Symbols); |
| if (I->Refs) |
| Refs = std::move(*I->Refs); |
| if (I->Relations) |
| Relations = std::move(*I->Relations); |
| } else { |
| elog("Bad index file: {0}", I.takeError()); |
| return nullptr; |
| } |
| } |
| |
| size_t NumSym = Symbols.size(); |
| size_t NumRefs = Refs.numRefs(); |
| size_t NumRelations = Relations.size(); |
| |
| trace::Span Tracer("BuildIndex"); |
| auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs), |
| std::move(Relations)) |
| : MemIndex::build(std::move(Symbols), std::move(Refs), |
| std::move(Relations)); |
| vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n" |
| " - number of symbols: {3}\n" |
| " - number of refs: {4}\n" |
| " - number of relations: {5}", |
| UseDex ? "Dex" : "MemIndex", SymbolFilename, |
| Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations); |
| return Index; |
| } |
| |
| } // namespace clangd |
| } // namespace clang |