clang-tools-extra/clangd/index/Serialization.cpp - llvm-project - Git at Google

 //===-- Serialization.cpp - Binary serialization of index data ------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "Serialization.h"
 #include "Headers.h"
 #include "RIFF.h"
 #include "SymbolLocation.h"
 #include "SymbolOrigin.h"
 #include "dex/Dex.h"
 #include "support/Logger.h"
 #include "support/Trace.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 #include <vector>

 namespace clang {
 namespace clangd {
 namespace {

 // IO PRIMITIVES
 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
 //
 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
 // to encode the number, and the top bit to indicate whether more bytes follow.
 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
 // This represents 0x1a | 0x2f<<7 = 6042.
 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.

 // Sequential, bounds-checked reader over an in-memory byte buffer.
 //
 // Wraps a StringRef and keeps track of the read position. A read that would
 // run past the end of the buffer (or that decodes invalid data) sets a sticky
 // error bit and yields a zero/empty value instead of touching memory outside
 // the buffer. Callers are expected to check err() after a batch of reads.
 class Reader {
   const char *Begin, *End;
   bool Err = false;

   // Number of bytes that have not been consumed yet.
   size_t remaining() const { return static_cast<size_t>(End - Begin); }

 public:
   Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
   // The "error" bit is set by reading past EOF or reading invalid data.
   // When in an error state, reads may return zero values: callers should check.
   bool err() const { return Err; }
   // Did we read all the data, or encounter an error?
   bool eof() const { return Begin == End || Err; }
   // All the data we didn't read yet.
   llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }

   // Reads one byte. Returns 0 and sets the error bit at end of input.
   uint8_t consume8() {
     if (LLVM_UNLIKELY(Begin == End)) {
       Err = true;
       return 0;
     }
     return *Begin++;
   }

   // Reads a little-endian 32-bit integer.
   // Returns 0 and sets the error bit if fewer than 4 bytes remain.
   uint32_t consume32() {
     // Compare sizes instead of forming `Begin + 4`: a pointer more than one
     // past the end of the buffer is undefined behavior and may wrap around.
     if (LLVM_UNLIKELY(remaining() < 4)) {
       Err = true;
       return 0;
     }
     auto Ret = llvm::support::endian::read32le(Begin);
     Begin += 4;
     return Ret;
   }

   // Reads exactly N bytes and returns a view of them (no copy is made).
   // Returns an empty StringRef and sets the error bit if N is negative or
   // fewer than N bytes remain.
   llvm::StringRef consume(int N) {
     // As in consume32(), never compute an out-of-range pointer.
     if (LLVM_UNLIKELY(N < 0 || remaining() < static_cast<size_t>(N))) {
       Err = true;
       return llvm::StringRef();
     }
     llvm::StringRef Ret(Begin, N);
     Begin += N;
     return Ret;
   }

   // Reads a variable-length encoded 32-bit integer (1-5 bytes, 7 payload bits
   // per byte, least-significant group first, top bit set on all but the last).
   uint32_t consumeVar() {
     constexpr static uint8_t More = 1 << 7;

     // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
     // is wider than 32 bits).
     uint32_t B = consume8();
     if (LLVM_LIKELY(!(B & More)))
       return B;
     uint32_t Val = B & ~More;
     // If the input is truncated, consume8() returns 0 (continuation bit
     // clear), which terminates the loop; the error bit is already set.
     for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
       B = consume8();
       // 5th byte of a varint can only have lowest 4 bits set.
       assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
       Val |= (B & ~More) << Shift;
     }
     return Val;
   }

   // Reads a varint and returns the string at that index of Strings.
   // An out-of-range index sets the error bit and returns an empty StringRef.
   llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
     auto StringIndex = consumeVar();
     if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
       Err = true;
       return llvm::StringRef();
     }
     return Strings[StringIndex];
   }

   // Reads SymbolID::RawSize raw bytes and converts them to a SymbolID.
   // Returns a default-constructed SymbolID if the input was truncated.
   SymbolID consumeID() {
     llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
     return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
   }

   // Read a varint (as consumeVar) and resize the container accordingly.
   // If the size is invalid, return false and mark an error.
   // (The caller should abort in this case).
   template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) {
     auto Size = consumeVar();
     // Conservatively assume each element is at least one byte, so a size
     // larger than the remaining input cannot be genuine. This also prevents
     // huge allocations driven by corrupted data.
     if (Size > remaining()) {
       Err = true;
       return false;
     }
     Container.resize(Size);
     return true;
   }
 };

 // Emits I to OS as exactly four bytes in little-endian order.
 void write32(uint32_t I, llvm::raw_ostream &OS) {
   char Bytes[sizeof(uint32_t)];
   llvm::support::endian::write32le(Bytes, I);
   OS << llvm::StringRef(Bytes, sizeof(Bytes));
 }

 // Emits I to OS as a varint: seven payload bits per byte, least-significant
 // group first, with the top bit set on every byte except the last one.
 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
   constexpr static uint8_t More = 1 << 7;
   // Each iteration emits one continuation byte. Only the low eight bits of
   // (I | More) survive the conversion to unsigned char.
   while (I >= More) {
     OS.write(static_cast<unsigned char>(I | More));
     I >>= 7;
   }
   // Final byte: the value now fits in 7 bits, so the top bit stays clear.
   OS.write(static_cast<unsigned char>(I));
 }

 // STRING TABLE ENCODING
 // Index data has many string fields, and many strings are identical.
 // We store each string once, and refer to them by index.
 //
 // The string table's format is:
 //   - UncompressedSize : uint32 (or 0 for no compression)
 //   - CompressedData   : byte[CompressedSize]
 //
 // CompressedData is a zlib-compressed byte[UncompressedSize].
 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
 // These are sorted to improve compression.

 // Builds the string table of an index file that is being written.
 //
 // Maps each string to a canonical representation: strings are first collected
 // with intern(), then finalize() sorts them, assigns every string an index and
 // serializes the table. After that, index() translates a string to its index.
 // Strings remain owned externally (e.g. by SymbolSlab).
 class StringTableOut {
   llvm::DenseSet<llvm::StringRef> Unique;
   std::vector<llvm::StringRef> Sorted;
   // Since strings are interned, look up can be by pointer.
   llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;

 public:
   StringTableOut() {
     // Ensure there's at least one string in the table.
     // Table size zero is reserved to indicate no compression.
     Unique.insert("");
   }
   // Add a string to the table. Overwrites S if an identical string exists.
   void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; }
   // Finalize the table and write it to OS. No more strings may be added.
   void finalize(llvm::raw_ostream &OS) {
     Sorted = {Unique.begin(), Unique.end()};
     llvm::sort(Sorted);
     // Assign indices in sorted order, and compute the size of the raw table
     // (every string plus its null terminator) so it is allocated only once.
     size_t RawSize = 0;
     for (unsigned I = 0; I < Sorted.size(); ++I) {
       Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
       RawSize += Sorted[I].size() + 1;
     }

     std::string RawTable;
     RawTable.reserve(RawSize);
     for (llvm::StringRef S : Sorted) {
       // Append the bytes directly; no temporary std::string is needed.
       RawTable.append(S.begin(), S.end());
       RawTable.push_back(0);
     }
     if (llvm::zlib::isAvailable()) {
       llvm::SmallString<1> Compressed;
       llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
       write32(RawTable.size(), OS);
       OS << Compressed;
     } else {
       write32(0, OS); // No compression.
       OS << RawTable;
     }
   }
   // Get the ID of a string, which must be interned. Table must be finalized.
   // The lookup is by (pointer, size), so S must be the canonical StringRef
   // produced by intern(), not merely a string with equal contents.
   unsigned index(llvm::StringRef S) const {
     assert(!Sorted.empty() && "table not finalized");
     assert(Index.count({S.data(), S.size()}) && "string not interned");
     return Index.find({S.data(), S.size()})->second;
   }
 };

 // The string table as read back from an index file (see readStringTable).
 struct StringTableIn {
   // Backing storage: readStringTable copies every string into this arena.
   llvm::BumpPtrAllocator Arena;
   // The table entries in file order. Serialized data refers to strings by
   // their position in this vector (see Reader::consumeString).
   std::vector<llvm::StringRef> Strings;
 };

 // Parses the contents of a string table section.
 //
 // Data starts with a 32-bit uncompressed size (0 means "not compressed"),
 // followed by the (possibly zlib-compressed) table: a sequence of
 // null-terminated strings. The strings are copied into the returned table's
 // arena, so the result does not refer to Data.
 //
 // Returns an error if the section is truncated, the advertised uncompressed
 // size is implausible, decompression fails or is unavailable, or the final
 // string is not null-terminated.
 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
   Reader R(Data);
   size_t UncompressedSize = R.consume32();
   if (R.err())
     return error("Truncated string table");

   llvm::StringRef Uncompressed;
   llvm::SmallString<1> UncompressedStorage;
   if (UncompressedSize == 0) // No compression
     Uncompressed = R.rest();
   else if (llvm::zlib::isAvailable()) {
     // Don't allocate a massive buffer if UncompressedSize was corrupted
     // This is effective for sharded index, but not big monolithic ones, as
     // once compressed size reaches 4MB nothing can be ruled out.
     // Theoretical max ratio from https://zlib.net/zlib_tech.html
     constexpr int MaxCompressionRatio = 1032;
     if (UncompressedSize / MaxCompressionRatio > R.rest().size())
       return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
                    R.rest().size(), UncompressedSize);

     if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
                                                UncompressedSize))
       return std::move(E);
     Uncompressed = UncompressedStorage;
   } else
     return error("Compressed string table, but zlib is unavailable");

   StringTableIn Table;
   llvm::StringSaver Saver(Table.Arena);
   // Re-point the reader at the uncompressed payload and split it into
   // strings. The same reader is used for the loop and for the error check
   // below, so the check inspects the reader that actually did the reading.
   R = Reader(Uncompressed);
   while (!R.eof()) {
     auto Len = R.rest().find(0);
     if (Len == llvm::StringRef::npos)
       return error("Bad string table: not null terminated");
     Table.Strings.push_back(Saver.save(R.consume(Len)));
     R.consume8(); // Skip the null terminator.
   }
   if (R.err())
     return error("Truncated string table");
   return std::move(Table);
 }

 // SYMBOL ENCODING
 // Each field of clangd::Symbol is encoded in turn (see implementation).
 //  - StringRef fields encode as varint (index into the string table)
 //  - enums encode as the underlying type
 //  - most numbers encode as varint

 // Serializes Loc as five varints: the string-table index of the file URI,
 // then start line, start column, end line and end column.
 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
                    llvm::raw_ostream &OS) {
   writeVar(Strings.index(Loc.FileURI), OS);
   writeVar(Loc.Start.line(), OS);
   writeVar(Loc.Start.column(), OS);
   writeVar(Loc.End.line(), OS);
   writeVar(Loc.End.column(), OS);
 }

 // Decodes a location written by writeLocation: file URI (as a string-table
 // index), then start line/column and end line/column.
 // Read errors are recorded on Data; the caller is expected to check them.
 SymbolLocation readLocation(Reader &Data,
                             llvm::ArrayRef<llvm::StringRef> Strings) {
   SymbolLocation Result;
   Result.FileURI = Data.consumeString(Strings).data();
   Result.Start.setLine(Data.consumeVar());
   Result.Start.setColumn(Data.consumeVar());
   Result.End.setLine(Data.consumeVar());
   Result.End.setColumn(Data.consumeVar());
   return Result;
 }

 // Decodes one include graph node: flags byte, URI (string-table index), the
 // raw digest bytes, and a length-prefixed list of directly included URIs.
 // Read errors are recorded on Data; a partially filled node is returned then.
 IncludeGraphNode readIncludeGraphNode(Reader &Data,
                                       llvm::ArrayRef<llvm::StringRef> Strings) {
   IncludeGraphNode Node;
   Node.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
   Node.URI = Data.consumeString(Strings);
   // On a truncated read RawDigest is empty and nothing is copied.
   const llvm::StringRef RawDigest = Data.consume(Node.Digest.size());
   std::copy(RawDigest.bytes_begin(), RawDigest.bytes_end(),
             Node.Digest.begin());
   if (Data.consumeSize(Node.DirectIncludes)) {
     for (llvm::StringRef &Included : Node.DirectIncludes)
       Included = Data.consumeString(Strings);
   }
   return Node;
 }

 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
                            const StringTableOut &Strings,
                            llvm::raw_ostream &OS) {
   OS.write(static_cast<uint8_t>(IGN.Flags));
   writeVar(Strings.index(IGN.URI), OS);
   llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
                        IGN.Digest.size());
   OS << Hash;
   writeVar(IGN.DirectIncludes.size(), OS);
   for (llvm::StringRef Include : IGN.DirectIncludes)
     writeVar(Strings.index(Include), OS);
 }

 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
                  llvm::raw_ostream &OS) {
   OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
                       // symbol IDs should probably be in a string table.
   OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
   OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
   writeVar(Strings.index(Sym.Name), OS);
   writeVar(Strings.index(Sym.Scope), OS);
   writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
   writeLocation(Sym.Definition, Strings, OS);
   writeLocation(Sym.CanonicalDeclaration, Strings, OS);
   writeVar(Sym.References, OS);
   OS.write(static_cast<uint8_t>(Sym.Flags));
   OS.write(static_cast<uint8_t>(Sym.Origin));
   writeVar(Strings.index(Sym.Signature), OS);
   writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
   writeVar(Strings.index(Sym.Documentation), OS);
   writeVar(Strings.index(Sym.ReturnType), OS);
   writeVar(Strings.index(Sym.Type), OS);

   auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
     writeVar(Strings.index(Include.IncludeHeader), OS);
     writeVar(Include.References, OS);
   };
   writeVar(Sym.IncludeHeaders.size(), OS);
   for (const auto &Include : Sym.IncludeHeaders)
     WriteInclude(Include);
 }

 // Decodes one symbol. Fields are read in exactly the order writeSymbol emits
 // them, so the statement order below is part of the file format.
 // Read errors are recorded on Data rather than reported here: the returned
 // symbol may then be partially filled, and the caller must check Data.err().
 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
   Symbol Sym;
   Sym.ID = Data.consumeID();
   // Enums are stored as a single byte each.
   Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
   Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
   // Strings are stored as varint indices into the string table.
   Sym.Name = Data.consumeString(Strings);
   Sym.Scope = Data.consumeString(Strings);
   Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
   Sym.Definition = readLocation(Data, Strings);
   Sym.CanonicalDeclaration = readLocation(Data, Strings);
   Sym.References = Data.consumeVar();
   Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
   Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
   Sym.Signature = Data.consumeString(Strings);
   Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
   Sym.Documentation = Data.consumeString(Strings);
   Sym.ReturnType = Data.consumeString(Strings);
   Sym.Type = Data.consumeString(Strings);
   // Length-prefixed list of include headers. consumeSize rejects counts that
   // exceed the remaining input; stop early in that case.
   if (!Data.consumeSize(Sym.IncludeHeaders))
     return Sym;
   for (auto &I : Sym.IncludeHeaders) {
     I.IncludeHeader = Data.consumeString(Strings);
     I.References = Data.consumeVar();
   }
   return Sym;
 }

 // REFS ENCODING
 // A refs section has data grouped by Symbol. Each symbol has:
 //  - SymbolID: 8 bytes
 //  - NumRefs: varint
 //  - Ref[NumRefs]
 // Fields of Ref are encoded in turn, see implementation.

 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
                const StringTableOut &Strings, llvm::raw_ostream &OS) {
   OS << ID.raw();
   writeVar(Refs.size(), OS);
   for (const auto &Ref : Refs) {
     OS.write(static_cast<unsigned char>(Ref.Kind));
     writeLocation(Ref.Location, Strings, OS);
     OS << Ref.Container.raw();
   }
 }

 // Decodes the references to one symbol, as written by writeRefs.
 // Returns the symbol ID paired with its references. Read errors are recorded
 // on Data; the result may be incomplete in that case.
 std::pair<SymbolID, std::vector<Ref>>
 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
   std::pair<SymbolID, std::vector<Ref>> Bundle;
   Bundle.first = Data.consumeID();
   // Only decode the entries if the advertised count was accepted.
   if (Data.consumeSize(Bundle.second)) {
     for (Ref &R : Bundle.second) {
       R.Kind = static_cast<RefKind>(Data.consume8());
       R.Location = readLocation(Data, Strings);
       R.Container = Data.consumeID();
     }
   }
   return Bundle;
 }

 // RELATIONS ENCODING
 // A relations section is a flat list of relations. Each relation has:
 //  - SymbolID (subject): 8 bytes
 //  - relation kind (predicate): 1 byte
 //  - SymbolID (object): 8 bytes
 // In the future, we might prefer a packed representation if the need arises.

 // Serializes one relation as: raw subject ID, predicate byte, raw object ID.
 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
   const auto PredicateByte = static_cast<uint8_t>(R.Predicate);
   OS << R.Subject.raw();
   OS.write(PredicateByte);
   OS << R.Object.raw();
 }

 // Decodes one relation, as written by writeRelation.
 // The three reads are kept as separate statements so that they happen in
 // file order: subject, predicate, object.
 Relation readRelation(Reader &Data) {
   const SymbolID Subj = Data.consumeID();
   const auto Kind = static_cast<RelationKind>(Data.consume8());
   const SymbolID Obj = Data.consumeID();
   return Relation{Subj, Kind, Obj};
 }

 // A compile command whose strings are StringRefs rather than owned strings.
 // When writing, writeRIFF interns these in the string table; when reading,
 // readCompileCommand fills them with entries of the string table.
 struct InternedCompileCommand {
   // Working directory of the command.
   llvm::StringRef Directory;
   // The individual command-line arguments, in order.
   std::vector<llvm::StringRef> CommandLine;
 };

 void writeCompileCommand(const InternedCompileCommand &Cmd,
                          const StringTableOut &Strings,
                          llvm::raw_ostream &CmdOS) {
   writeVar(Strings.index(Cmd.Directory), CmdOS);
   writeVar(Cmd.CommandLine.size(), CmdOS);
   for (llvm::StringRef C : Cmd.CommandLine)
     writeVar(Strings.index(C), CmdOS);
 }

 // Decodes a compile command: the working directory followed by a
 // length-prefixed list of arguments, all stored as string-table indices.
 //
 // The reader is taken by reference so that the caller sees the error bit.
 // With a by-value reader, errors would only be recorded on a private copy and
 // the caller's CmdReader.err() check could never detect a malformed or
 // truncated section.
 InternedCompileCommand
 readCompileCommand(Reader &CmdReader,
                    llvm::ArrayRef<llvm::StringRef> Strings) {
   InternedCompileCommand Cmd;
   Cmd.Directory = CmdReader.consumeString(Strings);
   // consumeSize rejects argument counts larger than the remaining input.
   if (!CmdReader.consumeSize(Cmd.CommandLine))
     return Cmd;
   for (llvm::StringRef &C : Cmd.CommandLine)
     C = CmdReader.consumeString(Strings);
   return Cmd;
 }

 // FILE ENCODING
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
 //   - meta: version number
 //   - srcs: information related to include graph
 //   - stri: string table
 //   - symb: symbols
 //   - refs: references to symbols
 //   - rela: relations between symbols
 //   - cmdl: compile command

 // The current versioning scheme is simple - non-current versions are rejected.
 // If you make a breaking change, bump this version number to invalidate stored
 // data. Later we may want to support some backward compatibility.
 // This value is written to the "meta" chunk by writeRIFF and compared against
 // the stored value by readRIFF.
 constexpr static uint32_t Version = 16;

 // Parses a RIFF-encoded index file.
 //
 // Data must be a RIFF file of type 'CdIx' whose "meta" chunk holds the current
 // format Version and which has a "stri" (string table) chunk. The "srcs",
 // "symb", "refs", "rela" and "cmdl" chunks are optional: the corresponding
 // field of the result is only populated when the chunk is present.
 // Returns an error if the container, the version, the string table or any
 // present section is malformed.
 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
   auto RIFF = riff::readFile(Data);
   if (!RIFF)
     return RIFF.takeError();
   if (RIFF->Type != riff::fourCC("CdIx"))
     return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
   // Index the chunks by ID. try_emplace does not overwrite an existing entry,
   // so if an ID occurs more than once the first chunk wins.
   llvm::StringMap<llvm::StringRef> Chunks;
   for (const auto &Chunk : RIFF->Chunks)
     Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
                        Chunk.Data);

   if (!Chunks.count("meta"))
     return error("missing meta chunk");
   Reader Meta(Chunks.lookup("meta"));
   // A meta chunk shorter than 4 bytes makes consume32() return 0, which is
   // then reported as a version mismatch below.
   auto SeenVersion = Meta.consume32();
   if (SeenVersion != Version)
     return error("wrong version: want {0}, got {1}", Version, SeenVersion);

   // meta chunk is checked above, as we prefer the "version mismatch" error.
   for (llvm::StringRef RequiredChunk : {"stri"})
     if (!Chunks.count(RequiredChunk))
       return error("missing required chunk {0}", RequiredChunk);

   // The string table must be decoded first: every other section refers to
   // strings by their index in it.
   auto Strings = readStringTable(Chunks.lookup("stri"));
   if (!Strings)
     return Strings.takeError();

   IndexFileIn Result;
   if (Chunks.count("srcs")) {
     Reader SrcsReader(Chunks.lookup("srcs"));
     // Sources is engaged whenever the chunk exists, even if it is empty.
     Result.Sources.emplace();
     while (!SrcsReader.eof()) {
       auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
       auto Entry = Result.Sources->try_emplace(IGN.URI).first;
       Entry->getValue() = std::move(IGN);
       // We change all the strings inside the structure to point at the keys in
       // the map, since it is the only copy of the string that's going to live.
       Entry->getValue().URI = Entry->getKey();
       for (auto &Include : Entry->getValue().DirectIncludes)
         Include = Result.Sources->try_emplace(Include).first->getKey();
     }
     if (SrcsReader.err())
       return error("malformed or truncated include uri");
   }

   if (Chunks.count("symb")) {
     Reader SymbolReader(Chunks.lookup("symb"));
     SymbolSlab::Builder Symbols;
     // eof() is also true once the reader is in an error state, so a malformed
     // symbol ends the loop and is reported right after it.
     while (!SymbolReader.eof())
       Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
     if (SymbolReader.err())
       return error("malformed or truncated symbol");
     Result.Symbols = std::move(Symbols).build();
   }
   if (Chunks.count("refs")) {
     Reader RefsReader(Chunks.lookup("refs"));
     RefSlab::Builder Refs;
     while (!RefsReader.eof()) {
       auto RefsBundle = readRefs(RefsReader, Strings->Strings);
       for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
         Refs.insert(RefsBundle.first, Ref);
     }
     if (RefsReader.err())
       return error("malformed or truncated refs");
     Result.Refs = std::move(Refs).build();
   }
   if (Chunks.count("rela")) {
     Reader RelationsReader(Chunks.lookup("rela"));
     RelationSlab::Builder Relations;
     while (!RelationsReader.eof())
       Relations.insert(readRelation(RelationsReader));
     if (RelationsReader.err())
       return error("malformed or truncated relations");
     Result.Relations = std::move(Relations).build();
   }
   if (Chunks.count("cmdl")) {
     Reader CmdReader(Chunks.lookup("cmdl"));
     InternedCompileCommand Cmd =
         readCompileCommand(CmdReader, Strings->Strings);
     // NOTE(review): this check can only observe read errors if
     // readCompileCommand operates on CmdReader itself (i.e. takes the reader
     // by reference) rather than on a copy - verify its signature.
     if (CmdReader.err())
       return error("malformed or truncated commandline section");
     // Copy the interned strings into owned std::strings for the result.
     Result.Cmd.emplace();
     Result.Cmd->Directory = std::string(Cmd.Directory);
     Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
     for (llvm::StringRef C : Cmd.CommandLine)
       Result.Cmd->CommandLine.emplace_back(C);
   }
   return std::move(Result);
 }

 // Calls CB on every string stored in IGN - its own URI first, then each
 // direct include - passing a mutable reference so that CB may replace the
 // string (e.g. with an interned copy).
 template <class Callback>
 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
   CB(IGN.URI);
   for (auto &IncludedURI : IGN.DirectIncludes) {
     CB(IncludedURI);
   }
 }

 // Serializes Data to OS as a RIFF file of type 'CdIx'.
 //
 // The function works in two phases. First, every item to be written is copied
 // and all of its strings are interned in a StringTableOut. Then the string
 // table is finalized and each section is encoded into its own buffer, with
 // strings replaced by their table index. Data.Symbols must be set.
 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
   assert(Data.Symbols && "An index file without symbols makes no sense!");
   riff::File RIFF;
   RIFF.Type = riff::fourCC("CdIx");

   // NOTE(review): the section buffers below (Meta, StringSection, ...) are all
   // declared at function scope and stay alive until `OS << RIFF`; presumably
   // the chunks only reference them rather than copy - confirm in RIFF.h.
   llvm::SmallString<4> Meta;
   {
     llvm::raw_svector_ostream MetaOS(Meta);
     write32(Version, MetaOS);
   }
   RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});

   // Phase 1: copy the data and intern all strings. Interning rewrites the
   // StringRefs inside the copies so that they point at the canonical string,
   // which is what StringTableOut::index() later looks up by pointer.
   StringTableOut Strings;
   std::vector<Symbol> Symbols;
   for (const auto &Sym : *Data.Symbols) {
     Symbols.emplace_back(Sym);
     // Uses a visitStrings overload for Symbol, which is declared elsewhere.
     visitStrings(Symbols.back(),
                  [&](llvm::StringRef &S) { Strings.intern(S); });
   }
   std::vector<IncludeGraphNode> Sources;
   if (Data.Sources)
     for (const auto &Source : *Data.Sources) {
       Sources.push_back(Source.getValue());
       visitStrings(Sources.back(),
                    [&](llvm::StringRef &S) { Strings.intern(S); });
     }

   std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
   if (Data.Refs) {
     for (const auto &Sym : *Data.Refs) {
       Refs.emplace_back(Sym);
       for (auto &Ref : Refs.back().second) {
         // FileURI is a C string: intern it via a temporary StringRef and
         // store the canonical pointer back.
         llvm::StringRef File = Ref.Location.FileURI;
         Strings.intern(File);
         Ref.Location.FileURI = File.data();
       }
     }
   }

   std::vector<Relation> Relations;
   if (Data.Relations) {
     for (const auto &Relation : *Data.Relations) {
       Relations.emplace_back(Relation);
       // No strings to be interned in relations.
     }
   }

   InternedCompileCommand InternedCmd;
   if (Data.Cmd) {
     InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
     InternedCmd.Directory = Data.Cmd->Directory;
     Strings.intern(InternedCmd.Directory);
     for (llvm::StringRef C : Data.Cmd->CommandLine) {
       InternedCmd.CommandLine.emplace_back(C);
       Strings.intern(InternedCmd.CommandLine.back());
     }
   }

   // Phase 2: encode the sections. The string table must be finalized before
   // anything calls Strings.index(), so it is emitted first. Each stream is
   // scoped in its own block so that it is destroyed before its buffer is used.
   std::string StringSection;
   {
     llvm::raw_string_ostream StringOS(StringSection);
     Strings.finalize(StringOS);
   }
   RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});

   std::string SymbolSection;
   {
     llvm::raw_string_ostream SymbolOS(SymbolSection);
     for (const auto &Sym : Symbols)
       writeSymbol(Sym, Strings, SymbolOS);
   }
   RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});

   std::string RefsSection;
   if (Data.Refs) {
     {
       llvm::raw_string_ostream RefsOS(RefsSection);
       for (const auto &Sym : Refs)
         writeRefs(Sym.first, Sym.second, Strings, RefsOS);
     }
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }

   std::string RelationSection;
   if (Data.Relations) {
     {
       llvm::raw_string_ostream RelationOS{RelationSection};
       for (const auto &Relation : Relations)
         writeRelation(Relation, RelationOS);
     }
     RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
   }

   // Unlike "refs", "rela" and "cmdl", the "srcs" chunk is written
   // unconditionally: it is empty when Data.Sources is unset.
   std::string SrcsSection;
   {
     {
       llvm::raw_string_ostream SrcsOS(SrcsSection);
       for (const auto &SF : Sources)
         writeIncludeGraphNode(SF, Strings, SrcsOS);
     }
     RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
   }

   std::string CmdlSection;
   if (Data.Cmd) {
     {
       llvm::raw_string_ostream CmdOS(CmdlSection);
       writeCompileCommand(InternedCmd, Strings, CmdOS);
     }
     RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
   }

   OS << RIFF;
 }

 } // namespace

 // YAML counterparts of writeRIFF/readRIFF, used below for index data that is
 // not in the RIFF format. Defined in YAMLSerialization.cpp.
 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);

 // Serializes O to OS using the container format selected by O.Format.
 // Nothing is written if O.Format is not one of the known formats.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
   switch (O.Format) {
   case IndexFileFormat::YAML:
     writeYAML(O, OS);
     return OS;
   case IndexFileFormat::RIFF:
     writeRIFF(O, OS);
     return OS;
   }
   return OS;
 }

 // Parses an index file, detecting its format from the contents: data starting
 // with the "RIFF" magic is read as RIFF, anything else is tried as YAML.
 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
   if (Data.startswith("RIFF"))
     return readRIFF(Data);

   auto YAMLContents = readYAML(Data);
   if (!YAMLContents)
     return error("Not a RIFF file and failed to parse as YAML: {0}",
                  YAMLContents.takeError());
   return std::move(*YAMLContents);
 }

 // Loads the index file at SymbolFilename and builds an in-memory index from
 // it: a Dex index if UseDex is set, a MemIndex otherwise.
 // Returns nullptr (after logging the reason) if the file cannot be opened or
 // parsed.
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
                                        bool UseDex) {
   trace::Span OverallTracer("LoadIndex");
   auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
   if (!Buffer) {
     elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
     return nullptr;
   }

   SymbolSlab Symbols;
   RefSlab Refs;
   RelationSlab Relations;
   {
     trace::Span Tracer("ParseIndex");
     auto Parsed = readIndexFile((*Buffer)->getBuffer());
     if (!Parsed) {
       elog("Bad index file: {0}", Parsed.takeError());
       return nullptr;
     }
     // Take over whichever slabs the file provided; the rest stay empty.
     if (Parsed->Symbols)
       Symbols = std::move(*Parsed->Symbols);
     if (Parsed->Refs)
       Refs = std::move(*Parsed->Refs);
     if (Parsed->Relations)
       Relations = std::move(*Parsed->Relations);
   }

   // Record the sizes now: the slabs are moved into the index below.
   const size_t SymbolCount = Symbols.size();
   const size_t RefCount = Refs.numRefs();
   const size_t RelationCount = Relations.size();

   trace::Span Tracer("BuildIndex");
   auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
                                         std::move(Relations))
                       : MemIndex::build(std::move(Symbols), std::move(Refs),
                                         std::move(Relations));
   const char *IndexKind = UseDex ? "Dex" : "MemIndex";
   vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
        "  - number of symbols: {3}\n"
        "  - number of refs: {4}\n"
        "  - number of relations: {5}",
        IndexKind, SymbolFilename, Index->estimateMemoryUsage(), SymbolCount,
        RefCount, RelationCount);
   return Index;
 }

 } // namespace clangd
 } // namespace clang