| //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "Headers.h" |
| #include "RIFF.h" |
| #include "index/Index.h" |
| #include "index/Serialization.h" |
| #include "support/Logger.h" |
| #include "clang/Tooling/CompilationDatabase.h" |
| #include "llvm/ADT/ScopeExit.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/Support/Compression.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/ScopedPrinter.h" |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #ifdef LLVM_ON_UNIX |
| #include <sys/resource.h> |
| #endif |
| |
| using ::testing::ElementsAre; |
| using ::testing::Pair; |
| using ::testing::UnorderedElementsAre; |
| using ::testing::UnorderedElementsAreArray; |
| |
| namespace clang { |
| namespace clangd { |
| namespace { |
| |
| const char *YAML = R"( |
| --- |
| !Symbol |
| ID: 057557CEBF6E6B2D |
| Name: 'Foo1' |
| Scope: 'clang::' |
| SymInfo: |
| Kind: Function |
| Lang: Cpp |
| CanonicalDeclaration: |
| FileURI: file:///path/foo.h |
| Start: |
| Line: 1 |
| Column: 0 |
| End: |
| Line: 1 |
| Column: 1 |
| Origin: 128 |
| Flags: 129 |
| Documentation: 'Foo doc' |
| ReturnType: 'int' |
| IncludeHeaders: |
| - Header: 'include1' |
| References: 7 |
| - Header: 'include2' |
| References: 3 |
| ... |
| --- |
| !Symbol |
| ID: 057557CEBF6E6B2E |
| Name: 'Foo2' |
| Scope: 'clang::' |
| SymInfo: |
| Kind: Function |
| Lang: Cpp |
| CanonicalDeclaration: |
| FileURI: file:///path/bar.h |
| Start: |
| Line: 1 |
| Column: 0 |
| End: |
| Line: 1 |
| Column: 1 |
| Flags: 2 |
| Signature: '-sig' |
| CompletionSnippetSuffix: '-snippet' |
| ... |
| !Refs |
| ID: 057557CEBF6E6B2D |
| References: |
| - Kind: 4 |
| Location: |
| FileURI: file:///path/foo.cc |
| Start: |
| Line: 5 |
| Column: 3 |
| End: |
| Line: 5 |
| Column: 8 |
| ... |
| --- !Relations |
| Subject: |
| ID: 6481EE7AF2841756 |
| Predicate: 0 |
| Object: |
| ID: 6512AEC512EA3A2D |
| ... |
| --- !Cmd |
| Directory: 'testdir' |
| CommandLine: |
| - 'cmd1' |
| - 'cmd2' |
| ... |
| --- !Source |
| URI: 'file:///path/source1.cpp' |
| Flags: 1 |
| Digest: EED8F5EAF25C453C |
| DirectIncludes: |
| - 'file:///path/inc1.h' |
| - 'file:///path/inc2.h' |
| ... |
| )"; |
| |
| MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } |
| MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } |
| MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { |
| return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); |
| } |
| |
| TEST(SerializationTest, NoCrashOnEmptyYAML) { |
| EXPECT_TRUE(bool(readIndexFile(""))); |
| } |
| |
| TEST(SerializationTest, YAMLConversions) { |
| auto ParsedYAML = readIndexFile(YAML); |
| ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); |
| ASSERT_TRUE(bool(ParsedYAML->Symbols)); |
| EXPECT_THAT( |
| *ParsedYAML->Symbols, |
| UnorderedElementsAre(ID("057557CEBF6E6B2D"), ID("057557CEBF6E6B2E"))); |
| |
| auto Sym1 = *ParsedYAML->Symbols->find( |
| cantFail(SymbolID::fromStr("057557CEBF6E6B2D"))); |
| auto Sym2 = *ParsedYAML->Symbols->find( |
| cantFail(SymbolID::fromStr("057557CEBF6E6B2E"))); |
| |
| EXPECT_THAT(Sym1, QName("clang::Foo1")); |
| EXPECT_EQ(Sym1.Signature, ""); |
| EXPECT_EQ(Sym1.Documentation, "Foo doc"); |
| EXPECT_EQ(Sym1.ReturnType, "int"); |
| EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h"); |
| EXPECT_EQ(Sym1.Origin, static_cast<SymbolOrigin>(1 << 7)); |
| EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129); |
| EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion); |
| EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated); |
| EXPECT_THAT(Sym1.IncludeHeaders, |
| UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u), |
| IncludeHeaderWithRef("include2", 3u))); |
| |
| EXPECT_THAT(Sym2, QName("clang::Foo2")); |
| EXPECT_EQ(Sym2.Signature, "-sig"); |
| EXPECT_EQ(Sym2.ReturnType, ""); |
| EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI), |
| "file:///path/bar.h"); |
| EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); |
| EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); |
| |
| ASSERT_TRUE(bool(ParsedYAML->Refs)); |
| EXPECT_THAT( |
| *ParsedYAML->Refs, |
| UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")), |
| ::testing::SizeIs(1)))); |
| auto Ref1 = ParsedYAML->Refs->begin()->second.front(); |
| EXPECT_EQ(Ref1.Kind, RefKind::Reference); |
| EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc"); |
| |
| SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756")); |
| SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D")); |
| ASSERT_TRUE(bool(ParsedYAML->Relations)); |
| EXPECT_THAT( |
| *ParsedYAML->Relations, |
| UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived})); |
| |
| ASSERT_TRUE(bool(ParsedYAML->Cmd)); |
| auto &Cmd = *ParsedYAML->Cmd; |
| ASSERT_EQ(Cmd.Directory, "testdir"); |
| EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2")); |
| |
| ASSERT_TRUE(bool(ParsedYAML->Sources)); |
| const auto *URI = "file:///path/source1.cpp"; |
| ASSERT_TRUE(ParsedYAML->Sources->count(URI)); |
| auto IGNDeserialized = ParsedYAML->Sources->lookup(URI); |
| EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C"); |
| EXPECT_THAT(IGNDeserialized.DirectIncludes, |
| ElementsAre("file:///path/inc1.h", "file:///path/inc2.h")); |
| EXPECT_EQ(IGNDeserialized.URI, URI); |
| EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1)); |
| } |
| |
| std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) { |
| std::vector<std::string> Result; |
| for (const auto &Sym : Slab) |
| Result.push_back(toYAML(Sym)); |
| return Result; |
| } |
| std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) { |
| std::vector<std::string> Result; |
| for (const auto &Refs : Slab) |
| Result.push_back(toYAML(Refs)); |
| return Result; |
| } |
| |
| std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) { |
| std::vector<std::string> Result; |
| for (const auto &Rel : Slab) |
| Result.push_back(toYAML(Rel)); |
| return Result; |
| } |
| |
| TEST(SerializationTest, BinaryConversions) { |
| auto In = readIndexFile(YAML); |
| EXPECT_TRUE(bool(In)) << In.takeError(); |
| |
| // Write to binary format, and parse again. |
| IndexFileOut Out(*In); |
| Out.Format = IndexFileFormat::RIFF; |
| std::string Serialized = llvm::to_string(Out); |
| |
| auto In2 = readIndexFile(Serialized); |
| ASSERT_TRUE(bool(In2)) << In.takeError(); |
| ASSERT_TRUE(In2->Symbols); |
| ASSERT_TRUE(In2->Refs); |
| ASSERT_TRUE(In2->Relations); |
| |
| // Assert the YAML serializations match, for nice comparisons and diffs. |
| EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), |
| UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); |
| EXPECT_THAT(YAMLFromRefs(*In2->Refs), |
| UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); |
| EXPECT_THAT(YAMLFromRelations(*In2->Relations), |
| UnorderedElementsAreArray(YAMLFromRelations(*In->Relations))); |
| } |
| |
| TEST(SerializationTest, SrcsTest) { |
| auto In = readIndexFile(YAML); |
| EXPECT_TRUE(bool(In)) << In.takeError(); |
| |
| std::string TestContent("TestContent"); |
| IncludeGraphNode IGN; |
| IGN.Digest = digest(TestContent); |
| IGN.DirectIncludes = {"inc1", "inc2"}; |
| IGN.URI = "URI"; |
| IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; |
| IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
| IncludeGraph Sources; |
| Sources[IGN.URI] = IGN; |
| // Write to binary format, and parse again. |
| IndexFileOut Out(*In); |
| Out.Format = IndexFileFormat::RIFF; |
| Out.Sources = &Sources; |
| { |
| std::string Serialized = llvm::to_string(Out); |
| |
| auto In = readIndexFile(Serialized); |
| ASSERT_TRUE(bool(In)) << In.takeError(); |
| ASSERT_TRUE(In->Symbols); |
| ASSERT_TRUE(In->Refs); |
| ASSERT_TRUE(In->Sources); |
| ASSERT_TRUE(In->Sources->count(IGN.URI)); |
| // Assert the YAML serializations match, for nice comparisons and diffs. |
| EXPECT_THAT(YAMLFromSymbols(*In->Symbols), |
| UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); |
| EXPECT_THAT(YAMLFromRefs(*In->Refs), |
| UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); |
| auto IGNDeserialized = In->Sources->lookup(IGN.URI); |
| EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); |
| EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); |
| EXPECT_EQ(IGNDeserialized.URI, IGN.URI); |
| EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags); |
| } |
| } |
| |
| TEST(SerializationTest, CmdlTest) { |
| auto In = readIndexFile(YAML); |
| EXPECT_TRUE(bool(In)) << In.takeError(); |
| |
| tooling::CompileCommand Cmd; |
| Cmd.Directory = "testdir"; |
| Cmd.CommandLine.push_back("cmd1"); |
| Cmd.CommandLine.push_back("cmd2"); |
| Cmd.Filename = "ignored"; |
| Cmd.Heuristic = "ignored"; |
| Cmd.Output = "ignored"; |
| |
| IndexFileOut Out(*In); |
| Out.Format = IndexFileFormat::RIFF; |
| Out.Cmd = &Cmd; |
| { |
| std::string Serialized = llvm::to_string(Out); |
| |
| auto In = readIndexFile(Serialized); |
| ASSERT_TRUE(bool(In)) << In.takeError(); |
| ASSERT_TRUE(In->Cmd); |
| |
| const tooling::CompileCommand &SerializedCmd = In->Cmd.getValue(); |
| EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine); |
| EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory); |
| EXPECT_NE(SerializedCmd.Filename, Cmd.Filename); |
| EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic); |
| EXPECT_NE(SerializedCmd.Output, Cmd.Output); |
| } |
| } |
| |
| // rlimit is part of POSIX. |
| // Sanitizers use a lot of address space, so we can't apply strict limits. |
| #if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD && \ |
| !LLVM_MEMORY_SANITIZER_BUILD |
| class ScopedMemoryLimit { |
| struct rlimit OriginalLimit; |
| bool Succeeded = false; |
| |
| public: |
| ScopedMemoryLimit(rlim_t Bytes) { |
| if (!getrlimit(RLIMIT_AS, &OriginalLimit)) { |
| struct rlimit NewLimit = OriginalLimit; |
| NewLimit.rlim_cur = Bytes; |
| Succeeded = !setrlimit(RLIMIT_AS, &NewLimit); |
| } |
| if (!Succeeded) |
| log("Failed to set rlimit"); |
| } |
| |
| ~ScopedMemoryLimit() { |
| if (Succeeded) |
| setrlimit(RLIMIT_AS, &OriginalLimit); |
| } |
| }; |
| #else |
| class ScopedMemoryLimit { |
| public: |
| ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); } |
| }; |
| #endif |
| |
| // Test that our deserialization detects invalid array sizes without allocating. |
| // If this detection fails, the test should allocate a huge array and crash. |
| TEST(SerializationTest, NoCrashOnBadArraySize) { |
| // This test is tricky because we need to construct a subtly invalid file. |
| // First, create a valid serialized file. |
| auto In = readIndexFile(YAML); |
| ASSERT_FALSE(!In) << In.takeError(); |
| IndexFileOut Out(*In); |
| Out.Format = IndexFileFormat::RIFF; |
| std::string Serialized = llvm::to_string(Out); |
| |
| // Low-level parse it again and find the `srcs` chunk we're going to corrupt. |
| auto Parsed = riff::readFile(Serialized); |
| ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
| auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { |
| return C.ID == riff::fourCC("srcs"); |
| }); |
| ASSERT_NE(Srcs, Parsed->Chunks.end()); |
| |
| // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one. |
| // The node has: |
| // - 1 byte: flags (1) |
| // - varint(stringID): URI |
| // - 8 byte: file digest |
| // - varint: DirectIncludes.length |
| // - repeated varint(stringID): DirectIncludes |
| // We want to set DirectIncludes.length to a huge number. |
| // The offset isn't trivial to find, so we use the file digest. |
| std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); |
| unsigned Pos = Srcs->Data.find_first_of(FileDigest); |
| ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest"; |
| Pos += FileDigest.size(); |
| |
| // Varints are little-endian base-128 numbers, where the top-bit of each byte |
| // indicates whether there are more. ffffffff0f -> 0xffffffff. |
| std::string CorruptSrcs = |
| (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") + |
| "some_random_garbage") |
| .str(); |
| Srcs->Data = CorruptSrcs; |
| |
| // Try to crash rather than hang on large allocation. |
| ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
| |
| std::string CorruptFile = llvm::to_string(*Parsed); |
| auto CorruptParsed = readIndexFile(CorruptFile); |
| ASSERT_TRUE(!CorruptParsed); |
| EXPECT_EQ(llvm::toString(CorruptParsed.takeError()), |
| "malformed or truncated include uri"); |
| } |
| |
| // Check we detect invalid string table size size without allocating it first. |
| // If this detection fails, the test should allocate a huge array and crash. |
| TEST(SerializationTest, NoCrashOnBadStringTableSize) { |
| if (!llvm::zlib::isAvailable()) { |
| log("skipping test, no zlib"); |
| return; |
| } |
| |
| // First, create a valid serialized file. |
| auto In = readIndexFile(YAML); |
| ASSERT_FALSE(!In) << In.takeError(); |
| IndexFileOut Out(*In); |
| Out.Format = IndexFileFormat::RIFF; |
| std::string Serialized = llvm::to_string(Out); |
| |
| // Low-level parse it again, we're going to replace the `stri` chunk. |
| auto Parsed = riff::readFile(Serialized); |
| ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
| auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { |
| return C.ID == riff::fourCC("stri"); |
| }); |
| ASSERT_NE(Stri, Parsed->Chunks.end()); |
| |
| // stri consists of an 8 byte uncompressed-size, and then compressed data. |
| // We'll claim our small amount of data expands to 4GB |
| std::string CorruptStri = |
| (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str(); |
| Stri->Data = CorruptStri; |
| std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); |
| |
| // Try to crash rather than hang on large allocation. |
| ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
| |
| std::string CorruptFile = llvm::to_string(*Parsed); |
| auto CorruptParsed = readIndexFile(CorruptFile); |
| ASSERT_TRUE(!CorruptParsed); |
| EXPECT_THAT(llvm::toString(CorruptParsed.takeError()), |
| testing::HasSubstr("bytes is implausible")); |
| } |
| |
| } // namespace |
| } // namespace clangd |
| } // namespace clang |