| //===- TokensTest.cpp -----------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "clang/Tooling/Syntax/Tokens.h" |
| #include "clang/AST/ASTConsumer.h" |
| #include "clang/AST/Expr.h" |
| #include "clang/Basic/Diagnostic.h" |
| #include "clang/Basic/DiagnosticIDs.h" |
| #include "clang/Basic/DiagnosticOptions.h" |
| #include "clang/Basic/FileManager.h" |
| #include "clang/Basic/FileSystemOptions.h" |
| #include "clang/Basic/LLVM.h" |
| #include "clang/Basic/LangOptions.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Basic/TokenKinds.def" |
| #include "clang/Basic/TokenKinds.h" |
| #include "clang/Frontend/CompilerInstance.h" |
| #include "clang/Frontend/FrontendAction.h" |
| #include "clang/Frontend/Utils.h" |
| #include "clang/Lex/Lexer.h" |
| #include "clang/Lex/PreprocessorOptions.h" |
| #include "clang/Lex/Token.h" |
| #include "clang/Tooling/Tooling.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/IntrusiveRefCntPtr.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Support/VirtualFileSystem.h" |
| #include "llvm/Support/raw_os_ostream.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Testing/Support/Annotations.h" |
| #include "llvm/Testing/Support/SupportHelpers.h" |
| #include <cassert> |
| #include <cstdlib> |
| #include <gmock/gmock.h> |
| #include <gtest/gtest.h> |
| #include <memory> |
| #include <ostream> |
| #include <string> |
| |
| using namespace clang; |
| using namespace clang::syntax; |
| |
| using llvm::ValueIs; |
| using ::testing::AllOf; |
| using ::testing::Contains; |
| using ::testing::ElementsAre; |
| using ::testing::Field; |
| using ::testing::Matcher; |
| using ::testing::Not; |
| using ::testing::StartsWith; |
| |
| namespace { |
| // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the |
| // argument. |
| MATCHER_P(SameRange, A, "") { |
| return A.begin() == arg.begin() && A.end() == arg.end(); |
| } |
| |
| Matcher<TokenBuffer::Expansion> |
| IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, |
| Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { |
| return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), |
| Field(&TokenBuffer::Expansion::Expanded, Expanded)); |
| } |
| // Matchers for syntax::Token. |
| MATCHER_P(Kind, K, "") { return arg.kind() == K; } |
| MATCHER_P2(HasText, Text, SourceMgr, "") { |
| return arg.text(*SourceMgr) == Text; |
| } |
| /// Checks the start and end location of a token are equal to SourceRng. |
| MATCHER_P(RangeIs, SourceRng, "") { |
| return arg.location() == SourceRng.first && |
| arg.endLocation() == SourceRng.second; |
| } |
| |
| class TokenCollectorTest : public ::testing::Test { |
| public: |
| /// Run the clang frontend, collect the preprocessed tokens from the frontend |
| /// invocation and store them in this->Buffer. |
| /// This also clears SourceManager before running the compiler. |
| void recordTokens(llvm::StringRef Code) { |
| class RecordTokens : public ASTFrontendAction { |
| public: |
| explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} |
| |
| bool BeginSourceFileAction(CompilerInstance &CI) override { |
| assert(!Collector && "expected only a single call to BeginSourceFile"); |
| Collector.emplace(CI.getPreprocessor()); |
| return true; |
| } |
| void EndSourceFileAction() override { |
| assert(Collector && "BeginSourceFileAction was never called"); |
| Result = std::move(*Collector).consume(); |
| } |
| |
| std::unique_ptr<ASTConsumer> |
| CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { |
| return std::make_unique<ASTConsumer>(); |
| } |
| |
| private: |
| TokenBuffer &Result; |
| llvm::Optional<TokenCollector> Collector; |
| }; |
| |
| constexpr const char *FileName = "./input.cpp"; |
| FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); |
| // Prepare to run a compiler. |
| if (!Diags->getClient()) |
| Diags->setClient(new IgnoringDiagConsumer); |
| std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", |
| FileName}; |
| auto CI = createInvocationFromCommandLine(Args, Diags, FS); |
| assert(CI); |
| CI->getFrontendOpts().DisableFree = false; |
| CI->getPreprocessorOpts().addRemappedFile( |
| FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); |
| CompilerInstance Compiler; |
| Compiler.setInvocation(std::move(CI)); |
| Compiler.setDiagnostics(Diags.get()); |
| Compiler.setFileManager(FileMgr.get()); |
| Compiler.setSourceManager(SourceMgr.get()); |
| |
| this->Buffer = TokenBuffer(*SourceMgr); |
| RecordTokens Recorder(this->Buffer); |
| ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) |
| << "failed to run the frontend"; |
| } |
| |
| /// Record the tokens and return a test dump of the resulting buffer. |
| std::string collectAndDump(llvm::StringRef Code) { |
| recordTokens(Code); |
| return Buffer.dumpForTests(); |
| } |
| |
| // Adds a file to the test VFS. |
| void addFile(llvm::StringRef Path, llvm::StringRef Contents) { |
| if (!FS->addFile(Path, time_t(), |
| llvm::MemoryBuffer::getMemBufferCopy(Contents))) { |
| ADD_FAILURE() << "could not add a file to VFS: " << Path; |
| } |
| } |
| |
| /// Add a new file, run syntax::tokenize() on it and return the results. |
| std::vector<syntax::Token> tokenize(llvm::StringRef Text) { |
| // FIXME: pass proper LangOptions. |
| return syntax::tokenize( |
| SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)), |
| *SourceMgr, LangOptions()); |
| } |
| |
| // Specialized versions of matchers that hide the SourceManager from clients. |
| Matcher<syntax::Token> HasText(std::string Text) const { |
| return ::HasText(Text, SourceMgr.get()); |
| } |
| Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { |
| std::pair<SourceLocation, SourceLocation> Ls; |
| Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) |
| .getLocWithOffset(R.Begin); |
| Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) |
| .getLocWithOffset(R.End); |
| return ::RangeIs(Ls); |
| } |
| |
| /// Finds a subrange in O(n * m). |
| template <class T, class U, class Eq> |
| llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, |
| llvm::ArrayRef<T> Range, Eq F) { |
| for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { |
| auto It = Begin; |
| for (auto ItSub = Subrange.begin(); |
| ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { |
| if (!F(*ItSub, *It)) |
| goto continue_outer; |
| } |
| return llvm::makeArrayRef(Begin, It); |
| continue_outer:; |
| } |
| return llvm::makeArrayRef(Range.end(), Range.end()); |
| } |
| |
| /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. |
| /// The match should be unique. \p Query is a whitespace-separated list of |
| /// tokens to search for. |
| llvm::ArrayRef<syntax::Token> |
| findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { |
| llvm::SmallVector<llvm::StringRef, 8> QueryTokens; |
| Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
| if (QueryTokens.empty()) { |
| ADD_FAILURE() << "will not look for an empty list of tokens"; |
| std::abort(); |
| } |
| // An equality test for search. |
| auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { |
| return Q == T.text(*SourceMgr); |
| }; |
| // Find a match. |
| auto Found = |
| findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); |
| if (Found.begin() == Tokens.end()) { |
| ADD_FAILURE() << "could not find the subrange for " << Query; |
| std::abort(); |
| } |
| // Check that the match is unique. |
| if (findSubrange(llvm::makeArrayRef(QueryTokens), |
| llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) |
| .begin() != Tokens.end()) { |
| ADD_FAILURE() << "match is not unique for " << Query; |
| std::abort(); |
| } |
| return Found; |
| }; |
| |
| // Specialized versions of findTokenRange for expanded and spelled tokens. |
| llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { |
| return findTokenRange(Query, Buffer.expandedTokens()); |
| } |
| llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, |
| FileID File = FileID()) { |
| if (!File.isValid()) |
| File = SourceMgr->getMainFileID(); |
| return findTokenRange(Query, Buffer.spelledTokens(File)); |
| } |
| |
| // Data fields. |
| llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
| new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); |
| IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = |
| new llvm::vfs::InMemoryFileSystem; |
| llvm::IntrusiveRefCntPtr<FileManager> FileMgr = |
| new FileManager(FileSystemOptions(), FS); |
| llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = |
| new SourceManager(*Diags, *FileMgr); |
| /// Contains last result of calling recordTokens(). |
| TokenBuffer Buffer = TokenBuffer(*SourceMgr); |
| }; |
| |
| TEST_F(TokenCollectorTest, RawMode) { |
| EXPECT_THAT(tokenize("int main() {}"), |
| ElementsAre(Kind(tok::kw_int), |
| AllOf(HasText("main"), Kind(tok::identifier)), |
| Kind(tok::l_paren), Kind(tok::r_paren), |
| Kind(tok::l_brace), Kind(tok::r_brace))); |
| // Comments are ignored for now. |
| EXPECT_THAT(tokenize("/* foo */int a; // more comments"), |
| ElementsAre(Kind(tok::kw_int), |
| AllOf(HasText("a"), Kind(tok::identifier)), |
| Kind(tok::semi))); |
| } |
| |
// Runs the frontend over several small inputs and compares the complete text
// dump of the resulting token buffer (expanded tokens, then spelled tokens and
// mappings per file) with the expected string of each test case.
| TEST_F(TokenCollectorTest, Basic) { |
| std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
| {"int main() {}", |
| R"(expanded tokens: |
| int main ( ) { } |
| file './input.cpp' |
| spelled tokens: |
| int main ( ) { } |
| no mappings. |
| )"}, |
| // All kinds of whitespace are ignored. |
| {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", |
| R"(expanded tokens: |
| int main ( ) { } |
| file './input.cpp' |
| spelled tokens: |
| int main ( ) { } |
| no mappings. |
| )"}, |
| // Annotation tokens are ignored. |
| {R"cpp( |
| #pragma GCC visibility push (public) |
| #pragma GCC visibility pop |
| )cpp", |
| R"(expanded tokens: |
| <empty> |
| file './input.cpp' |
| spelled tokens: |
| # pragma GCC visibility push ( public ) # pragma GCC visibility pop |
| mappings: |
| ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) |
| )"}, |
| // Empty files should not crash. |
| {R"cpp()cpp", R"(expanded tokens: |
| <empty> |
| file './input.cpp' |
| spelled tokens: |
| <empty> |
| no mappings. |
| )"}, |
| // Should not crash on errors inside '#define' directives. Error is that |
| // stringification (#B) does not refer to a macro parameter. |
| { |
| R"cpp( |
| a |
| #define MACRO() A #B |
| )cpp", |
| R"(expanded tokens: |
| a |
| file './input.cpp' |
| spelled tokens: |
| a # define MACRO ( ) A # B |
| mappings: |
| ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) |
| )"}}; |
// Each case is compiled from scratch by collectAndDump(). The second
// collectAndDump() call is streamed into the failure message so that the
// actual dump is shown when the comparison fails.
| for (auto &Test : TestCases) |
| EXPECT_EQ(collectAndDump(Test.first), Test.second) |
| << collectAndDump(Test.first); |
| } |
| |
// Checks that every token recorded for the annotated declaration starts and
// ends exactly at the offsets of its $r1..$r5 annotation, for the expanded
// token stream as well as for the spelled tokens of the main file.
| TEST_F(TokenCollectorTest, Locations) { |
| // Check locations of the tokens. |
| llvm::Annotations Code(R"cpp( |
| $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] |
| )cpp"); |
| recordTokens(Code.code()); |
| // Check expanded tokens. |
// The expanded stream is expected to end with an extra eof token.
| EXPECT_THAT( |
| Buffer.expandedTokens(), |
| ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), |
| AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), |
| AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), |
| AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), |
| AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), |
| Kind(tok::eof))); |
| // Check spelled tokens. |
// No eof token is expected among the spelled tokens.
| EXPECT_THAT( |
| Buffer.spelledTokens(SourceMgr->getMainFileID()), |
| ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), |
| AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), |
| AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), |
| AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), |
| AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); |
| } |
| |
// Dumps a file made of preprocessor directives followed by `int a;`. The
// expected dump lists only `int a ;` as expanded tokens, keeps every directive
// token among the spelled tokens, and has a single mapping from the whole
// directive run to an empty expanded range.
| TEST_F(TokenCollectorTest, MacroDirectives) { |
| // Macro directives are not stored anywhere at the moment. |
| std::string Code = R"cpp( |
| #define FOO a |
| #include "unresolved_file.h" |
| #undef FOO |
| #ifdef X |
| #else |
| #endif |
| #ifndef Y |
| #endif |
| #if 1 |
| #elif 2 |
| #else |
| #endif |
| #pragma once |
| #pragma something lalala |
| |
| int a; |
| )cpp"; |
// The expectation is assembled from adjacent string literals so that the long
// line of spelled tokens can be wrapped in the source.
| std::string Expected = |
| "expanded tokens:\n" |
| " int a ;\n" |
| "file './input.cpp'\n" |
| " spelled tokens:\n" |
| " # define FOO a # include \"unresolved_file.h\" # undef FOO " |
| "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " |
| "# endif # pragma once # pragma something lalala int a ;\n" |
| " mappings:\n" |
| " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; |
| EXPECT_EQ(collectAndDump(Code), Expected); |
| } |
| |
// Compares token-buffer dumps for inputs that use macros: object-like,
// function-like, nested, empty, and a macro use at the very end of the file.
// Each expected dump pairs the spelled macro use with the expanded tokens it
// produced.
| TEST_F(TokenCollectorTest, MacroReplacements) { |
| std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { |
| // A simple object-like macro. |
| {R"cpp( |
| #define INT int const |
| INT a; |
| )cpp", |
| R"(expanded tokens: |
| int const a ; |
| file './input.cpp' |
| spelled tokens: |
| # define INT int const INT a ; |
| mappings: |
| ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) |
| ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) |
| )"}, |
| // A simple function-like macro. |
| {R"cpp( |
| #define INT(a) const int |
| INT(10+10) a; |
| )cpp", |
| R"(expanded tokens: |
| const int a ; |
| file './input.cpp' |
| spelled tokens: |
| # define INT ( a ) const int INT ( 10 + 10 ) a ; |
| mappings: |
| ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) |
| ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) |
| )"}, |
| // Recursive macro replacements. |
| {R"cpp( |
| #define ID(X) X |
| #define INT int const |
| ID(ID(INT)) a; |
| )cpp", |
| R"(expanded tokens: |
| int const a ; |
| file './input.cpp' |
| spelled tokens: |
| # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; |
| mappings: |
| ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) |
| ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) |
| )"}, |
| // A little more complicated recursive macro replacements. |
| {R"cpp( |
| #define ADD(X, Y) X+Y |
| #define MULT(X, Y) X*Y |
| |
| int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); |
| )cpp", |
| "expanded tokens:\n" |
| " int a = 1 * 2 + 3 * 4 + 5 ;\n" |
| "file './input.cpp'\n" |
| " spelled tokens:\n" |
| " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " |
| "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" |
| " mappings:\n" |
| " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" |
| " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, |
| // Empty macro replacement. |
| // FIXME: the #define directives should not be glued together. |
| {R"cpp( |
| #define EMPTY |
| #define EMPTY_FUNC(X) |
| EMPTY |
| EMPTY_FUNC(1+2+3) |
| )cpp", |
| R"(expanded tokens: |
| <empty> |
| file './input.cpp' |
| spelled tokens: |
| # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) |
| mappings: |
| ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) |
| ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) |
| ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) |
| )"}, |
| // File ends with a macro replacement. |
| {R"cpp( |
| #define FOO 10+10; |
| int a = FOO |
| )cpp", |
| R"(expanded tokens: |
| int a = 10 + 10 ; |
| file './input.cpp' |
| spelled tokens: |
| # define FOO 10 + 10 ; int a = FOO |
| mappings: |
| ['#'_0, 'int'_7) => ['int'_0, 'int'_0) |
| ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) |
| )"}}; |
| |
// The second collectAndDump() call is streamed into the failure message so
// that the actual dump is shown when a case does not match.
| for (auto &Test : TestCases) |
| EXPECT_EQ(Test.second, collectAndDump(Test.first)) |
| << collectAndDump(Test.first); |
| } |
| |
// Covers tokens whose text is not a plain contiguous spelling in the input:
// the result of a `##` concatenation, and a keyword written across several
// lines with backslash-newline continuations.
| TEST_F(TokenCollectorTest, SpecialTokens) { |
| // Tokens coming from concatenations. |
| recordTokens(R"cpp( |
| #define CONCAT(a, b) a ## b |
| int a = CONCAT(1, 2); |
| )cpp"); |
// The ArrayRef is copied into a std::vector before it is handed to Contains().
| EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
| Contains(HasText("12"))); |
| // Multi-line tokens with slashes at the end. |
| recordTokens("i\\\nn\\\nt"); |
// The token is expected to have kind kw_int while its text still contains the
// backslash-newline sequences.
| EXPECT_THAT(Buffer.expandedTokens(), |
| ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), |
| Kind(tok::eof))); |
| // FIXME: test tokens with digraphs and UCN identifiers. |
| } |
| |
// Checks that both `>>` occurrences in the input, the one closing two template
// argument lists ($br) and the shift operator ($op), are recorded as a single
// greatergreater token spanning the annotated range.
| TEST_F(TokenCollectorTest, LateBoundTokens) { |
| // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), |
| // but we choose to record them as a single token (for now). |
| llvm::Annotations Code(R"cpp( |
| template <class T> |
| struct foo { int a; }; |
| int bar = foo<foo<int$br[[>>]]().a; |
| int baz = 10 $op[[>>]] 2; |
| )cpp"); |
| recordTokens(Code.code()); |
| EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), |
| AllOf(Contains(AllOf(Kind(tok::greatergreater), |
| RangeIs(Code.range("br")))), |
| Contains(AllOf(Kind(tok::greatergreater), |
| RangeIs(Code.range("op")))))); |
| } |
| |
// Checks that the dump of a class with inline method bodies and a default
// member initializer starts with every token of the class listed exactly once
// (see the comment embedded in the input below for the motivation).
| TEST_F(TokenCollectorTest, DelayedParsing) { |
| llvm::StringLiteral Code = R"cpp( |
| struct Foo { |
| int method() { |
| // Parser will visit method bodies and initializers multiple times, but |
| // TokenBuffer should only record the first walk over the tokens; |
| return 100; |
| } |
| int a = 10; |
| |
| struct Subclass { |
| void foo() { |
| Foo().method(); |
| } |
| }; |
| }; |
| )cpp"; |
// Only the "expanded tokens" part at the start of the dump is checked; the
// rest of the dump is not compared.
| std::string ExpectedTokens = |
| "expanded tokens:\n" |
| " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " |
| "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; |
| EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); |
| } |
| |
// Registers two headers in the test VFS (foo.h includes bar.h), compiles a main
// file that includes foo.h, and checks the dump. The expected dump has one
// expanded token stream covering all three files, followed by the spelled
// tokens and mappings of each file separately.
| TEST_F(TokenCollectorTest, MultiFile) { |
| addFile("./foo.h", R"cpp( |
| #define ADD(X, Y) X+Y |
| int a = 100; |
| #include "bar.h" |
| )cpp"); |
| addFile("./bar.h", R"cpp( |
| int b = ADD(1, 2); |
| #define MULT(X, Y) X*Y |
| )cpp"); |
| llvm::StringLiteral Code = R"cpp( |
| #include "foo.h" |
| int c = ADD(1, MULT(2,3)); |
| )cpp"; |
| |
| std::string Expected = R"(expanded tokens: |
| int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; |
| file './input.cpp' |
| spelled tokens: |
| # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; |
| mappings: |
| ['#'_0, 'int'_3) => ['int'_12, 'int'_12) |
| ['ADD'_6, ';'_17) => ['1'_15, ';'_20) |
| file './foo.h' |
| spelled tokens: |
| # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" |
| mappings: |
| ['#'_0, 'int'_11) => ['int'_0, 'int'_0) |
| ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) |
| file './bar.h' |
| spelled tokens: |
| int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y |
| mappings: |
| ['ADD'_3, ';'_9) => ['1'_8, ';'_11) |
| ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) |
| )"; |
| |
// The input and a freshly collected dump are streamed into the failure message
// to make a mismatch easier to diagnose.
| EXPECT_EQ(Expected, collectAndDump(Code)) |
| << "input: " << Code << "\nresults: " << collectAndDump(Code); |
| } |
| |
| class TokenBufferTest : public TokenCollectorTest {}; |
| |
// Exercises TokenBuffer::spelledForExpanded(): a range of expanded tokens is
// mapped back to the spelled tokens it came from. Ranges that cover only a part
// of a macro expansion are expected to yield llvm::None.
| TEST_F(TokenBufferTest, SpelledByExpanded) { |
| recordTokens(R"cpp( |
| a1 a2 a3 b1 b2 |
| )cpp"); |
| |
| // Sanity check: expanded and spelled tokens are stored separately. |
| EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); |
| // Searching for subranges of expanded tokens should give the corresponding |
| // spelled ones. |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), |
| ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), |
| ValueIs(SameRange(findSpelled("a1 a2 a3")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), |
| ValueIs(SameRange(findSpelled("b1 b2")))); |
| |
| // Test search on simple macro expansions. |
| recordTokens(R"cpp( |
| #define A a1 a2 a3 |
| #define B b1 b2 |
| |
| A split B |
| )cpp"); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), |
| ValueIs(SameRange(findSpelled("A split B")))); |
// `A` and `B` also occur in their #define lines, and findSpelled() requires a
// unique match. So the neighbouring `split` token is added to the query and
// then removed again with drop_back()/drop_front().
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), |
| ValueIs(SameRange(findSpelled("A split").drop_back()))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), |
| ValueIs(SameRange(findSpelled("split B").drop_front()))); |
| // Ranges not fully covering macro invocations should fail. |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), |
| llvm::None); |
| |
| // Recursive macro invocations. |
| recordTokens(R"cpp( |
| #define ID(x) x |
| #define B b1 b2 |
| |
| ID(ID(ID(a1) a2 a3)) split ID(B) |
| )cpp"); |
| |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), |
| ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), |
| ValueIs(SameRange(findSpelled("ID ( B )")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), |
| ValueIs(SameRange(findSpelled( |
| "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); |
| // Ranges crossing macro call boundaries. |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), |
| llvm::None); |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), |
| llvm::None); |
| // FIXME: next two examples should map to macro arguments, but currently they |
| // fail. |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); |
| EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); |
| |
| // Empty macro expansions. |
| recordTokens(R"cpp( |
| #define EMPTY |
| #define ID(X) X |
| |
| EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 |
| EMPTY EMPTY ID(4 5 6) split2 |
| ID(7 8 9) EMPTY EMPTY |
| )cpp"); |
// The expected spelled ranges contain only the ID(...) invocation, without the
// neighbouring EMPTY tokens.
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), |
| ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), |
| ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), |
| ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); |
| |
| // Empty mappings coming from various directives. |
| recordTokens(R"cpp( |
| #define ID(X) X |
| ID(1) |
| #pragma lalala |
| not_mapped |
| )cpp"); |
| EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), |
| ValueIs(SameRange(findSpelled("not_mapped")))); |
| } |
| |
// Exercises TokenBuffer::expansionStartingAt(): an expansion is expected only
// when the query points at the first spelled token of a macro invocation or of
// a preprocessor directive. Every other spelled token of the same invocation or
// directive is expected to yield llvm::None.
| TEST_F(TokenBufferTest, ExpansionStartingAt) { |
| // Object-like macro expansions. |
| recordTokens(R"cpp( |
| #define FOO 3+4 |
| int a = FOO 1; |
| int b = FOO 2; |
| )cpp"); |
| |
// FOO is spelled several times, so the following literal is added to make the
// query unique and then dropped with drop_back(). The same trick is used for
// the expanded `3 + 4` tokens.
| llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); |
| EXPECT_THAT( |
| Buffer.expansionStartingAt(Foo1.data()), |
| ValueIs(IsExpansion(SameRange(Foo1), |
| SameRange(findExpanded("3 + 4 1").drop_back())))); |
| |
| llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); |
| EXPECT_THAT( |
| Buffer.expansionStartingAt(Foo2.data()), |
| ValueIs(IsExpansion(SameRange(Foo2), |
| SameRange(findExpanded("3 + 4 2").drop_back())))); |
| |
| // Function-like macro expansions. |
| recordTokens(R"cpp( |
| #define ID(X) X |
| int a = ID(1+2+3); |
| int b = ID(ID(2+3+4)); |
| )cpp"); |
| |
| llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); |
| EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), |
| ValueIs(IsExpansion(SameRange(ID1), |
| SameRange(findExpanded("1 + 2 + 3"))))); |
| // Only the first spelled token should be found. |
| for (const auto &T : ID1.drop_front()) |
| EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); |
| |
| llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); |
| EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), |
| ValueIs(IsExpansion(SameRange(ID2), |
| SameRange(findExpanded("2 + 3 + 4"))))); |
| // Only the first spelled token should be found. |
| for (const auto &T : ID2.drop_front()) |
| EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); |
| |
| // PP directives. |
| recordTokens(R"cpp( |
| #define FOO 1 |
| int a = FOO; |
| #pragma once |
| int b = 1; |
| )cpp"); |
| |
// A directive is expected to map to an empty expanded range. take_front(0)
// builds that empty range, positioned at the `int` token that follows the
// directive.
| llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); |
| EXPECT_THAT( |
| Buffer.expansionStartingAt(&DefineFoo.front()), |
| ValueIs(IsExpansion(SameRange(DefineFoo), |
| SameRange(findExpanded("int a").take_front(0))))); |
| // Only the first spelled token should be found. |
| for (const auto &T : DefineFoo.drop_front()) |
| EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); |
| |
| llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); |
| EXPECT_THAT( |
| Buffer.expansionStartingAt(&PragmaOnce.front()), |
| ValueIs(IsExpansion(SameRange(PragmaOnce), |
| SameRange(findExpanded("int b").take_front(0))))); |
| // Only the first spelled token should be found. |
| for (const auto &T : PragmaOnce.drop_front()) |
| EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); |
| } |
| |
// Checks the conversion of tokens to FileRange: Token::range() for one token
// and the static syntax::Token::range() for a first/last pair. Both are
// compared with the offsets of the annotated ranges in the main file.
// The input also includes a header and expands a macro, but the assertions
// visible here only use the `int` and `;` tokens written in the main file.
| TEST_F(TokenBufferTest, TokensToFileRange) { |
| addFile("./foo.h", "token_from_header"); |
| llvm::Annotations Code(R"cpp( |
| #define FOO token_from_expansion |
| #include "./foo.h" |
| $all[[$i[[int]] a = FOO;]] |
| )cpp"); |
| recordTokens(Code.code()); |
| |
| auto &SM = *SourceMgr; |
| |
| // Two simple examples. |
| auto Int = findExpanded("int").front(); |
| auto Semi = findExpanded(";").front(); |
| EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, |
| Code.range("i").End)); |
| EXPECT_EQ(syntax::Token::range(SM, Int, Semi), |
| FileRange(SM.getMainFileID(), Code.range("all").Begin, |
| Code.range("all").End)); |
| // We don't test assertion failures because death tests are slow. |
| } |
| |
// Checks TokenBuffer::macroExpansions() for the main file: the file ranges of
// the returned tokens must equal the $macro-annotated ranges, which mark the
// macro names FOO, CALL and G where they are used in the code.
| TEST_F(TokenBufferTest, macroExpansions) { |
| llvm::Annotations Code(R"cpp( |
| #define FOO B |
| #define FOO2 BA |
| #define CALL(X) int X |
| #define G CALL(FOO2) |
| int B; |
| $macro[[FOO]]; |
| $macro[[CALL]](A); |
| $macro[[G]]; |
| )cpp"); |
| recordTokens(Code.code()); |
| auto &SM = *SourceMgr; |
| auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); |
// Turn the annotated offsets into FileRanges of the main file.
| std::vector<FileRange> ExpectedMacroRanges; |
| for (auto Range : Code.ranges("macro")) |
| ExpectedMacroRanges.push_back( |
| FileRange(SM.getMainFileID(), Range.Begin, Range.End)); |
// Each returned expansion is dereferenced with ->, so macroExpansions() yields
// pointer-like elements. The two vectors are then compared element by element.
| std::vector<FileRange> ActualMacroRanges; |
| for (auto Expansion : Expansions) |
| ActualMacroRanges.push_back(Expansion->range(SM)); |
| EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); |
| } |
| } // namespace |