| //===--- DirectiveMap.cpp - Find and strip preprocessor directives --------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "clang-pseudo/DirectiveMap.h" |
| #include "clang/Basic/IdentifierTable.h" |
| #include "clang/Basic/TokenKinds.h" |
| #include "llvm/Support/FormatVariadic.h" |
| |
| namespace clang { |
| namespace pseudo { |
| namespace { |
| |
| class DirectiveParser { |
| public: |
| explicit DirectiveParser(const TokenStream &Code) |
| : Code(Code), Tok(&Code.front()) {} |
| void parse(DirectiveMap *Result) { parse(Result, /*TopLevel=*/true); } |
| |
| private: |
| // Roles that a directive might take within a conditional block. |
| enum class Cond { None, If, Else, End }; |
| static Cond classifyDirective(tok::PPKeywordKind K) { |
| switch (K) { |
| case clang::tok::pp_if: |
| case clang::tok::pp_ifdef: |
| case clang::tok::pp_ifndef: |
| return Cond::If; |
| case clang::tok::pp_elif: |
| case clang::tok::pp_elifdef: |
| case clang::tok::pp_elifndef: |
| case clang::tok::pp_else: |
| return Cond::Else; |
| case clang::tok::pp_endif: |
| return Cond::End; |
| default: |
| return Cond::None; |
| } |
| } |
| |
| // Parses tokens starting at Tok into Map. |
| // If we reach an End or Else directive that ends Map, returns it. |
| // If TopLevel is true, then we do not expect End and always return None. |
| llvm::Optional<DirectiveMap::Directive> parse(DirectiveMap *Map, |
| bool TopLevel) { |
| auto StartsDirective = |
| [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { |
| if (Tok->flag(LexFlags::StartsPPLine)) { |
| // If we considered a comment at the start of a PP-line, it doesn't |
| // start a directive but the directive can still start after it. |
| if (Tok->Kind == tok::comment) |
| AllowDirectiveAt = Tok + 1; |
| return Tok->Kind == tok::hash; |
| } |
| return Tok->Kind == tok::hash && AllowDirectiveAt == Tok; |
| }; |
| // Each iteration adds one chunk (or returns, if we see #endif). |
| while (Tok->Kind != tok::eof) { |
| // If there's no directive here, we have a code chunk. |
| if (!StartsDirective()) { |
| const Token *Start = Tok; |
| do |
| ++Tok; |
| while (Tok->Kind != tok::eof && !StartsDirective()); |
| Map->Chunks.push_back(DirectiveMap::Code{ |
| Token::Range{Code.index(*Start), Code.index(*Tok)}}); |
| continue; |
| } |
| |
| // We have some kind of directive. |
| DirectiveMap::Directive Directive; |
| parseDirective(&Directive); |
| Cond Kind = classifyDirective(Directive.Kind); |
| if (Kind == Cond::If) { |
| // #if or similar, starting a nested conditional block. |
| DirectiveMap::Conditional Conditional; |
| Conditional.Branches.emplace_back(); |
| Conditional.Branches.back().first = std::move(Directive); |
| parseConditional(&Conditional); |
| Map->Chunks.push_back(std::move(Conditional)); |
| } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { |
| // #endif or similar, ending this PStructure scope. |
| // (#endif is unexpected at the top level, treat as simple directive). |
| return std::move(Directive); |
| } else { |
| // #define or similar, a simple directive at the current scope. |
| Map->Chunks.push_back(std::move(Directive)); |
| } |
| } |
| return None; |
| } |
| |
| // Parse the rest of a conditional section, after seeing the If directive. |
| // Returns after consuming the End directive. |
| void parseConditional(DirectiveMap::Conditional *C) { |
| assert(C->Branches.size() == 1 && |
| C->Branches.front().second.Chunks.empty() && |
| "Should be ready to parse first branch body"); |
| while (Tok->Kind != tok::eof) { |
| auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false); |
| if (!Terminator) { |
| assert(Tok->Kind == tok::eof && "gave up parsing before eof?"); |
| C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok)); |
| return; |
| } |
| if (classifyDirective(Terminator->Kind) == Cond::End) { |
| C->End = std::move(*Terminator); |
| return; |
| } |
| assert(classifyDirective(Terminator->Kind) == Cond::Else && |
| "ended branch unexpectedly"); |
| C->Branches.emplace_back(); |
| C->Branches.back().first = std::move(*Terminator); |
| } |
| } |
| |
| // Parse a directive. Tok is the hash. |
| void parseDirective(DirectiveMap::Directive *D) { |
| assert(Tok->Kind == tok::hash); |
| |
| // Directive spans from the hash until the end of line or file. |
| const Token *Begin = Tok++; |
| while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine)) |
| ++Tok; |
| ArrayRef<Token> Tokens{Begin, Tok}; |
| D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())}; |
| |
| // Directive name is the first non-comment token after the hash. |
| Tokens = Tokens.drop_front().drop_while( |
| [](const Token &T) { return T.Kind == tok::comment; }); |
| if (!Tokens.empty()) |
| D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID(); |
| } |
| |
| const TokenStream &Code; |
| const Token *Tok; |
| clang::IdentifierTable PPKeywords; |
| }; |
| |
| } // namespace |
| |
| DirectiveMap DirectiveMap::parse(const TokenStream &Code) { |
| DirectiveMap Result; |
| DirectiveParser(Code).parse(&Result); |
| return Result; |
| } |
| |
| static void dump(llvm::raw_ostream &OS, const DirectiveMap &, unsigned Indent); |
| static void dump(llvm::raw_ostream &OS, |
| const DirectiveMap::Directive &Directive, unsigned Indent) { |
| OS.indent(Indent) << llvm::formatv("#{0} ({1} tokens)\n", |
| tok::getPPKeywordSpelling(Directive.Kind), |
| Directive.Tokens.size()); |
| } |
| static void dump(llvm::raw_ostream &OS, const DirectiveMap::Code &Code, |
| unsigned Indent) { |
| OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", Code.Tokens.size()); |
| } |
| static void dump(llvm::raw_ostream &OS, |
| const DirectiveMap::Conditional &Conditional, |
| unsigned Indent) { |
| for (const auto &Branch : Conditional.Branches) { |
| dump(OS, Branch.first, Indent); |
| dump(OS, Branch.second, Indent + 2); |
| } |
| dump(OS, Conditional.End, Indent); |
| } |
| |
| static void dump(llvm::raw_ostream &OS, const DirectiveMap::Chunk &Chunk, |
| unsigned Indent) { |
| switch (Chunk.kind()) { |
| case DirectiveMap::Chunk::K_Empty: |
| llvm_unreachable("invalid chunk"); |
| case DirectiveMap::Chunk::K_Code: |
| return dump(OS, (const DirectiveMap::Code &)Chunk, Indent); |
| case DirectiveMap::Chunk::K_Directive: |
| return dump(OS, (const DirectiveMap::Directive &)Chunk, Indent); |
| case DirectiveMap::Chunk::K_Conditional: |
| return dump(OS, (const DirectiveMap::Conditional &)Chunk, Indent); |
| } |
| } |
| |
| static void dump(llvm::raw_ostream &OS, const DirectiveMap &Map, |
| unsigned Indent) { |
| for (const auto &Chunk : Map.Chunks) |
| dump(OS, Chunk, Indent); |
| } |
| |
| // Define operator<< in terms of dump() functions above. |
| #define OSTREAM_DUMP(Type) \ |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \ |
| dump(OS, T, 0); \ |
| return OS; \ |
| } |
| OSTREAM_DUMP(DirectiveMap) |
| OSTREAM_DUMP(DirectiveMap::Chunk) |
| OSTREAM_DUMP(DirectiveMap::Directive) |
| OSTREAM_DUMP(DirectiveMap::Conditional) |
| OSTREAM_DUMP(DirectiveMap::Code) |
| #undef OSTREAM_DUMP |
| |
| } // namespace pseudo |
| } // namespace clang |