// blob: 6e66708818954de5be2093567fa408446470011a [file] [log] [blame]
//===--- DirectiveMap.cpp - Find and strip preprocessor directives --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/DirectiveMap.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/Support/FormatVariadic.h"
namespace clang {
namespace pseudo {
namespace {
// Splits a token stream into a DirectiveMap: a sequence of plain code chunks,
// simple directives, and (possibly nested) conditional blocks.
//
// The parser keeps a single cursor (Tok) into the stream. Every loop below
// stops only when it sees a tok::eof token, so the stream is presumably always
// eof-terminated (TODO: confirm against TokenStream's contract).
class DirectiveParser {
public:
  // Positions the cursor at the first token of Code.
  explicit DirectiveParser(const TokenStream &Code)
      : Code(Code), Tok(&Code.front()) {}

  // Parses the remaining tokens into Result, treating them as the top-level
  // scope (i.e. stray #else/#endif are kept as ordinary directives).
  void parse(DirectiveMap *Result) { parse(Result, /*TopLevel=*/true); }

private:
  // Roles that a directive might take within a conditional block.
  enum class Cond { None, If, Else, End };

  // Maps a preprocessor keyword onto its role in a conditional block.
  // Anything that neither opens, continues nor closes a conditional is None.
  static Cond classifyDirective(tok::PPKeywordKind K) {
    switch (K) {
    case clang::tok::pp_if:
    case clang::tok::pp_ifdef:
    case clang::tok::pp_ifndef:
      return Cond::If;
    case clang::tok::pp_elif:
    case clang::tok::pp_elifdef:
    case clang::tok::pp_elifndef:
    case clang::tok::pp_else:
      return Cond::Else;
    case clang::tok::pp_endif:
      return Cond::End;
    default:
      return Cond::None;
    }
  }

  // Parses tokens starting at Tok into Map.
  // If we reach an End or Else directive that ends Map, returns it.
  // If TopLevel is true, then we do not expect End and always return None.
  // Returning None therefore always means the cursor stopped at eof.
  llvm::Optional<DirectiveMap::Directive> parse(DirectiveMap *Map,
                                                bool TopLevel) {
    // Reports whether the token under the cursor is the hash that begins a
    // directive. The lambda is stateful: AllowDirectiveAt remembers the token
    // right after a comment that led a PP-line, where a hash is still allowed
    // to start a directive.
    auto StartsDirective =
        [&, AllowDirectiveAt = static_cast<const Token *>(nullptr)]() mutable {
          if (Tok->flag(LexFlags::StartsPPLine)) {
            // If we considered a comment at the start of a PP-line, it doesn't
            // start a directive but the directive can still start after it.
            if (Tok->Kind == tok::comment)
              AllowDirectiveAt = Tok + 1;
            return Tok->Kind == tok::hash;
          }
          return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
        };

    // Each iteration adds one chunk (or returns, if we see #endif).
    while (Tok->Kind != tok::eof) {
      // If there's no directive here, we have a code chunk.
      if (!StartsDirective()) {
        const Token *Start = Tok;
        // Extend the chunk up to (not including) the next directive or eof.
        do
          ++Tok;
        while (Tok->Kind != tok::eof && !StartsDirective());
        Map->Chunks.push_back(DirectiveMap::Code{
            Token::Range{Code.index(*Start), Code.index(*Tok)}});
        continue;
      }

      // We have some kind of directive.
      DirectiveMap::Directive Directive;
      parseDirective(&Directive);
      Cond Kind = classifyDirective(Directive.Kind);
      if (Kind == Cond::If) {
        // #if or similar, starting a nested conditional block.
        DirectiveMap::Conditional Conditional;
        Conditional.Branches.emplace_back();
        Conditional.Branches.back().first = std::move(Directive);
        parseConditional(&Conditional);
        Map->Chunks.push_back(std::move(Conditional));
      } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
        // #else, #endif or similar, ending the current branch's scope.
        // (At the top level these are unexpected; they fall through to the
        // simple-directive case below instead.)
        return std::move(Directive);
      } else {
        // #define or similar, a simple directive at the current scope.
        Map->Chunks.push_back(std::move(Directive));
      }
    }
    return None;
  }

  // Parse the rest of a conditional section, after seeing the If directive.
  // Returns after consuming the End directive, or at eof if the conditional is
  // unterminated. In the latter case C->End.Tokens is an empty range located
  // at the eof token.
  void parseConditional(DirectiveMap::Conditional *C) {
    assert(C->Branches.size() == 1 &&
           C->Branches.front().second.Chunks.empty() &&
           "Should be ready to parse first branch body");
    // Loop unconditionally rather than "while not eof": when eof directly
    // follows an #if/#else-like directive, parse() returns None immediately
    // and the eof path below still records where the missing End would be.
    for (;;) {
      auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false);
      if (!Terminator) {
        assert(Tok->Kind == tok::eof && "gave up parsing before eof?");
        C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok));
        return;
      }
      if (classifyDirective(Terminator->Kind) == Cond::End) {
        C->End = std::move(*Terminator);
        return;
      }
      assert(classifyDirective(Terminator->Kind) == Cond::Else &&
             "ended branch unexpectedly");
      // An Else-like directive opens the next branch of the same conditional.
      C->Branches.emplace_back();
      C->Branches.back().first = std::move(*Terminator);
    }
  }

  // Parse a directive. Tok is the hash.
  // On return Tok is at the first token of the next PP-line, or at eof.
  void parseDirective(DirectiveMap::Directive *D) {
    assert(Tok->Kind == tok::hash);

    // Directive spans from the hash until the end of line or file.
    const Token *Begin = Tok++;
    while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine))
      ++Tok;
    ArrayRef<Token> Tokens{Begin, Tok};
    // Tokens.end() is the token under the cursor (next line's first token or
    // eof), so taking its index is valid.
    D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())};

    // Directive name is the first non-comment token after the hash.
    // If there is none, D->Kind is left as whatever Directive initialized it
    // to.
    Tokens = Tokens.drop_front().drop_while(
        [](const Token &T) { return T.Kind == tok::comment; });
    if (!Tokens.empty())
      D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
  }

  // The stream being parsed.
  const TokenStream &Code;
  // Cursor: the next token to be consumed.
  const Token *Tok;
  // Used to translate directive names into tok::PPKeywordKind values.
  clang::IdentifierTable PPKeywords;
};
} // namespace
// Builds the directive structure for Code by running a DirectiveParser over
// the whole token stream.
DirectiveMap DirectiveMap::parse(const TokenStream &Code) {
  DirectiveMap Parsed;
  DirectiveParser Parser(Code);
  Parser.parse(&Parsed);
  return Parsed;
}
// Forward declaration: the Conditional overload of dump() below prints each
// branch body through this overload, which is only defined after it.
static void dump(llvm::raw_ostream &OS, const DirectiveMap &, unsigned Indent);
// Writes a one-line summary of a directive at the given indentation:
// the spelling of its keyword and how many tokens it covers.
static void dump(llvm::raw_ostream &OS,
                 const DirectiveMap::Directive &Directive, unsigned Indent) {
  const char *Spelling = tok::getPPKeywordSpelling(Directive.Kind);
  const auto TokenCount = Directive.Tokens.size();
  OS.indent(Indent);
  OS << llvm::formatv("#{0} ({1} tokens)\n", Spelling, TokenCount);
}
// Writes a one-line summary of a code chunk at the given indentation,
// reporting only the number of tokens it covers.
static void dump(llvm::raw_ostream &OS, const DirectiveMap::Code &Code,
                 unsigned Indent) {
  const auto TokenCount = Code.Tokens.size();
  OS.indent(Indent);
  OS << llvm::formatv("code ({0} tokens)\n", TokenCount);
}
// Writes a conditional block: for every branch, its controlling directive at
// Indent followed by the branch body two columns deeper, and finally the
// terminating directive at Indent.
static void dump(llvm::raw_ostream &OS,
                 const DirectiveMap::Conditional &Conditional,
                 unsigned Indent) {
  const unsigned BodyIndent = Indent + 2;
  for (const auto &Branch : Conditional.Branches) {
    const auto &Guard = Branch.first;
    const auto &Body = Branch.second;
    dump(OS, Guard, Indent);
    dump(OS, Body, BodyIndent);
  }
  dump(OS, Conditional.End, Indent);
}
// Dispatches on the chunk's kind and forwards to the dump() overload for the
// alternative it holds. An empty chunk is considered invalid here.
static void dump(llvm::raw_ostream &OS, const DirectiveMap::Chunk &Chunk,
                 unsigned Indent) {
  switch (Chunk.kind()) {
  case DirectiveMap::Chunk::K_Code:
    dump(OS, (const DirectiveMap::Code &)Chunk, Indent);
    return;
  case DirectiveMap::Chunk::K_Directive:
    dump(OS, (const DirectiveMap::Directive &)Chunk, Indent);
    return;
  case DirectiveMap::Chunk::K_Conditional:
    dump(OS, (const DirectiveMap::Conditional &)Chunk, Indent);
    return;
  case DirectiveMap::Chunk::K_Empty:
    llvm_unreachable("invalid chunk");
  }
}
// Writes every chunk of Map in order, all at the same indentation.
static void dump(llvm::raw_ostream &OS, const DirectiveMap &Map,
                 unsigned Indent) {
  for (const auto &Piece : Map.Chunks) {
    dump(OS, Piece, Indent);
  }
}
// Define operator<< in terms of dump() functions above.
// OSTREAM_DUMP(Type) expands to an operator<< for Type that calls the matching
// dump() overload with an indentation of 0 and returns the stream.
// The macro is local to this file: it is #undef'd right after its last use.
#define OSTREAM_DUMP(Type) \
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \
dump(OS, T, 0); \
return OS; \
}
OSTREAM_DUMP(DirectiveMap)
OSTREAM_DUMP(DirectiveMap::Chunk)
OSTREAM_DUMP(DirectiveMap::Directive)
OSTREAM_DUMP(DirectiveMap::Conditional)
OSTREAM_DUMP(DirectiveMap::Code)
#undef OSTREAM_DUMP
} // namespace pseudo
} // namespace clang