blob: 25cb26563837a6cba89f0400a2b86a6bad517fd8 [file] [log] [blame]
//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a tool to compile a BNF grammar. It is used by the build system to
// generate the data needed to statically construct core pieces (Grammar,
// LRTable etc) of the LR parser.
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include <algorithm>
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
using llvm::cl::Required;
using llvm::cl::value_desc;
using llvm::cl::values;
namespace {
// The kinds of output this tool can produce; one is chosen on the command
// line and dispatched on in main().
enum EmitType {
  // NONTERMINAL/RULE/EXTENSION macro invocations describing the grammar.
  EmitSymbolList = 0,
  // The grammar source text, rendered as escaped string literals.
  EmitGrammarContent = 1,
};
// Path of the BNF grammar file to compile. The option is marked Required;
// main() passes its value to readOrDie().
opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
Required);
// Selects which artifact main() writes. The option itself is unnamed, so each
// enumerator is exposed under the flag name given in clEnumValN.
//
// The default is spelled out with init() so that it matches the "(default)"
// note in the help text regardless of the enumerators' declaration order.
opt<EmitType>
    Emit(desc("which information to emit:"), init(EmitSymbolList),
         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
                           "Print nonterminal symbols (default)"),
                clEnumValN(EmitGrammarContent, "emit-grammar-content",
                           "Print the BNF grammar content as a string")));
// Destination for the generated output, passed to llvm::ToolOutputFile in
// main(). Defaults to "-".
// NOTE(review): "-" is presumably interpreted as stdout by ToolOutputFile —
// confirm against its documentation.
opt<std::string> OutputFilename("o", init("-"), desc("Output"),
value_desc("file"));
// Loads the file at Path and returns its full contents as a string.
//
// If the file cannot be read, an error message naming the path and the
// underlying error is written to llvm::errs() and the process exits with
// status 1; in that case this function does not return.
std::string readOrDie(llvm::StringRef Path) {
  auto BufferOrErr = llvm::MemoryBuffer::getFile(Path);
  const std::error_code ReadError = BufferOrErr.getError();
  if (ReadError) {
    llvm::errs() << "Error: can't read grammar file '" << Path
                 << "': " << ReadError.message() << "\n";
    ::exit(1);
  }
  // Copy the bytes out: the buffer is destroyed when this function returns.
  return (*BufferOrErr)->getBuffer().str();
}
} // namespace
namespace clang {
namespace pseudo {
namespace {
// Produces an identifier-safe spelling for a grammar symbol.
//
// The result stays close to the name used in the grammar:
//   nonterminal: `ptr-declarator` becomes `ptr_declarator`;
//   punctuator:  `,` becomes `COMMA`;
//   keyword:     `INT` becomes `INT`;
//   terminal:    `IDENTIFIER` becomes `IDENTIFIER`;
//
// Token symbols are looked up in a table built from TokenKinds.def, with each
// entry upper-cased. Any other symbol uses its grammar name with every '-'
// replaced by '_'.
std::string mangleSymbol(SymbolID SID, const Grammar &G) {
  // Built once on first call. The vector is allocated with `new` and never
  // deleted, so it stays alive for the rest of the process.
  static auto &TokNames = *new std::vector<std::string>{
#define TOK(X) llvm::StringRef(#X).upper(),
#define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
#include "clang/Basic/TokenKinds.def"
  };

  if (!isToken(SID)) {
    // translation-unit -> translation_unit
    std::string Mangled = G.symbolName(SID).str();
    for (char &C : Mangled) {
      if (C == '-')
        C = '_';
    }
    return Mangled;
  }
  return TokNames[symbolToToken(SID)];
}
// Produces an identifier-safe name for a rule from its right-hand side.
//
// The mangled symbols of the RHS are joined with "__". The left-hand side is
// not part of the name, so names are unique only among rules sharing one LHS.
// e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
// the result is `ptr_operator__ptr_declarator`.
std::string mangleRule(RuleID RID, const Grammar &G) {
  const auto &R = G.lookupRule(RID);
  const auto Symbols = R.seq();

  // Seed with the first symbol, then add each remaining one after a "__".
  std::string Joined = mangleSymbol(Symbols.front(), G);
  for (SymbolID Sym : Symbols.drop_front()) {
    Joined += "__";
    Joined += mangleSymbol(Sym, G);
  }
  return Joined;
}
} // namespace
} // namespace pseudo
} // namespace clang
// Tool entry point.
//
// Reads and parses the grammar file named by -grammar, then writes the
// artifact selected by the emit option to the -o output file:
//   - EmitSymbolList: one NONTERMINAL(...) line per nonterminal, a RULE(...)
//     line for each of its rules, and one EXTENSION(...) line per attribute
//     value, wrapped in default macro definitions and trailing #undefs.
//   - EmitGrammarContent: the grammar text, one escaped string literal per
//     input line.
//
// Returns 0 on success. Returns 1 if parsing produced diagnostics or the
// output file could not be opened. An unreadable grammar file terminates the
// process inside readOrDie().
int main(int argc, char *argv[]) {
  llvm::cl::ParseCommandLineOptions(argc, argv, "");

  std::string GrammarText = readOrDie(Grammar);
  std::vector<std::string> Diags;
  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
  if (!Diags.empty()) {
    // Terminate the final diagnostic with a newline, like the other error
    // paths do, so the message does not run into whatever is printed next.
    llvm::errs() << llvm::join(Diags, "\n") << "\n";
    return 1;
  }

  std::error_code EC;
  llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None};
  if (EC) {
    llvm::errs() << EC.message() << '\n';
    return 1;
  }

  switch (Emit) {
  case EmitSymbolList:
    // Provide empty defaults so an includer only has to define the macros it
    // is interested in.
    Out.os() << R"cpp(
#ifndef NONTERMINAL
#define NONTERMINAL(NAME, ID)
#endif
#ifndef RULE
#define RULE(LHS, RHS, ID)
#endif
#ifndef EXTENSION
#define EXTENSION(NAME, ID)
#endif
)cpp";
    for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
         ++ID) {
      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n",
                                clang::pseudo::mangleSymbol(ID, G), ID);
      for (const clang::pseudo::Rule &R : G.rulesFor(ID)) {
        // The rule's ID is its offset from the start of the Rules table.
        // NOTE(review): assumes rulesFor() yields references into
        // table().Rules — confirm against Grammar.h.
        clang::pseudo::RuleID RID = &R - G.table().Rules.data();
        Out.os() << llvm::formatv("RULE({0}, {1}, {2})\n",
                                  clang::pseudo::mangleSymbol(R.Target, G),
                                  clang::pseudo::mangleRule(RID, G), RID);
      }
    }
    for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
         EID < G.table().AttributeValues.size(); ++EID) {
      llvm::StringRef Name = G.table().AttributeValues[EID];
      assert(!Name.empty());
      Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
    }
    // Clean up the macros so the generated file can be included repeatedly.
    Out.os() << R"cpp(
#undef NONTERMINAL
#undef RULE
#undef EXTENSION
)cpp";
    break;
  case EmitGrammarContent:
    // Emit each grammar line as its own quoted literal, with the line's
    // newline escaped inside the quotes.
    for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
      Out.os() << '"';
      Out.os().write_escaped((Line + "\n").str());
      Out.os() << "\"\n";
    }
    break;
  }

  // Output was fully written; tell ToolOutputFile to keep the file.
  Out.keep();
  return 0;
}