blob: 36af896148209d267107bbbe2cfc5fbb3364b7ef [file] [log] [blame]
//===--- ForestTest.cpp - Test Forest dump ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/Token.h"
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/StringRef.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <vector>
namespace clang {
namespace pseudo {
namespace {
// FIXME: extract to a TestGrammar class to allow code sharing among tests.
class ForestTest : public ::testing::Test {
public:
void build(llvm::StringRef BNF) {
Diags.clear();
G = Grammar::parseBNF(BNF, Diags);
}
SymbolID symbol(llvm::StringRef Name) const {
for (unsigned I = 0; I < NumTerminals; ++I)
if (G.table().Terminals[I] == Name)
return tokenSymbol(static_cast<tok::TokenKind>(I));
for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
if (G.table().Nonterminals[ID].Name == Name)
return ID;
ADD_FAILURE() << "No such symbol found: " << Name;
return 0;
}
RuleID ruleFor(llvm::StringRef NonterminalName) const {
auto RuleRange = G.table().Nonterminals[symbol(NonterminalName)].RuleRange;
if (RuleRange.End - RuleRange.Start == 1)
return G.table().Nonterminals[symbol(NonterminalName)].RuleRange.Start;
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
<< ", but it has " << RuleRange.End - RuleRange.Start
<< " rule!\n";
return 0;
}
protected:
Grammar G;
std::vector<std::string> Diags;
};
TEST_F(ForestTest, DumpBasic) {
build(R"cpp(
_ := add-expression EOF
add-expression := id-expression + id-expression
id-expression := IDENTIFIER
)cpp");
ASSERT_TRUE(Diags.empty());
ForestArena Arena;
const auto &TS =
cook(lex("a + b", clang::LangOptions()), clang::LangOptions());
auto T = Arena.createTerminals(TS);
ASSERT_EQ(T.size(), 4u);
const auto *Left = &Arena.createSequence(
symbol("id-expression"), ruleFor("id-expression"), {&T.front()});
const auto *Right = &Arena.createSequence(symbol("id-expression"),
ruleFor("id-expression"), {&T[2]});
const auto *Add =
&Arena.createSequence(symbol("add-expression"), ruleFor("add-expression"),
{Left, &T[1], Right});
EXPECT_EQ(Add->dumpRecursive(G, true),
"[ 0, end) add-expression := id-expression + id-expression\n"
"[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n"
"[ 1, 2) ├─+ := tok[1]\n"
"[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n");
EXPECT_EQ(Add->dumpRecursive(G, false),
"[ 0, end) add-expression := id-expression + id-expression\n"
"[ 0, 1) ├─id-expression := IDENTIFIER\n"
"[ 0, 1) │ └─IDENTIFIER := tok[0]\n"
"[ 1, 2) ├─+ := tok[1]\n"
"[ 2, end) └─id-expression := IDENTIFIER\n"
"[ 2, end) └─IDENTIFIER := tok[2]\n");
}
TEST_F(ForestTest, DumpAmbiguousAndRefs) {
build(R"cpp(
_ := type EOF
type := class-type # rule 4
type := enum-type # rule 5
class-type := shared-type
enum-type := shared-type
shared-type := IDENTIFIER)cpp");
ASSERT_TRUE(Diags.empty());
ForestArena Arena;
const auto &TS = cook(lex("abc", clang::LangOptions()), clang::LangOptions());
auto Terminals = Arena.createTerminals(TS);
ASSERT_EQ(Terminals.size(), 2u);
const auto *SharedType = &Arena.createSequence(
symbol("shared-type"), ruleFor("shared-type"), {Terminals.begin()});
const auto *ClassType = &Arena.createSequence(
symbol("class-type"), ruleFor("class-type"), {SharedType});
const auto *EnumType = &Arena.createSequence(
symbol("enum-type"), ruleFor("enum-type"), {SharedType});
const auto *Alternative1 =
&Arena.createSequence(symbol("type"), /*RuleID=*/4, {ClassType});
const auto *Alternative2 =
&Arena.createSequence(symbol("type"), /*RuleID=*/5, {EnumType});
const auto *Type =
&Arena.createAmbiguous(symbol("type"), {Alternative1, Alternative2});
EXPECT_EQ(Type->dumpRecursive(G),
"[ 0, end) type := <ambiguous>\n"
"[ 0, end) ├─type := class-type\n"
"[ 0, end) │ └─class-type := shared-type\n"
"[ 0, end) │ └─shared-type := IDENTIFIER #1\n"
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
"[ 0, end) └─type := enum-type\n"
"[ 0, end) └─enum-type := shared-type\n"
"[ 0, end) └─shared-type =#1\n");
}
TEST_F(ForestTest, DumpAbbreviatedShared) {
build(R"cpp(
_ := A
A := B
B := *
)cpp");
ForestArena Arena;
const auto *Star = &Arena.createTerminal(tok::star, 0);
const auto *B = &Arena.createSequence(symbol("B"), ruleFor("B"), {Star});
// We have two identical (but distinct) A nodes.
// The GLR parser would never produce this, but it makes the example simpler.
const auto *A1 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
const auto *A2 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
const auto *A = &Arena.createAmbiguous(symbol("A"), {A1, A2});
// We must not abbreviate away shared nodes: if we show A~* there's no way to
// show that the intermediate B node is shared between A1 and A2.
EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true),
"[ 0, end) A := <ambiguous>\n"
"[ 0, end) ├─A~B := * #1\n"
"[ 0, end) │ └─* := tok[0]\n"
"[ 0, end) └─A~B =#1\n");
}
TEST_F(ForestTest, Iteration) {
// Z
// / \
// X Y
// |\|
// A B
ForestArena Arena;
const auto *A = &Arena.createTerminal(tok::identifier, 0);
const auto *B = &Arena.createOpaque(1, 0);
const auto *X = &Arena.createSequence(2, 1, {A, B});
const auto *Y = &Arena.createSequence(2, 2, {B});
const auto *Z = &Arena.createAmbiguous(2, {X, Y});
std::vector<const ForestNode *> Nodes;
for (const ForestNode &N : Z->descendants())
Nodes.push_back(&N);
EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z));
Nodes.clear();
for (const ForestNode &N : X->descendants())
Nodes.push_back(&N);
EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B));
}
} // namespace
} // namespace pseudo
} // namespace clang