blob: 6b6b47b8a2dbec47f2e3b3ea8564b4f3eca1f81e [file] [log] [blame]
//===--- GrammarTest.cpp - grammar tests -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/grammar/Grammar.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <memory>
namespace clang {
namespace pseudo {
namespace {
using testing::AllOf;
using testing::ElementsAre;
using testing::IsEmpty;
using testing::Pair;
using testing::UnorderedElementsAre;
// Matches a rule whose Target symbol equals SID.
MATCHER_P(TargetID, SID, "") {
  return arg.Target == SID;
}
// Matches a Rule whose seq() consists of exactly the given symbol IDs, in the
// given order.
template <typename... T>
testing::Matcher<const Rule &> Sequence(T... IDs) {
  auto SeqMatcher = ElementsAre(IDs...);
  return testing::Property(&Rule::seq, SeqMatcher);
}
// Test fixture that builds a Grammar from BNF text and offers helpers to look
// up symbols and rules by name.
class GrammarTest : public ::testing::Test {
public:
  // Parses BNF and stores the result in G, replacing any grammar built
  // earlier. Diags is emptied first and then handed to the parser, so after
  // the call it only reflects this parse.
  void build(llvm::StringRef BNF) {
    Diags.clear();
    G = Grammar::parseBNF(BNF, Diags);
  }

  // Returns the SymbolID of the symbol called Name.
  // Terminal names are searched first, then nonterminal names. If nothing
  // matches, a non-fatal test failure is recorded and 0 is returned.
  SymbolID id(llvm::StringRef Name) const {
    for (unsigned I = 0; I < NumTerminals; ++I)
      if (G.table().Terminals[I] == Name)
        return tokenSymbol(static_cast<tok::TokenKind>(I));
    for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
      if (G.table().Nonterminals[ID].Name == Name)
        return ID;
    ADD_FAILURE() << "No such symbol found: " << Name;
    return 0;
  }

  // Returns the ID of the single rule belonging to NonterminalName.
  // If the nonterminal does not have exactly one rule, a non-fatal test
  // failure is recorded and 0 is returned.
  RuleID ruleFor(llvm::StringRef NonterminalName) const {
    // Resolve the name only once: id() scans the symbol tables linearly and
    // records a failure for unknown names, so calling it again would repeat
    // the scan and could report the same failure twice.
    const auto RuleRange =
        G.table().Nonterminals[id(NonterminalName)].RuleRange;
    const auto NumRules = RuleRange.End - RuleRange.Start;
    if (NumRules == 1)
      return RuleRange.Start;
    ADD_FAILURE() << "Expected a single rule for " << NonterminalName
                  << ", but it has " << NumRules << " rule!\n";
    return 0;
  }

protected:
  // The grammar produced by the most recent build() call.
  Grammar G;
  // Diagnostics collected by the most recent build() call.
  std::vector<std::string> Diags;
};
// A single rule with a trailing comment parses cleanly and can be retrieved
// both by its target symbol and by its rule ID.
TEST_F(GrammarTest, Basic) {
  build("_ := IDENTIFIER + _ # comment");
  EXPECT_THAT(Diags, IsEmpty());

  const SymbolID Underscore = id("_");
  auto Expected = AllOf(TargetID(Underscore),
                        Sequence(id("IDENTIFIER"), id("+"), Underscore));

  EXPECT_EQ(G.symbolName(id("_")), "_");
  EXPECT_THAT(G.rulesFor(id("_")), UnorderedElementsAre(Expected));

  // The grammar has exactly one rule, so it must be rule 0.
  const auto &OnlyRule = G.lookupRule(/*RID=*/0);
  EXPECT_THAT(OnlyRule, Expected);

  // Each element of the rule body maps back to the name used in the BNF.
  EXPECT_THAT(G.symbolName(OnlyRule.seq()[0]), "IDENTIFIER");
  EXPECT_THAT(G.symbolName(OnlyRule.seq()[1]), "+");
  EXPECT_THAT(G.symbolName(OnlyRule.seq()[2]), "_");
}
// A rule with `_opt` symbols is expected to be expanded into one rule per
// present/absent combination of the optional symbols.
TEST_F(GrammarTest, EliminatedOptional) {
  build("_ := CONST_opt INT ;_opt");
  EXPECT_THAT(Diags, IsEmpty());

  const SymbolID Const = id("CONST");
  const SymbolID Int = id("INT");
  const SymbolID Semi = id(";");

  EXPECT_THAT(G.table().Rules,
              UnorderedElementsAre(Sequence(Int),
                                   Sequence(Const, Int),
                                   Sequence(Const, Int, Semi),
                                   Sequence(Int, Semi)));
}
// For the chain _ -> x -> y -> z, the rule of each nonterminal is expected to
// have a smaller ID than the rule of the nonterminal that refers to it.
TEST_F(GrammarTest, RuleIDSorted) {
  build(R"bnf(
_ := x
x := y
y := z
z := IDENTIFIER
)bnf");
  // Use the container matcher (as the other tests in this file do) so that an
  // unexpected diagnostic is printed in the failure message.
  ASSERT_THAT(Diags, IsEmpty());

  const RuleID Z = ruleFor("z");
  const RuleID Y = ruleFor("y");
  const RuleID X = ruleFor("x");
  const RuleID Start = ruleFor("_");

  EXPECT_LT(Z, Y);
  EXPECT_LT(Y, X);
  EXPECT_LT(X, Start);
}
// Only the rule carrying a `[guard]` annotation is marked as Guarded.
TEST_F(GrammarTest, Annotation) {
  build(R"bnf(
_ := x
x := IDENTIFIER [guard]
)bnf");
  ASSERT_THAT(Diags, IsEmpty());

  const auto &PlainRule = G.lookupRule(ruleFor("_"));
  EXPECT_FALSE(PlainRule.Guarded);

  const auto &AnnotatedRule = G.lookupRule(ruleFor("x"));
  EXPECT_TRUE(AnnotatedRule.Guarded);
}
// Feeds the parser a grammar containing several distinct mistakes and checks
// that each one is reported.
TEST_F(GrammarTest, Diagnostics) {
  const llvm::StringRef BrokenBNF = R"cpp(
_ := ,_opt
_ := undefined-sym
null :=
_ := IDENFIFIE # a typo of the terminal IDENFITIER
invalid
# cycle
a := b
b := a
_ := IDENTIFIER [unknown=value]
)cpp";
  build(BrokenBNF);

  // The start symbol is still resolvable even though the grammar is broken.
  EXPECT_EQ(G.underscore(), id("_"));

  EXPECT_THAT(
      Diags,
      UnorderedElementsAre(
          // Rules whose right-hand side can be empty.
          "Rule '_ := ,_opt' has a nullable RHS",
          "Rule 'null := ' has a nullable RHS",
          // A nonterminal that is used but never defined.
          "No rules for nonterminal: undefined-sym",
          // A line without the `:=` separator.
          "Failed to parse 'invalid': no separator :=",
          // A misspelled terminal name.
          "Token-like name IDENFIFIE is used as a nonterminal",
          "No rules for nonterminal: IDENFIFIE",
          // a := b, b := a.
          "The grammar contains a cycle involving symbol a",
          // An attribute key that is not recognised.
          "Unknown attribute 'unknown'"));
}
// `test := INT` is listed twice; exactly one duplicate-rule diagnostic is
// expected.
TEST_F(GrammarTest, DuplicatedDiagnostics) {
  const llvm::StringRef BNFWithDuplicate = R"cpp(
_ := test
test := INT
test := DOUBLE
test := INT
)cpp";
  build(BNFWithDuplicate);

  EXPECT_THAT(Diags, UnorderedElementsAre("Duplicate rule: `test := INT`"));
}
// Checks the results of firstSets() and followSets() for two small grammars.
TEST_F(GrammarTest, FirstAndFollowSets) {
  build(
      R"bnf(
_ := expr
expr := expr - term
expr := term
term := IDENTIFIER
term := ( expr )
)bnf");
  // Use the container matcher (as the other tests in this file do) so that an
  // unexpected diagnostic is printed in the failure message.
  ASSERT_THAT(Diags, IsEmpty());

  // Turns a vector of sets into (index, set) pairs, using each element's
  // position in the vector as the SymbolID, so expectations can be written
  // per symbol and independent of order.
  auto ToPairs = [](std::vector<llvm::DenseSet<SymbolID>> Input) {
    std::vector<std::pair<SymbolID, llvm::DenseSet<SymbolID>>> Sets;
    Sets.reserve(Input.size());
    for (SymbolID ID = 0; ID < Input.size(); ++ID)
      Sets.emplace_back(ID, std::move(Input[ID]));
    return Sets;
  };

  EXPECT_THAT(
      ToPairs(firstSets(G)),
      UnorderedElementsAre(
          Pair(id("_"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
          Pair(id("expr"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
          Pair(id("term"), UnorderedElementsAre(id("IDENTIFIER"), id("(")))));
  EXPECT_THAT(
      ToPairs(followSets(G)),
      UnorderedElementsAre(
          Pair(id("_"), UnorderedElementsAre(id("EOF"))),
          Pair(id("expr"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))),
          Pair(id("term"), UnorderedElementsAre(id("-"), id("EOF"), id(")")))));

  // Second grammar: rebuilding replaces G, so id() now resolves against it.
  build(R"bnf(
# A simplfied C++ decl-specifier-seq.
_ := decl-specifier-seq
decl-specifier-seq := decl-specifier decl-specifier-seq
decl-specifier-seq := decl-specifier
decl-specifier := simple-type-specifier
decl-specifier := INLINE
simple-type-specifier := INT
)bnf");
  ASSERT_THAT(Diags, IsEmpty());

  EXPECT_THAT(
      ToPairs(firstSets(G)),
      UnorderedElementsAre(
          Pair(id("_"), UnorderedElementsAre(id("INLINE"), id("INT"))),
          Pair(id("decl-specifier-seq"),
               UnorderedElementsAre(id("INLINE"), id("INT"))),
          Pair(id("simple-type-specifier"), UnorderedElementsAre(id("INT"))),
          Pair(id("decl-specifier"),
               UnorderedElementsAre(id("INLINE"), id("INT")))));
  EXPECT_THAT(
      ToPairs(followSets(G)),
      UnorderedElementsAre(
          Pair(id("_"), UnorderedElementsAre(id("EOF"))),
          Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("EOF"))),
          Pair(id("decl-specifier"),
               UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))),
          Pair(id("simple-type-specifier"),
               UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF")))));
}
} // namespace
} // namespace pseudo
} // namespace clang