blob: cb4ea698c4bb8559b17d4ef8ac714149b974ebac [file] [log] [blame]
//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/Grammar.h"
#include "clang-pseudo/LRGraph.h"
#include "clang-pseudo/LRTable.h"
#include "clang/Basic/TokenKinds.h"
#include <cstdint>
namespace llvm {
template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
using Entry = clang::pseudo::LRTable::Entry;
static inline Entry getEmptyKey() {
static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
clang::pseudo::LRTable::Action::sentinel()};
return E;
}
static inline Entry getTombstoneKey() {
static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
clang::pseudo::LRTable::Action::sentinel()};
return E;
}
static unsigned getHashValue(const Entry &I) {
return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
}
static bool isEqual(const Entry &LHS, const Entry &RHS) {
return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
LHS.Act == RHS.Act;
}
};
} // namespace llvm
namespace clang {
namespace pseudo {
class LRTable::Builder {
public:
bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
LRTable build(const GrammarTable &GT) && {
// E.g. given the following parsing table with 3 states and 3 terminals:
//
// a b c
// +-------+----+-------+-+
// |state0 | | s0,r0 | |
// |state1 | acc| | |
// |state2 | | r1 | |
// +-------+----+-------+-+
//
// The final LRTable:
// - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel)
// - States: [ 1, 0, 0, 2]
// Actions: [ acc, s0, r0, r1]
// ~~~ corresponding range for terminal a
// ~~~~~~~~~~ corresponding range for terminal b
// First step, we sort all entries by (Symbol, State, Action).
std::vector<Entry> Sorted(Entries.begin(), Entries.end());
llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) <
std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque());
});
LRTable Table;
Table.Actions.reserve(Sorted.size());
Table.States.reserve(Sorted.size());
// We are good to finalize the States and Actions.
for (const auto &E : Sorted) {
Table.Actions.push_back(E.Act);
Table.States.push_back(E.State);
}
// Initialize the terminal and nonterminal offset, all ranges are empty by
// default.
Table.TerminalOffset = std::vector<uint32_t>(GT.Terminals.size() + 1, 0);
Table.NontermOffset = std::vector<uint32_t>(GT.Nonterminals.size() + 1, 0);
size_t SortedIndex = 0;
for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size();
++NonterminalID) {
Table.NontermOffset[NonterminalID] = SortedIndex;
while (SortedIndex < Sorted.size() &&
Sorted[SortedIndex].Symbol == NonterminalID)
++SortedIndex;
}
for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size();
++Terminal) {
Table.TerminalOffset[Terminal] = SortedIndex;
while (SortedIndex < Sorted.size() &&
Sorted[SortedIndex].Symbol ==
tokenSymbol(static_cast<tok::TokenKind>(Terminal)))
++SortedIndex;
}
return Table;
}
private:
llvm::DenseSet<Entry> Entries;
};
LRTable LRTable::buildForTests(const GrammarTable &GT,
llvm::ArrayRef<Entry> Entries) {
Builder Build;
for (const Entry &E : Entries)
Build.insert(E);
return std::move(Build).build(GT);
}
LRTable LRTable::buildSLR(const Grammar &G) {
Builder Build;
auto Graph = LRGraph::buildLR0(G);
for (const auto &T : Graph.edges()) {
Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
Build.insert({T.Src, T.Label, Act});
}
assert(Graph.states().size() <= (1 << StateBits) &&
"Graph states execceds the maximum limit!");
auto FollowSets = followSets(G);
for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
for (const Item &I : Graph.states()[SID].Items) {
// If we've just parsed the start symbol, we can accept the input.
if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) {
Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
continue;
}
if (!I.hasNext()) {
// If we've reached the end of a rule A := ..., then we can reduce if
// the next token is in the follow set of A.
for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
assert(isToken(Follow));
Build.insert({SID, Follow, Action::reduce(I.rule())});
}
}
}
}
return std::move(Build).build(G.table());
}
} // namespace pseudo
} // namespace clang