| //===--- LRTable.h - Define LR Parsing Table ---------------------*- C++-*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // The LRTable (referred as LR parsing table in the LR literature) is the core |
| // component in LR parsers, it drives the LR parsers by specifying an action to |
| // take given the current state on the top of the stack and the current |
| // lookahead token. |
| // |
| // The LRTable can be described as a matrix where the rows represent |
| // the states of the LR graph, the columns represent the symbols of the |
| // grammar, and each entry of the matrix (called action) represents a |
| // state transition in the graph. |
| // |
| // Typically, based on the category of the grammar symbol, the LRTable is |
| // broken into two logically separate tables: |
| // - ACTION table with terminals as columns -- e.g ACTION[S, a] specifies |
| // next action (shift/reduce/accept/error) on state S under a lookahead |
| // terminal a |
| // - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specify |
| // the state which we transist to from the state S with the nonterminal X |
| // |
| // LRTable is *performance-critial* as it is consulted frequently during a |
| // parse. In general, LRTable is very sparse (most of the entries are empty). |
| // For example, for the C++ language, the SLR table has ~1500 states and 650 |
| // symbols which results in a matrix having 975K entries, ~90% of entries are |
| // empty. |
| // |
| // This file implements a speed-and-space-efficient LRTable. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef CLANG_PSEUDO_LRTABLE_H |
| #define CLANG_PSEUDO_LRTABLE_H |
| |
| #include "clang-pseudo/Grammar.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include <cstdint> |
| #include <vector> |
| |
| namespace clang { |
| namespace pseudo { |
| |
| // Represents the LR parsing table, which can efficiently the question "what is |
| // the next step given the lookahead token and current state on top of the |
| // stack?". |
| // |
| // This is a dense implementation, which only takes an amount of space that is |
| // proportional to the number of non-empty entries in the table. |
| // |
| // Unlike the typical LR parsing table which allows at most one available action |
| // per entry, conflicted actions are allowed in LRTable. The LRTable is designed |
| // to be used in nondeterministic LR parsers (e.g. GLR). |
| class LRTable { |
| public: |
| // StateID is only 13 bits wide. |
| using StateID = uint16_t; |
| static constexpr unsigned StateBits = 13; |
| |
| // Action represents the terminal and nonterminal actions, it combines the |
| // entry of the ACTION and GOTO tables from the LR literature. |
| class Action { |
| public: |
| enum Kind : uint8_t { |
| Sentinel = 0, |
| // Terminal actions, corresponding to entries of ACTION table. |
| |
| // Shift to state n: move forward with the lookahead, and push state n |
| // onto the state stack. |
| // A shift is a forward transition, and the value n is the next state that |
| // the parser is to enter. |
| Shift, |
| // Reduce by a rule: pop the state stack. |
| Reduce, |
| // Signals that we have parsed the input successfully. |
| Accept, |
| |
| // Nonterminal actions, corresponding to entry of GOTO table. |
| |
| // Go to state n: push state n onto the state stack. |
| // Similar to Shift, but it is a nonterminal forward transition. |
| GoTo, |
| }; |
| |
| static Action accept(RuleID RID) { return Action(Accept, RID); } |
| static Action goTo(StateID S) { return Action(GoTo, S); } |
| static Action shift(StateID S) { return Action(Shift, S); } |
| static Action reduce(RuleID RID) { return Action(Reduce, RID); } |
| static Action sentinel() { return Action(Sentinel, 0); } |
| |
| StateID getShiftState() const { |
| assert(kind() == Shift); |
| return Value; |
| } |
| StateID getGoToState() const { |
| assert(kind() == GoTo); |
| return Value; |
| } |
| RuleID getReduceRule() const { |
| assert(kind() == Reduce); |
| return Value; |
| } |
| Kind kind() const { return static_cast<Kind>(K); } |
| |
| bool operator==(const Action &L) const { return opaque() == L.opaque(); } |
| uint16_t opaque() const { return K << ValueBits | Value; }; |
| |
| private: |
| Action(Kind K1, unsigned Value) : K(K1), Value(Value) {} |
| static constexpr unsigned ValueBits = StateBits; |
| static constexpr unsigned KindBits = 3; |
| static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID"); |
| static_assert(KindBits + ValueBits <= 16, |
| "Must be able to store kind and value efficiently"); |
| uint16_t K : KindBits; |
| // Either StateID or RuleID, depending on the Kind. |
| uint16_t Value : ValueBits; |
| }; |
| |
| // Returns all available actions for the given state on a terminal. |
| // Expected to be called by LR parsers. |
| llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const; |
| // Returns the state after we reduce a nonterminal. |
| // Expected to be called by LR parsers. |
| StateID getGoToState(StateID State, SymbolID Nonterminal) const; |
| |
| // Looks up available actions. |
| // Returns empty if no available actions in the table. |
| llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const; |
| |
| size_t bytes() const { |
| return sizeof(*this) + Actions.capacity() * sizeof(Action) + |
| States.capacity() * sizeof(StateID) + |
| NontermOffset.capacity() * sizeof(uint32_t) + |
| TerminalOffset.capacity() * sizeof(uint32_t); |
| } |
| |
| std::string dumpStatistics() const; |
| std::string dumpForTests(const Grammar &G) const; |
| |
| // Build a SLR(1) parsing table. |
| static LRTable buildSLR(const Grammar &G); |
| |
| class Builder; |
| // Represents an entry in the table, used for building the LRTable. |
| struct Entry { |
| StateID State; |
| SymbolID Symbol; |
| Action Act; |
| }; |
| // Build a specifid table for testing purposes. |
| static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>); |
| |
| private: |
| // Conceptually the LR table is a multimap from (State, SymbolID) => Action. |
| // Our physical representation is quite different for compactness. |
| |
| // Index is nonterminal SymbolID, value is the offset into States/Actions |
| // where the entries for this nonterminal begin. |
| // Give a non-terminal id, the corresponding half-open range of StateIdx is |
| // [NontermIdx[id], NontermIdx[id+1]). |
| std::vector<uint32_t> NontermOffset; |
| // Similar to NontermOffset, but for terminals, index is tok::TokenKind. |
| std::vector<uint32_t> TerminalOffset; |
| // Parallel to Actions, the value is State (rows of the matrix). |
| // Grouped by the SymbolID, and only subranges are sorted. |
| std::vector<StateID> States; |
| // A flat list of available actions, sorted by (SymbolID, State). |
| std::vector<Action> Actions; |
| }; |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &); |
| |
| } // namespace pseudo |
| } // namespace clang |
| |
| #endif // LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H |