pseudo/lib/LRTableBuild.cpp - llvm-project/clang-tools-extra - Git at Google

 //===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "clang-pseudo/Grammar.h"
 #include "clang-pseudo/LRGraph.h"
 #include "clang-pseudo/LRTable.h"
 #include "clang/Basic/TokenKinds.h"
 #include <cstdint>

 namespace llvm {
 template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
   using Entry = clang::pseudo::LRTable::Entry;
   static inline Entry getEmptyKey() {
     static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
                    clang::pseudo::LRTable::Action::sentinel()};
     return E;
   }
   static inline Entry getTombstoneKey() {
     static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
                    clang::pseudo::LRTable::Action::sentinel()};
     return E;
   }
   static unsigned getHashValue(const Entry &I) {
     return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
   }
   static bool isEqual(const Entry &LHS, const Entry &RHS) {
     return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
            LHS.Act == RHS.Act;
   }
 };
 } // namespace llvm

 namespace clang {
 namespace pseudo {

 class LRTable::Builder {
 public:
   bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
   LRTable build(const GrammarTable &GT) && {
     // E.g. given the following parsing table with 3 states and 3 terminals:
     //
     //            a    b     c
     // +-------+----+-------+-+
     // |state0 |    | s0,r0 | |
     // |state1 | acc|       | |
     // |state2 |    |  r1   | |
     // +-------+----+-------+-+
     //
     // The final LRTable:
     //  - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel)
     //  -  States:     [ 1,    0,  0,  2]
     //    Actions:     [ acc, s0, r0, r1]
     //                   ~~~ corresponding range for terminal a
     //                        ~~~~~~~~~~ corresponding range for terminal b
     // First step, we sort all entries by (Symbol, State, Action).
     std::vector<Entry> Sorted(Entries.begin(), Entries.end());
     llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
       return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) <
              std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque());
     });

     LRTable Table;
     Table.Actions.reserve(Sorted.size());
     Table.States.reserve(Sorted.size());
     // We are good to finalize the States and Actions.
     for (const auto &E : Sorted) {
       Table.Actions.push_back(E.Act);
       Table.States.push_back(E.State);
     }
     // Initialize the terminal and nonterminal offset, all ranges are empty by
     // default.
     Table.TerminalOffset = std::vector<uint32_t>(GT.Terminals.size() + 1, 0);
     Table.NontermOffset = std::vector<uint32_t>(GT.Nonterminals.size() + 1, 0);
     size_t SortedIndex = 0;
     for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size();
          ++NonterminalID) {
       Table.NontermOffset[NonterminalID] = SortedIndex;
       while (SortedIndex < Sorted.size() &&
              Sorted[SortedIndex].Symbol == NonterminalID)
         ++SortedIndex;
     }
     for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size();
          ++Terminal) {
       Table.TerminalOffset[Terminal] = SortedIndex;
       while (SortedIndex < Sorted.size() &&
              Sorted[SortedIndex].Symbol ==
                  tokenSymbol(static_cast<tok::TokenKind>(Terminal)))
         ++SortedIndex;
     }
     return Table;
   }

 private:
   llvm::DenseSet<Entry> Entries;
 };

 LRTable LRTable::buildForTests(const GrammarTable &GT,
                                llvm::ArrayRef<Entry> Entries) {
   Builder Build;
   for (const Entry &E : Entries)
     Build.insert(E);
   return std::move(Build).build(GT);
 }

 LRTable LRTable::buildSLR(const Grammar &G) {
   Builder Build;
   auto Graph = LRGraph::buildLR0(G);
   for (const auto &T : Graph.edges()) {
     Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
     Build.insert({T.Src, T.Label, Act});
   }
   assert(Graph.states().size() <= (1 << StateBits) &&
          "Graph states execceds the maximum limit!");
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
     for (const Item &I : Graph.states()[SID].Items) {
       // If we've just parsed the start symbol, we can accept the input.
       if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) {
         Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
         continue;
       }
       if (!I.hasNext()) {
         // If we've reached the end of a rule A := ..., then we can reduce if
         // the next token is in the follow set of A.
         for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
           assert(isToken(Follow));
           Build.insert({SID, Follow, Action::reduce(I.rule())});
         }
       }
     }
   }
   return std::move(Build).build(G.table());
 }

 } // namespace pseudo
 } // namespace clang
	//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------- C++--===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "clang-pseudo/Grammar.h"
	#include "clang-pseudo/LRGraph.h"
	#include "clang-pseudo/LRTable.h"
	#include "clang/Basic/TokenKinds.h"
	#include <cstdint>

	namespace llvm {
	template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
	using Entry = clang::pseudo::LRTable::Entry;
	static inline Entry getEmptyKey() {
	static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
	clang::pseudo::LRTable::Action::sentinel()};
	return E;
	}
	static inline Entry getTombstoneKey() {
	static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
	clang::pseudo::LRTable::Action::sentinel()};
	return E;
	}
	static unsigned getHashValue(const Entry &I) {
	return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
	}
	static bool isEqual(const Entry &LHS, const Entry &RHS) {
	return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
	LHS.Act == RHS.Act;
	}
	};
	} // namespace llvm

	namespace clang {
	namespace pseudo {

	class LRTable::Builder {
	public:
	bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
	LRTable build(const GrammarTable &GT) && {
	// E.g. given the following parsing table with 3 states and 3 terminals:
	//
	// a b c
	// +-------+----+-------+-+
	// \|state0 \| \| s0,r0 \| \|
	// \|state1 \| acc\| \| \|
	// \|state2 \| \| r1 \| \|
	// +-------+----+-------+-+
	//
	// The final LRTable:
	// - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel)
	// - States: [ 1, 0, 0, 2]
	// Actions: [ acc, s0, r0, r1]
	// ~~~ corresponding range for terminal a
	// ~~~~~~~~~~ corresponding range for terminal b
	// First step, we sort all entries by (Symbol, State, Action).
	std::vector<Entry> Sorted(Entries.begin(), Entries.end());
	llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
	return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) <
	std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque());
	});

	LRTable Table;
	Table.Actions.reserve(Sorted.size());
	Table.States.reserve(Sorted.size());
	// We are good to finalize the States and Actions.
	for (const auto &E : Sorted) {
	Table.Actions.push_back(E.Act);
	Table.States.push_back(E.State);
	}
	// Initialize the terminal and nonterminal offset, all ranges are empty by
	// default.
	Table.TerminalOffset = std::vector<uint32_t>(GT.Terminals.size() + 1, 0);
	Table.NontermOffset = std::vector<uint32_t>(GT.Nonterminals.size() + 1, 0);
	size_t SortedIndex = 0;
	for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size();
	++NonterminalID) {
	Table.NontermOffset[NonterminalID] = SortedIndex;
	while (SortedIndex < Sorted.size() &&
	Sorted[SortedIndex].Symbol == NonterminalID)
	++SortedIndex;
	}
	for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size();
	++Terminal) {
	Table.TerminalOffset[Terminal] = SortedIndex;
	while (SortedIndex < Sorted.size() &&
	Sorted[SortedIndex].Symbol ==
	tokenSymbol(static_cast<tok::TokenKind>(Terminal)))
	++SortedIndex;
	}
	return Table;
	}

	private:
	llvm::DenseSet<Entry> Entries;
	};

	LRTable LRTable::buildForTests(const GrammarTable &GT,
	llvm::ArrayRef<Entry> Entries) {
	Builder Build;
	for (const Entry &E : Entries)
	Build.insert(E);
	return std::move(Build).build(GT);
	}

	LRTable LRTable::buildSLR(const Grammar &G) {
	Builder Build;
	auto Graph = LRGraph::buildLR0(G);
	for (const auto &T : Graph.edges()) {
	Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
	Build.insert({T.Src, T.Label, Act});
	}
	assert(Graph.states().size() <= (1 << StateBits) &&
	"Graph states execceds the maximum limit!");
	auto FollowSets = followSets(G);
	for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
	for (const Item &I : Graph.states()[SID].Items) {
	// If we've just parsed the start symbol, we can accept the input.
	if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) {
	Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
	continue;
	}
	if (!I.hasNext()) {
	// If we've reached the end of a rule A := ..., then we can reduce if
	// the next token is in the follow set of A.
	for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
	assert(isToken(Follow));
	Build.insert({SID, Follow, Action::reduce(I.rule())});
	}
	}
	}
	}
	return std::move(Build).build(G.table());
	}

	} // namespace pseudo
	} // namespace clang