// mlir/tools/mlir-tblgen/FormatGen.cpp - llvm-project - Git at Google

 //===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "FormatGen.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/TableGen/Error.h"

 using namespace mlir;
 using namespace mlir::tblgen;

 //===----------------------------------------------------------------------===//
 // FormatToken
 //===----------------------------------------------------------------------===//

 /// Returns the source location of this token, derived from the address of the
 /// first character of its spelling.
 llvm::SMLoc FormatToken::getLoc() const {
   const char *spellingStart = spelling.data();
   return llvm::SMLoc::getFromPointer(spellingStart);
 }

 //===----------------------------------------------------------------------===//
 // FormatLexer
 //===----------------------------------------------------------------------===//

 /// Creates a lexer over the main file buffer of `mgr`. `loc` is stored and
 /// later used as the location of the context note attached to diagnostics.
 FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc)
     : mgr(mgr), loc(loc),
       // The text to lex is the contents of the source manager's main file.
       curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()),
       // Lexing starts at the first character of that buffer.
       curPtr(curBuffer.begin()) {}

 /// Reports `msg` as an error at `loc` through the lexer's source manager, then
 /// emits a context note at the lexer's stored location through the global
 /// `llvm::SrcMgr`. Returns an error token formed at `loc`.
 FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) {
   const char *errorPtr = loc.getPointer();
   mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);

   // The context note is routed through the global source manager, not `mgr`.
   const auto &contextLoc = this->loc;
   llvm::SrcMgr.PrintMessage(contextLoc, llvm::SourceMgr::DK_Note,
                             "in custom assembly format for this operation");
   return formToken(FormatToken::error, errorPtr);
 }

 /// Convenience overload: wraps the raw pointer `loc` in an `SMLoc` and forwards
 /// to the `SMLoc` overload of `emitError`.
 FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) {
   const llvm::SMLoc errorLoc = llvm::SMLoc::getFromPointer(loc);
   return emitError(errorLoc, msg);
 }

 /// Reports `msg` as an error at `loc`, emits the context note at the lexer's
 /// stored location through the global `llvm::SrcMgr`, and finally attaches
 /// `note` at `loc`. Returns an error token formed at `loc`.
 FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
                                           const Twine &note) {
   const char *errorPtr = loc.getPointer();
   const auto noteKind = llvm::SourceMgr::DK_Note;

   // Order matters for the printed output: error, context note, extra note.
   mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);
   const auto &contextLoc = this->loc;
   llvm::SrcMgr.PrintMessage(contextLoc, noteKind,
                             "in custom assembly format for this operation");
   mgr.PrintMessage(loc, noteKind, note);
   return formToken(FormatToken::error, errorPtr);
 }

 /// Consumes and returns the next character of the buffer. Returns EOF (without
 /// advancing) when the nul at the end of the buffer is reached, 0 for a nul
 /// found anywhere else, and '\n' for any of "\n", "\r", "\n\r" or "\r\n".
 int FormatLexer::getNextChar() {
   const char consumed = *curPtr++;

   // Newlines: a two-character "\n\r" or "\r\n" pair collapses into a single
   // '\n'; two identical characters in a row stay two separate newlines.
   if (consumed == '\n' || consumed == '\r') {
     const char pairedWith = (consumed == '\n') ? '\r' : '\n';
     if (*curPtr == pairedWith)
       ++curPtr;
     return '\n';
   }

   // Ordinary character: widen through unsigned char so it is never negative.
   if (consumed != 0)
     return (unsigned char)consumed;

   // A nul is either a stray byte inside the buffer or the end of the buffer.
   if (curPtr - 1 != curBuffer.end())
     return 0;

   // End of buffer: step back so repeated calls keep returning EOF.
   --curPtr;
   return EOF;
 }

 /// Lexes and returns the next token. Whitespace (including stray nul bytes) is
 /// skipped; an unrecognized character produces an error token.
 FormatToken FormatLexer::lexToken() {
   for (;;) {
     // Each attempt records its own start so skipped whitespace is not part of
     // the resulting token.
     const char *tokStart = curPtr;

     // This always consumes at least one character.
     const int nextChar = getNextChar();
     switch (nextChar) {
     // Whitespace is not a token: go around again.
     case 0:
     case ' ':
     case '\t':
     case '\n':
       continue;

     // End of input.
     case EOF:
       return formToken(FormatToken::eof, tokStart);

     // Single-character punctuation.
     case '^':
       return formToken(FormatToken::caret, tokStart);
     case ':':
       return formToken(FormatToken::colon, tokStart);
     case ',':
       return formToken(FormatToken::comma, tokStart);
     case '=':
       return formToken(FormatToken::equal, tokStart);
     case '<':
       return formToken(FormatToken::less, tokStart);
     case '>':
       return formToken(FormatToken::greater, tokStart);
     case '?':
       return formToken(FormatToken::question, tokStart);
     case '(':
       return formToken(FormatToken::l_paren, tokStart);
     case ')':
       return formToken(FormatToken::r_paren, tokStart);
     case '*':
       return formToken(FormatToken::star, tokStart);

     // Tokens introduced by a sigil.
     case '`':
       return lexLiteral(tokStart);
     case '$':
       return lexVariable(tokStart);

     default:
       // Identifiers start with [a-zA-Z_].
       if (isalpha(nextChar) || nextChar == '_')
         return lexIdentifier(tokStart);

       // Anything else is not part of the format language.
       return emitError(tokStart, "unexpected character");
     }
   }
 }

 /// Lexes a backtick-delimited literal. Expects the opening '`' to have just
 /// been consumed; returns a literal token once the closing '`' is consumed, or
 /// an error token if a nul byte is reached first.
 FormatToken FormatLexer::lexLiteral(const char *tokStart) {
   assert(curPtr[-1] == '`');

   // Scan forward until the closing backtick; a nul byte ends the scan.
   for (;;) {
     const char scanned = *curPtr++;
     if (scanned == '`')
       return formToken(FormatToken::literal, tokStart);
     if (scanned == 0)
       break;
   }

   // NOTE(review): curPtr has already moved one past the nul here, so the
   // error is reported at `curPtr - 1`. If that nul was the buffer terminator,
   // further lexing would read past it - confirm callers stop on an error
   // token.
   return emitError(curPtr - 1, "unexpected end of file in literal");
 }

 /// Lexes a variable reference of the form `$name`, where `name` matches
 /// [a-zA-Z_][a-zA-Z0-9_]*. Called with the '$' already consumed; `tokStart`
 /// is the start of the token. Returns a variable token, or an error token if
 /// no valid name follows the '$'.
 FormatToken FormatLexer::lexVariable(const char *tokStart) {
   // The <cctype> classifiers have undefined behavior for negative arguments,
   // which a plain `char` holding a non-ASCII byte can be, so every character
   // is converted to unsigned char before being classified.
   const unsigned char firstChar = static_cast<unsigned char>(curPtr[0]);
   if (!isalpha(firstChar) && firstChar != '_')
     return emitError(curPtr - 1, "expected variable name");

   // Otherwise, consume the rest of the characters.
   while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_')
     ++curPtr;
   return formToken(FormatToken::variable, tokStart);
 }

 /// Lexes a bare identifier starting at `tokStart` (its first character has
 /// already been consumed) and classifies it: a spelling that matches one of
 /// the format keywords yields that keyword's token kind, anything else yields
 /// a plain identifier token.
 FormatToken FormatLexer::lexIdentifier(const char *tokStart) {
   // Match the rest of the identifier regex: [0-9a-zA-Z_\-]*
   // The character is converted to unsigned char first because the <cctype>
   // classifiers have undefined behavior for negative arguments, which a plain
   // `char` holding a non-ASCII byte can be.
   while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_' ||
          *curPtr == '-')
     ++curPtr;

   // Check to see if this identifier is a keyword.
   StringRef str(tokStart, curPtr - tokStart);
   auto kind =
       StringSwitch<FormatToken::Kind>(str)
           .Case("attr-dict", FormatToken::kw_attr_dict)
           .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword)
           .Case("custom", FormatToken::kw_custom)
           .Case("functional-type", FormatToken::kw_functional_type)
           .Case("operands", FormatToken::kw_operands)
           .Case("params", FormatToken::kw_params)
           .Case("ref", FormatToken::kw_ref)
           .Case("regions", FormatToken::kw_regions)
           .Case("results", FormatToken::kw_results)
           .Case("struct", FormatToken::kw_struct)
           .Case("successors", FormatToken::kw_successors)
           .Case("type", FormatToken::kw_type)
           .Default(FormatToken::identifier);
   return FormatToken(kind, str);
 }

 //===----------------------------------------------------------------------===//
 // Utility Functions
 //===----------------------------------------------------------------------===//

 /// Decides whether a space is emitted before the literal `value`.
 /// Anything other than a single character or "->" always gets a space. For
 /// the rest, the first character is checked against a set of characters that
 /// take no leading space; that set is smaller when the previously emitted
 /// element was punctuation.
 bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value,
                                          bool lastWasPunctuation) {
   const bool mayOmitSpace = value.size() == 1 || value == "->";
   if (!mayOmitSpace)
     return true;

   const StringRef noLeadingSpace =
       lastWasPunctuation ? ">)}]," : "<>(){}[],";
   return !noLeadingSpace.contains(value.front());
 }

 /// Returns true if `value` has the shape of a keyword: a first character in
 /// [a-zA-Z_] followed by any number of characters in [a-zA-Z0-9_$.].
 /// An empty string is not a keyword.
 bool mlir::tblgen::canFormatStringAsKeyword(StringRef value) {
   // Guard the `front()` access below, which requires a non-empty string.
   if (value.empty())
     return false;

   // The <cctype> classifiers have undefined behavior for negative arguments,
   // which a plain `char` holding a non-ASCII byte can be, so characters are
   // converted to unsigned char before being classified.
   const char first = value.front();
   if (!isalpha(static_cast<unsigned char>(first)) && first != '_')
     return false;
   return llvm::all_of(value.drop_front(), [](char c) {
     return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
            c == '.';
   });
 }

 /// Returns true if `value` is acceptable as a literal in an assembly format:
 /// a single letter, one of the single punctuation characters
 /// `_:,=<>()[]{}?+*`, the arrow "->", or a string accepted by
 /// `canFormatStringAsKeyword`. An empty string is never valid.
 bool mlir::tblgen::isValidLiteral(StringRef value) {
   if (value.empty())
     return false;
   char front = value.front();

   // If there is only one character, this must either be punctuation or a
   // single character bare identifier. The character is converted to unsigned
   // char because `isalpha` has undefined behavior for negative arguments,
   // which a plain `char` holding a non-ASCII byte can be.
   if (value.size() == 1)
     return isalpha(static_cast<unsigned char>(front)) ||
            StringRef("_:,=<>()[]{}?+*").contains(front);

   // Check the punctuation that are larger than a single character.
   if (value == "->")
     return true;

   // Otherwise, this must be an identifier.
   return canFormatStringAsKeyword(value);
 }

 //===----------------------------------------------------------------------===//
 // Commandline Options
 //===----------------------------------------------------------------------===//

 /// Boolean command-line option `asmformat-error-is-fatal`; it is initialized
 /// to true.
 llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal(
     "asmformat-error-is-fatal",
     llvm::cl::desc("Emit a fatal error if format parsing fails"),
     llvm::cl::init(true));
	//===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "FormatGen.h"
	#include "llvm/ADT/StringSwitch.h"
	#include "llvm/Support/SourceMgr.h"
	#include "llvm/TableGen/Error.h"

	using namespace mlir;
	using namespace mlir::tblgen;

	//===----------------------------------------------------------------------===//
	// FormatToken
	//===----------------------------------------------------------------------===//

	/// Returns the source location of this token, derived from the address of the
	/// first character of its spelling.
	llvm::SMLoc FormatToken::getLoc() const {
	  const char *spellingStart = spelling.data();
	  return llvm::SMLoc::getFromPointer(spellingStart);
	}

	//===----------------------------------------------------------------------===//
	// FormatLexer
	//===----------------------------------------------------------------------===//

	/// Creates a lexer over the main file buffer of `mgr`. `loc` is stored and
	/// later used as the location of the context note attached to diagnostics.
	FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc)
	: mgr(mgr), loc(loc),
	// The text to lex is the contents of the source manager's main file.
	curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()),
	// Lexing starts at the first character of that buffer.
	curPtr(curBuffer.begin()) {}

	/// Reports `msg` as an error at `loc` through the lexer's source manager, then
	/// emits a context note at the lexer's stored location through the global
	/// `llvm::SrcMgr`. Returns an error token formed at `loc`.
	FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) {
	  const char *errorPtr = loc.getPointer();
	  mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);

	  // The context note is routed through the global source manager, not `mgr`.
	  const auto &contextLoc = this->loc;
	  llvm::SrcMgr.PrintMessage(contextLoc, llvm::SourceMgr::DK_Note,
	                            "in custom assembly format for this operation");
	  return formToken(FormatToken::error, errorPtr);
	}

	/// Convenience overload: wraps the raw pointer `loc` in an `SMLoc` and forwards
	/// to the `SMLoc` overload of `emitError`.
	FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) {
	  const llvm::SMLoc errorLoc = llvm::SMLoc::getFromPointer(loc);
	  return emitError(errorLoc, msg);
	}

	/// Reports `msg` as an error at `loc`, emits the context note at the lexer's
	/// stored location through the global `llvm::SrcMgr`, and finally attaches
	/// `note` at `loc`. Returns an error token formed at `loc`.
	FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
	                                          const Twine &note) {
	  const char *errorPtr = loc.getPointer();
	  const auto noteKind = llvm::SourceMgr::DK_Note;

	  // Order matters for the printed output: error, context note, extra note.
	  mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);
	  const auto &contextLoc = this->loc;
	  llvm::SrcMgr.PrintMessage(contextLoc, noteKind,
	                            "in custom assembly format for this operation");
	  mgr.PrintMessage(loc, noteKind, note);
	  return formToken(FormatToken::error, errorPtr);
	}

	/// Consumes and returns the next character of the buffer. Returns EOF (without
	/// advancing) when the nul at the end of the buffer is reached, 0 for a nul
	/// found anywhere else, and '\n' for any of "\n", "\r", "\n\r" or "\r\n".
	int FormatLexer::getNextChar() {
	  char curChar = *curPtr++;
	  switch (curChar) {
	  default:
	    // Widen through unsigned char so the result is never negative.
	    return (unsigned char)curChar;
	  case 0: {
	    // A nul character in the stream is either the end of the current buffer or
	    // a random nul in the file. Disambiguate that here.
	    if (curPtr - 1 != curBuffer.end())
	      return 0;

	    // Otherwise, return end of file. Step back so repeated calls keep
	    // returning EOF.
	    --curPtr;
	    return EOF;
	  }
	  case '\n':
	  case '\r':
	    // Be careful about 'dos style' files with \n\r in them: only a \n\r or
	    // \r\n pair is folded into a single newline. The character that follows
	    // (not the pointer itself) is what must be compared here.
	    if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
	      ++curPtr;
	    return '\n';
	  }
	}

	/// Lexes and returns the next token. Whitespace (including stray nul bytes) is
	/// skipped; an unrecognized character produces an error token.
	FormatToken FormatLexer::lexToken() {
	  const char *tokStart = curPtr;

	  // This always consumes at least one character.
	  int curChar = getNextChar();
	  switch (curChar) {
	  default:
	    // Handle identifiers: [a-zA-Z_]
	    if (isalpha(curChar) || curChar == '_')
	      return lexIdentifier(tokStart);

	    // Unknown character, emit an error.
	    return emitError(tokStart, "unexpected character");
	  case EOF:
	    // Return EOF denoting the end of lexing.
	    return formToken(FormatToken::eof, tokStart);

	  // Lex punctuation.
	  case '^':
	    return formToken(FormatToken::caret, tokStart);
	  case ':':
	    return formToken(FormatToken::colon, tokStart);
	  case ',':
	    return formToken(FormatToken::comma, tokStart);
	  case '=':
	    return formToken(FormatToken::equal, tokStart);
	  case '<':
	    return formToken(FormatToken::less, tokStart);
	  case '>':
	    return formToken(FormatToken::greater, tokStart);
	  case '?':
	    return formToken(FormatToken::question, tokStart);
	  case '(':
	    return formToken(FormatToken::l_paren, tokStart);
	  case ')':
	    return formToken(FormatToken::r_paren, tokStart);
	  case '*':
	    return formToken(FormatToken::star, tokStart);

	  // Ignore whitespace characters: restart lexing after the skipped one.
	  case 0:
	  case ' ':
	  case '\t':
	  case '\n':
	    return lexToken();

	  case '`':
	    return lexLiteral(tokStart);
	  case '$':
	    return lexVariable(tokStart);
	  }
	}

	/// Lexes a backtick-delimited literal. Expects the opening '`' to have just
	/// been consumed; returns a literal token once the closing '`' is consumed, or
	/// an error token if a nul byte is reached first.
	FormatToken FormatLexer::lexLiteral(const char *tokStart) {
	  assert(curPtr[-1] == '`');

	  // Scan forward until the closing backtick; a nul byte ends the scan.
	  for (;;) {
	    const char scanned = *curPtr++;
	    if (scanned == '`')
	      return formToken(FormatToken::literal, tokStart);
	    if (scanned == 0)
	      break;
	  }

	  // NOTE(review): curPtr has already moved one past the nul here, so the
	  // error is reported at `curPtr - 1`. If that nul was the buffer terminator,
	  // further lexing would read past it - confirm callers stop on an error
	  // token.
	  return emitError(curPtr - 1, "unexpected end of file in literal");
	}

	/// Lexes a variable reference of the form `$name`, where `name` matches
	/// [a-zA-Z_][a-zA-Z0-9_]*. Called with the '$' already consumed; `tokStart`
	/// is the start of the token. Returns a variable token, or an error token if
	/// no valid name follows the '$'.
	FormatToken FormatLexer::lexVariable(const char *tokStart) {
	  // The <cctype> classifiers have undefined behavior for negative arguments,
	  // which a plain `char` holding a non-ASCII byte can be, so every character
	  // is converted to unsigned char before being classified.
	  const unsigned char firstChar = static_cast<unsigned char>(curPtr[0]);
	  if (!isalpha(firstChar) && firstChar != '_')
	    return emitError(curPtr - 1, "expected variable name");

	  // Otherwise, consume the rest of the characters. The pointed-to character
	  // (not the pointer itself) is what gets classified.
	  while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_')
	    ++curPtr;
	  return formToken(FormatToken::variable, tokStart);
	}

	/// Lexes a bare identifier starting at `tokStart` (its first character has
	/// already been consumed) and classifies it: a spelling that matches one of
	/// the format keywords yields that keyword's token kind, anything else yields
	/// a plain identifier token.
	FormatToken FormatLexer::lexIdentifier(const char *tokStart) {
	  // Match the rest of the identifier regex: [0-9a-zA-Z_\-]*
	  // The pointed-to character is converted to unsigned char first because the
	  // <cctype> classifiers have undefined behavior for negative arguments,
	  // which a plain `char` holding a non-ASCII byte can be.
	  while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_' ||
	         *curPtr == '-')
	    ++curPtr;

	  // Check to see if this identifier is a keyword.
	  StringRef str(tokStart, curPtr - tokStart);
	  auto kind =
	      StringSwitch<FormatToken::Kind>(str)
	          .Case("attr-dict", FormatToken::kw_attr_dict)
	          .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword)
	          .Case("custom", FormatToken::kw_custom)
	          .Case("functional-type", FormatToken::kw_functional_type)
	          .Case("operands", FormatToken::kw_operands)
	          .Case("params", FormatToken::kw_params)
	          .Case("ref", FormatToken::kw_ref)
	          .Case("regions", FormatToken::kw_regions)
	          .Case("results", FormatToken::kw_results)
	          .Case("struct", FormatToken::kw_struct)
	          .Case("successors", FormatToken::kw_successors)
	          .Case("type", FormatToken::kw_type)
	          .Default(FormatToken::identifier);
	  return FormatToken(kind, str);
	}

	//===----------------------------------------------------------------------===//
	// Utility Functions
	//===----------------------------------------------------------------------===//

	/// Decides whether a space is emitted before the literal `value`.
	/// Anything other than a single character or "->" always gets a space. For
	/// the rest, the first character is checked against a set of characters that
	/// take no leading space; that set is smaller when the previously emitted
	/// element was punctuation.
	bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value,
	                                         bool lastWasPunctuation) {
	  const bool mayOmitSpace = value.size() == 1 || value == "->";
	  if (!mayOmitSpace)
	    return true;

	  const StringRef noLeadingSpace =
	      lastWasPunctuation ? ">)}]," : "<>(){}[],";
	  return !noLeadingSpace.contains(value.front());
	}

	/// Returns true if `value` has the shape of a keyword: a first character in
	/// [a-zA-Z_] followed by any number of characters in [a-zA-Z0-9_$.].
	/// An empty string is not a keyword.
	bool mlir::tblgen::canFormatStringAsKeyword(StringRef value) {
	  // Guard the `front()` access below, which requires a non-empty string.
	  if (value.empty())
	    return false;

	  // The <cctype> classifiers have undefined behavior for negative arguments,
	  // which a plain `char` holding a non-ASCII byte can be, so characters are
	  // converted to unsigned char before being classified.
	  const char first = value.front();
	  if (!isalpha(static_cast<unsigned char>(first)) && first != '_')
	    return false;
	  return llvm::all_of(value.drop_front(), [](char c) {
	    return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
	           c == '.';
	  });
	}

	/// Returns true if `value` is acceptable as a literal in an assembly format:
	/// a single letter, one of the single punctuation characters
	/// `_:,=<>()[]{}?+*`, the arrow "->", or a string accepted by
	/// `canFormatStringAsKeyword`. An empty string is never valid.
	bool mlir::tblgen::isValidLiteral(StringRef value) {
	  if (value.empty())
	    return false;
	  char front = value.front();

	  // If there is only one character, this must either be punctuation or a
	  // single character bare identifier. The character is converted to unsigned
	  // char because `isalpha` has undefined behavior for negative arguments,
	  // which a plain `char` holding a non-ASCII byte can be.
	  if (value.size() == 1)
	    return isalpha(static_cast<unsigned char>(front)) ||
	           StringRef("_:,=<>()[]{}?+*").contains(front);

	  // Check the punctuation that are larger than a single character.
	  if (value == "->")
	    return true;

	  // Otherwise, this must be an identifier.
	  return canFormatStringAsKeyword(value);
	}

	//===----------------------------------------------------------------------===//
	// Commandline Options
	//===----------------------------------------------------------------------===//

	/// Boolean command-line option `asmformat-error-is-fatal`; it is initialized
	/// to true.
	llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal(
	"asmformat-error-is-fatal",
	llvm::cl::desc("Emit a fatal error if format parsing fails"),
	llvm::cl::init(true));