| //===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "FormatGen.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/Support/SourceMgr.h" |
| #include "llvm/TableGen/Error.h" |
| |
| using namespace mlir; |
| using namespace mlir::tblgen; |
| |
| //===----------------------------------------------------------------------===// |
| // FormatToken |
| //===----------------------------------------------------------------------===// |
| |
| llvm::SMLoc FormatToken::getLoc() const { |
| return llvm::SMLoc::getFromPointer(spelling.data()); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // FormatLexer |
| //===----------------------------------------------------------------------===// |
| |
| FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc) |
| : mgr(mgr), loc(loc), |
| curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()), |
| curPtr(curBuffer.begin()) {} |
| |
| FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) { |
| mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); |
| llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, |
| "in custom assembly format for this operation"); |
| return formToken(FormatToken::error, loc.getPointer()); |
| } |
| |
| FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) { |
| return emitError(llvm::SMLoc::getFromPointer(loc), msg); |
| } |
| |
| FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, |
| const Twine ¬e) { |
| mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); |
| llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, |
| "in custom assembly format for this operation"); |
| mgr.PrintMessage(loc, llvm::SourceMgr::DK_Note, note); |
| return formToken(FormatToken::error, loc.getPointer()); |
| } |
| |
| int FormatLexer::getNextChar() { |
| char curChar = *curPtr++; |
| switch (curChar) { |
| default: |
| return (unsigned char)curChar; |
| case 0: { |
| // A nul character in the stream is either the end of the current buffer or |
| // a random nul in the file. Disambiguate that here. |
| if (curPtr - 1 != curBuffer.end()) |
| return 0; |
| |
| // Otherwise, return end of file. |
| --curPtr; |
| return EOF; |
| } |
| case '\n': |
| case '\r': |
| // Handle the newline character by ignoring it and incrementing the line |
| // count. However, be careful about 'dos style' files with \n\r in them. |
| // Only treat a \n\r or \r\n as a single line. |
| if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar) |
| ++curPtr; |
| return '\n'; |
| } |
| } |
| |
| FormatToken FormatLexer::lexToken() { |
| const char *tokStart = curPtr; |
| |
| // This always consumes at least one character. |
| int curChar = getNextChar(); |
| switch (curChar) { |
| default: |
| // Handle identifiers: [a-zA-Z_] |
| if (isalpha(curChar) || curChar == '_') |
| return lexIdentifier(tokStart); |
| |
| // Unknown character, emit an error. |
| return emitError(tokStart, "unexpected character"); |
| case EOF: |
| // Return EOF denoting the end of lexing. |
| return formToken(FormatToken::eof, tokStart); |
| |
| // Lex punctuation. |
| case '^': |
| return formToken(FormatToken::caret, tokStart); |
| case ':': |
| return formToken(FormatToken::colon, tokStart); |
| case ',': |
| return formToken(FormatToken::comma, tokStart); |
| case '=': |
| return formToken(FormatToken::equal, tokStart); |
| case '<': |
| return formToken(FormatToken::less, tokStart); |
| case '>': |
| return formToken(FormatToken::greater, tokStart); |
| case '?': |
| return formToken(FormatToken::question, tokStart); |
| case '(': |
| return formToken(FormatToken::l_paren, tokStart); |
| case ')': |
| return formToken(FormatToken::r_paren, tokStart); |
| case '*': |
| return formToken(FormatToken::star, tokStart); |
| |
| // Ignore whitespace characters. |
| case 0: |
| case ' ': |
| case '\t': |
| case '\n': |
| return lexToken(); |
| |
| case '`': |
| return lexLiteral(tokStart); |
| case '$': |
| return lexVariable(tokStart); |
| } |
| } |
| |
| FormatToken FormatLexer::lexLiteral(const char *tokStart) { |
| assert(curPtr[-1] == '`'); |
| |
| // Lex a literal surrounded by ``. |
| while (const char curChar = *curPtr++) { |
| if (curChar == '`') |
| return formToken(FormatToken::literal, tokStart); |
| } |
| return emitError(curPtr - 1, "unexpected end of file in literal"); |
| } |
| |
| FormatToken FormatLexer::lexVariable(const char *tokStart) { |
| if (!isalpha(curPtr[0]) && curPtr[0] != '_') |
| return emitError(curPtr - 1, "expected variable name"); |
| |
| // Otherwise, consume the rest of the characters. |
| while (isalnum(*curPtr) || *curPtr == '_') |
| ++curPtr; |
| return formToken(FormatToken::variable, tokStart); |
| } |
| |
| FormatToken FormatLexer::lexIdentifier(const char *tokStart) { |
| // Match the rest of the identifier regex: [0-9a-zA-Z_\-]* |
| while (isalnum(*curPtr) || *curPtr == '_' || *curPtr == '-') |
| ++curPtr; |
| |
| // Check to see if this identifier is a keyword. |
| StringRef str(tokStart, curPtr - tokStart); |
| auto kind = |
| StringSwitch<FormatToken::Kind>(str) |
| .Case("attr-dict", FormatToken::kw_attr_dict) |
| .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword) |
| .Case("custom", FormatToken::kw_custom) |
| .Case("functional-type", FormatToken::kw_functional_type) |
| .Case("operands", FormatToken::kw_operands) |
| .Case("params", FormatToken::kw_params) |
| .Case("ref", FormatToken::kw_ref) |
| .Case("regions", FormatToken::kw_regions) |
| .Case("results", FormatToken::kw_results) |
| .Case("struct", FormatToken::kw_struct) |
| .Case("successors", FormatToken::kw_successors) |
| .Case("type", FormatToken::kw_type) |
| .Default(FormatToken::identifier); |
| return FormatToken(kind, str); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Utility Functions |
| //===----------------------------------------------------------------------===// |
| |
| bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value, |
| bool lastWasPunctuation) { |
| if (value.size() != 1 && value != "->") |
| return true; |
| if (lastWasPunctuation) |
| return !StringRef(">)}],").contains(value.front()); |
| return !StringRef("<>(){}[],").contains(value.front()); |
| } |
| |
| bool mlir::tblgen::canFormatStringAsKeyword(StringRef value) { |
| if (!isalpha(value.front()) && value.front() != '_') |
| return false; |
| return llvm::all_of(value.drop_front(), [](char c) { |
| return isalnum(c) || c == '_' || c == '$' || c == '.'; |
| }); |
| } |
| |
| bool mlir::tblgen::isValidLiteral(StringRef value) { |
| if (value.empty()) |
| return false; |
| char front = value.front(); |
| |
| // If there is only one character, this must either be punctuation or a |
| // single character bare identifier. |
| if (value.size() == 1) |
| return isalpha(front) || StringRef("_:,=<>()[]{}?+*").contains(front); |
| |
| // Check the punctuation that are larger than a single character. |
| if (value == "->") |
| return true; |
| |
| // Otherwise, this must be an identifier. |
| return canFormatStringAsKeyword(value); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Commandline Options |
| //===----------------------------------------------------------------------===// |
| |
| llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal( |
| "asmformat-error-is-fatal", |
| llvm::cl::desc("Emit a fatal error if format parsing fails"), |
| llvm::cl::init(true)); |