// blob: 7836ece2d6885d4796444737d7414a555071af1a
//===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
#define LLVM_MC_MCPARSER_MCASMLEXER_H
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
namespace llvm {
/// Target independent representation for an assembler token.
class AsmToken {
public:
  /// The lexical category of a token.
  enum TokenKind {
    // Markers
    Eof, Error,

    // String values.
    Identifier,
    String,

    // Integer values.
    Integer,
    BigNum, // larger than 64 bits

    // Real values.
    Real,

    // Comments
    Comment,
    HashDirective,

    // No-value.
    EndOfStatement,
    Colon,
    Space,
    Plus, Minus, Tilde,
    Slash,     // '/'
    BackSlash, // '\'
    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
    Star, Dot, Comma, Dollar, Equal, EqualEqual,

    Pipe, PipePipe, Caret,
    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
    Less, LessEqual, LessLess, LessGreater,
    Greater, GreaterEqual, GreaterGreater, At,

    // MIPS unary expression operators such as %neg.
    PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
    PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
    PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
    PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
    PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
    PercentTprel_Lo
  };

private:
  /// The lexical category of this token. It defaults to Eof so that a
  /// default-constructed token never exposes an indeterminate kind through
  /// getKind(), is() or isNot().
  TokenKind Kind = Eof;

  /// A reference to the entire token contents; this is always a pointer into
  /// a memory buffer owned by the source manager.
  StringRef Str;

  /// The numeric value attached to the token. The accessors below only hand it
  /// out for Integer (and, for getAPIntVal(), BigNum) tokens.
  APInt IntVal;

public:
  /// Construct a token of kind Eof with default-constructed contents and
  /// value.
  AsmToken() = default;

  /// Construct a token whose numeric value is given as an arbitrary-width
  /// integer. \p IntVal is taken by value and moved into the token.
  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
      : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}

  /// Construct a token whose numeric value fits in 64 bits. The value is
  /// stored as a signed 64-bit APInt and defaults to zero.
  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
      : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}

  /// Return the kind of this token.
  TokenKind getKind() const { return Kind; }

  /// Return true if this token has kind \p K.
  bool is(TokenKind K) const { return Kind == K; }

  /// Return true if this token does not have kind \p K.
  bool isNot(TokenKind K) const { return Kind != K; }

  // Source-location accessors; they are defined out of line.
  SMLoc getLoc() const;
  SMLoc getEndLoc() const;
  SMRange getLocRange() const;

  /// Get the contents of a string token (without quotes).
  ///
  /// The token must be of kind String (asserted). The result is the token
  /// text with its first and last character dropped.
  StringRef getStringContents() const {
    assert(Kind == String && "This token isn't a string!");
    return Str.slice(1, Str.size() - 1);
  }

  /// Get the identifier string for the current token, which should be an
  /// identifier or a string. This gets the portion of the string which should
  /// be used as the identifier, e.g., it does not include the quotes on
  /// strings.
  ///
  /// Any kind other than Identifier is forwarded to getStringContents(), which
  /// asserts that the token is a String.
  StringRef getIdentifier() const {
    if (Kind == Identifier)
      return getString();
    return getStringContents();
  }

  /// Get the string for the current token, this includes all characters (for
  /// example, the quotes on strings) in the token.
  ///
  /// The returned StringRef points into the source manager's memory buffer, and
  /// is safe to store across calls to Lex().
  StringRef getString() const { return Str; }

  // FIXME: Don't compute this in advance, it makes every token larger, and is
  // also not generally what we want (it is nicer for recovery etc. to lex 123br
  // as a single token, then diagnose as an invalid number).
  /// Return the value of an Integer token as a 64-bit integer. The token must
  /// be of kind Integer (asserted); use getAPIntVal() for BigNum tokens.
  int64_t getIntVal() const {
    assert(Kind == Integer && "This token isn't an integer!");
    return IntVal.getZExtValue();
  }

  /// Return a copy of the arbitrary-width value of an Integer or BigNum token.
  /// Any other kind trips the assertion.
  APInt getAPIntVal() const {
    assert((Kind == Integer || Kind == BigNum) &&
           "This token isn't an integer!");
    return IntVal;
  }
};
/// A callback class which is notified of each comment in an assembly file as
/// it is lexed.
class AsmCommentConsumer {
public:
// Virtual so that implementations can be destroyed through a pointer to this
// interface.
virtual ~AsmCommentConsumer() = default;
/// Callback function for when a comment is lexed. Loc is the start of the
/// comment text (excluding the comment-start marker). CommentText is the text
/// of the comment, excluding the comment start and end markers, and the
/// newline for single-line comments.
///
/// \param Loc          Location of the first character of the comment text.
/// \param CommentText  The comment text itself, without its delimiters.
///
/// Pure virtual: every consumer must provide an implementation.
virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
};
/// Generic assembler lexer interface, for use by target specific assembly
/// lexers.
class MCAsmLexer {
  /// The current token, stored in the base class for faster access.
  ///
  /// The front element is the current token. UnLex() pushes tokens in front of
  /// it, so the vector can temporarily hold more than one element.
  SmallVector<AsmToken, 1> CurTok;

  /// The location and description of the current error
  SMLoc ErrLoc;
  std::string Err;

protected: // Can only create subclasses.
  const char *TokStart = nullptr;
  bool SkipSpace = true;
  // Defaults to false so the flag is never read while indeterminate; the
  // constructor or setAllowAtInIdentifier() may still override it.
  bool AllowAtInIdentifier = false;
  bool IsAtStartOfStatement = true;
  AsmCommentConsumer *CommentConsumer = nullptr;
  // Defaults to false for the same reason; see SetAltMacroMode().
  bool AltMacroMode = false;

  MCAsmLexer();

  /// Produce the next token. Implemented by the concrete lexer and called by
  /// Lex() whenever no pushed-back token is pending.
  virtual AsmToken LexToken() = 0;

  /// Record an error location and message, retrievable through getErrLoc() and
  /// getErr().
  void SetError(SMLoc errLoc, const std::string &err) {
    ErrLoc = errLoc;
    Err = err;
  }

public:
  MCAsmLexer(const MCAsmLexer &) = delete;
  MCAsmLexer &operator=(const MCAsmLexer &) = delete;
  virtual ~MCAsmLexer();

  /// Return the value last stored by SetAltMacroMode().
  bool IsaAltMacroMode() const {
    return AltMacroMode;
  }

  /// Set the alternate-macro-mode flag.
  void SetAltMacroMode(bool AltMacroSet) {
    AltMacroMode = AltMacroSet;
  }

  /// Consume the next token from the input stream and return it.
  ///
  /// The lexer will continuously return the end-of-file token once the end of
  /// the main input file has been reached.
  ///
  /// The token queue must not be empty on entry (asserted). The returned
  /// reference refers to the front of the queue, which later Lex() and UnLex()
  /// calls modify.
  const AsmToken &Lex() {
    assert(!CurTok.empty());
    // Record whether the token being consumed is an EndOfStatement, i.e.
    // whether the next token starts a new statement.
    IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
    CurTok.erase(CurTok.begin());
    // LexToken may generate multiple tokens via UnLex but will always return
    // the first one. Place returned value at head of CurTok vector.
    if (CurTok.empty()) {
      AsmToken T = LexToken();
      CurTok.insert(CurTok.begin(), T);
    }
    return CurTok.front();
  }

  /// Push \p Token back in front of the current token, making it the new
  /// current token. This also clears the start-of-statement flag.
  void UnLex(AsmToken const &Token) {
    IsAtStartOfStatement = false;
    CurTok.insert(CurTok.begin(), Token);
  }

  /// Return true if the token consumed by the most recent Lex() call was an
  /// EndOfStatement and no token has been pushed back with UnLex() since. The
  /// flag starts out true.
  bool isAtStartOfStatement() const { return IsAtStartOfStatement; }

  /// Implemented by the concrete lexer.
  virtual StringRef LexUntilEndOfStatement() = 0;

  /// Get the current source location.
  SMLoc getLoc() const;

  /// Get the current (last) lexed token.
  const AsmToken &getTok() const {
    return CurTok[0];
  }

  /// Look ahead at the next token to be lexed.
  ///
  /// Implemented as a one-element peekTokens() request; the request must
  /// yield exactly one token (asserted).
  const AsmToken peekTok(bool ShouldSkipSpace = true) {
    AsmToken Tok;

    MutableArrayRef<AsmToken> Buf(Tok);
    size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);

    assert(ReadCount == 1);
    // Keep ReadCount "used" when assertions are compiled out.
    (void)ReadCount;

    return Tok;
  }

  /// Look ahead an arbitrary number of tokens.
  ///
  /// The tokens are written into \p Buf and the return value is the number of
  /// tokens read (peekTok() expects 1 for a one-element buffer).
  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
                            bool ShouldSkipSpace = true) = 0;

  /// Get the current error location
  SMLoc getErrLoc() const {
    return ErrLoc;
  }

  /// Get the current error string
  const std::string &getErr() const {
    return Err;
  }

  /// Get the kind of current token.
  AsmToken::TokenKind getKind() const { return getTok().getKind(); }

  /// Check if the current token has kind \p K.
  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }

  /// Check if the current token does not have kind \p K.
  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }

  /// Set whether spaces should be ignored by the lexer
  void setSkipSpace(bool val) { SkipSpace = val; }

  /// Return the AllowAtInIdentifier flag.
  bool getAllowAtInIdentifier() const { return AllowAtInIdentifier; }

  /// Set the AllowAtInIdentifier flag.
  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }

  /// Register the comment consumer; passing nullptr clears it. The pointer is
  /// stored as-is and not owned by the lexer.
  void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
    this->CommentConsumer = CommentConsumer;
  }
};
} // end namespace llvm
#endif // LLVM_MC_MCPARSER_MCASMLEXER_H