include/llvm/MC/MCParser/MCAsmLexer.h - llvm - Git at Google

 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
 #define LLVM_MC_MCPARSER_MCASMLEXER_H

 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SMLoc.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <string>

 namespace llvm {

 /// Target independent representation for an assembler token.
 ///
 /// A token pairs a TokenKind with the source text it was lexed from and, for
 /// numeric tokens, the integer value computed for it.
 class AsmToken {
 public:
   enum TokenKind {
     // Markers
     Eof, Error,

     // String values.
     Identifier,
     String,

     // Integer values.
     Integer,
     BigNum, // larger than 64 bits

     // Real values.
     Real,

     // Comments
     Comment,
     HashDirective,
     // No-value.
     EndOfStatement,
     Colon,
     Space,
     Plus, Minus, Tilde,
     Slash,     // '/'
     BackSlash, // '\'
     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     Star, Dot, Comma, Dollar, Equal, EqualEqual,

     Pipe, PipePipe, Caret,
     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     Less, LessEqual, LessLess, LessGreater,
     Greater, GreaterEqual, GreaterGreater, At,

     // MIPS unary expression operators such as %neg.
     PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
     PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
     PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
     PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
     PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
     PercentTprel_Lo
   };

 private:
   /// Kind of this token. Defaults to Eof so that a default-constructed token
   /// (such as the scratch token in MCAsmLexer::peekTok) never exposes an
   /// indeterminate value through getKind(), is() or isNot().
   TokenKind Kind = Eof;

   /// A reference to the entire token contents; this is always a pointer into
   /// a memory buffer owned by the source manager.
   StringRef Str;

   /// Integer value of the token; only read back for Integer and BigNum tokens.
   APInt IntVal;

 public:
   /// Create a token of kind Eof with default-constructed text and value.
   AsmToken() = default;

   /// Create a token of kind \p Kind spanning \p Str whose value is \p IntVal.
   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
       : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}

   /// Create a token of kind \p Kind spanning \p Str. \p IntVal is stored as a
   /// 64-bit signed value and defaults to 0.
   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}

   /// Get the kind of this token.
   TokenKind getKind() const { return Kind; }

   /// Check if this token has kind \p K.
   bool is(TokenKind K) const { return Kind == K; }

   /// Check if this token does not have kind \p K.
   bool isNot(TokenKind K) const { return Kind != K; }

   // Source-location accessors; implemented out of line.
   SMLoc getLoc() const;
   SMLoc getEndLoc() const;
   SMRange getLocRange() const;

   /// Get the contents of a string token (without quotes).
   ///
   /// Asserts that the token is a String; the first and last characters of the
   /// token text are dropped.
   StringRef getStringContents() const {
     assert(Kind == String && "This token isn't a string!");
     return Str.slice(1, Str.size() - 1);
   }

   /// Get the identifier string for the current token, which should be an
   /// identifier or a string. This gets the portion of the string which should
   /// be used as the identifier, e.g., it does not include the quotes on
   /// strings.
   StringRef getIdentifier() const {
     if (Kind == Identifier)
       return getString();
     // Anything else must be a String; getStringContents() asserts that.
     return getStringContents();
   }

   /// Get the string for the current token, this includes all characters (for
   /// example, the quotes on strings) in the token.
   ///
   /// The returned StringRef points into the source manager's memory buffer, and
   /// is safe to store across calls to Lex().
   StringRef getString() const { return Str; }

   // FIXME: Don't compute this in advance, it makes every token larger, and is
   // also not generally what we want (it is nicer for recovery etc. to lex 123br
   // as a single token, then diagnose as an invalid number).
   /// Get the value of an Integer token as an int64_t.
   ///
   /// Asserts that the token is an Integer. The stored value is read with
   /// APInt::getZExtValue() and converted to int64_t.
   int64_t getIntVal() const {
     assert(Kind == Integer && "This token isn't an integer!");
     return IntVal.getZExtValue();
   }

   /// Get the value of an Integer or BigNum token at its stored width.
   ///
   /// Asserts that the token is one of those two kinds.
   APInt getAPIntVal() const {
     assert((Kind == Integer || Kind == BigNum) &&
            "This token isn't an integer!");
     return IntVal;
   }
 };

 /// A callback class which is notified of each comment in an assembly file as
 /// it is lexed.
 ///
 /// A consumer is attached to a lexer with MCAsmLexer::setCommentConsumer(),
 /// which stores the raw pointer it is given.
 class AsmCommentConsumer {
 public:
   // Virtual so that implementations can be destroyed through a pointer to
   // this interface.
   virtual ~AsmCommentConsumer() = default;

   /// Callback function for when a comment is lexed. Loc is the start of the
   /// comment text (excluding the comment-start marker). CommentText is the text
   /// of the comment, excluding the comment start and end markers, and the
   /// newline for single-line comments.
   ///
   /// \param Loc Start of the comment text.
   /// \param CommentText Text of the comment without its markers.
   virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
 };


 /// Generic assembler lexer interface, for use by target specific assembly
 /// lexers.
 ///
 /// The base class holds the queue of pending tokens and the most recent
 /// error; subclasses supply the scanning itself through LexToken(),
 /// LexUntilEndOfStatement() and peekTokens().
 class MCAsmLexer {
   /// The current token, stored in the base class for faster access. Tokens
   /// pushed back with UnLex() are queued in front of it.
   SmallVector<AsmToken, 1> CurTok;

   /// The location and description of the current error
   SMLoc ErrLoc;
   std::string Err;

 protected: // Can only create subclasses.
   const char *TokStart = nullptr;
   bool SkipSpace = true;
   // Given an in-class default, like its siblings, so that
   // getAllowAtInIdentifier() never reads an indeterminate value.
   bool AllowAtInIdentifier = false;
   bool IsAtStartOfStatement = true;
   AsmCommentConsumer *CommentConsumer = nullptr;

   // Given an in-class default so that IsaAltMacroMode() never reads an
   // indeterminate value.
   bool AltMacroMode = false;
   MCAsmLexer();

   /// Lex and return the next token; implemented by subclasses.
   virtual AsmToken LexToken() = 0;

   /// Record \p err, located at \p errLoc, as the current error.
   void SetError(SMLoc errLoc, const std::string &err) {
     ErrLoc = errLoc;
     Err = err;
   }

 public:
   MCAsmLexer(const MCAsmLexer &) = delete;
   MCAsmLexer &operator=(const MCAsmLexer &) = delete;
   virtual ~MCAsmLexer();

   /// Check whether alternate macro mode is enabled.
   bool IsaAltMacroMode() const {
     return AltMacroMode;
   }

   /// Enable or disable alternate macro mode.
   void SetAltMacroMode(bool AltMacroSet) {
     AltMacroMode = AltMacroSet;
   }

   /// Consume the next token from the input stream and return it.
   ///
   /// The lexer will continuously return the end-of-file token once the end of
   /// the main input file has been reached.
   const AsmToken &Lex() {
     assert(!CurTok.empty());
     // Record whether the token being consumed is an EndOfStatement, i.e.
     // whether the next token starts a new statement.
     IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
     CurTok.erase(CurTok.begin());
     // LexToken may generate multiple tokens via UnLex but will always return
     // the first one. Place returned value at head of CurTok vector. The token
     // is lexed into a local first because LexToken() may itself modify CurTok.
     if (CurTok.empty()) {
       AsmToken T = LexToken();
       CurTok.insert(CurTok.begin(), T);
     }
     return CurTok.front();
   }

   /// Push \p Token back so that it becomes the current token.
   void UnLex(AsmToken const &Token) {
     IsAtStartOfStatement = false;
     CurTok.insert(CurTok.begin(), Token);
   }

   /// Check whether the last token consumed by Lex() was an EndOfStatement and
   /// nothing has been pushed back with UnLex() since.
   bool isAtStartOfStatement() const { return IsAtStartOfStatement; }

   /// Implemented by subclasses.
   virtual StringRef LexUntilEndOfStatement() = 0;

   /// Get the current source location.
   SMLoc getLoc() const;

   /// Get the current (last) lexed token.
   const AsmToken &getTok() const {
     return CurTok[0];
   }

   /// Look ahead at the next token to be lexed.
   ///
   /// Calls peekTokens() with a one-element buffer and asserts that exactly one
   /// token was read.
   const AsmToken peekTok(bool ShouldSkipSpace = true) {
     AsmToken Tok;

     MutableArrayRef<AsmToken> Buf(Tok);
     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);

     assert(ReadCount == 1);
     (void)ReadCount; // Only used by the assert above.

     return Tok;
   }

   /// Look ahead an arbitrary number of tokens.
   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
                             bool ShouldSkipSpace = true) = 0;

   /// Get the current error location
   SMLoc getErrLoc() const {
     return ErrLoc;
   }

   /// Get the current error string
   const std::string &getErr() const {
     return Err;
   }

   /// Get the kind of current token.
   AsmToken::TokenKind getKind() const { return getTok().getKind(); }

   /// Check if the current token has kind \p K.
   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }

   /// Check if the current token does not have kind \p K.
   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }

   /// Set whether spaces should be ignored by the lexer
   void setSkipSpace(bool val) { SkipSpace = val; }

   /// Get whether '@' is allowed inside identifiers.
   bool getAllowAtInIdentifier() const { return AllowAtInIdentifier; }

   /// Set whether '@' is allowed inside identifiers.
   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }

   /// Set the consumer notified of comments; the pointer is stored as given.
   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
     this->CommentConsumer = CommentConsumer;
   }
 };

 } // end namespace llvm

 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H
	//===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
	#define LLVM_MC_MCPARSER_MCASMLEXER_H

	#include "llvm/ADT/APInt.h"
	#include "llvm/ADT/ArrayRef.h"
	#include "llvm/ADT/SmallVector.h"
	#include "llvm/ADT/StringRef.h"
	#include "llvm/Support/SMLoc.h"
	#include <algorithm>
	#include <cassert>
	#include <cstddef>
	#include <cstdint>
	#include <string>

	namespace llvm {

	/// Target independent representation for an assembler token.
	///
	/// A token pairs a TokenKind with the source text it was lexed from and, for
	/// numeric tokens, the integer value computed for it.
	class AsmToken {
	public:
	  enum TokenKind {
	    // Markers
	    Eof, Error,

	    // String values.
	    Identifier,
	    String,

	    // Integer values.
	    Integer,
	    BigNum, // larger than 64 bits

	    // Real values.
	    Real,

	    // Comments
	    Comment,
	    HashDirective,
	    // No-value.
	    EndOfStatement,
	    Colon,
	    Space,
	    Plus, Minus, Tilde,
	    Slash,     // '/'
	    BackSlash, // '\'
	    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
	    Star, Dot, Comma, Dollar, Equal, EqualEqual,

	    Pipe, PipePipe, Caret,
	    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
	    Less, LessEqual, LessLess, LessGreater,
	    Greater, GreaterEqual, GreaterGreater, At,

	    // MIPS unary expression operators such as %neg.
	    PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
	    PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
	    PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
	    PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
	    PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
	    PercentTprel_Lo
	  };

	private:
	  /// Kind of this token. Defaults to Eof so that a default-constructed token
	  /// (such as the scratch token in MCAsmLexer::peekTok) never exposes an
	  /// indeterminate value through getKind(), is() or isNot().
	  TokenKind Kind = Eof;

	  /// A reference to the entire token contents; this is always a pointer into
	  /// a memory buffer owned by the source manager.
	  StringRef Str;

	  /// Integer value of the token; only read back for Integer and BigNum tokens.
	  APInt IntVal;

	public:
	  /// Create a token of kind Eof with default-constructed text and value.
	  AsmToken() = default;

	  /// Create a token of kind \p Kind spanning \p Str whose value is \p IntVal.
	  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
	      : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}

	  /// Create a token of kind \p Kind spanning \p Str. \p IntVal is stored as a
	  /// 64-bit signed value and defaults to 0.
	  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
	      : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}

	  /// Get the kind of this token.
	  TokenKind getKind() const { return Kind; }

	  /// Check if this token has kind \p K.
	  bool is(TokenKind K) const { return Kind == K; }

	  /// Check if this token does not have kind \p K.
	  bool isNot(TokenKind K) const { return Kind != K; }

	  // Source-location accessors; implemented out of line.
	  SMLoc getLoc() const;
	  SMLoc getEndLoc() const;
	  SMRange getLocRange() const;

	  /// Get the contents of a string token (without quotes).
	  ///
	  /// Asserts that the token is a String; the first and last characters of the
	  /// token text are dropped.
	  StringRef getStringContents() const {
	    assert(Kind == String && "This token isn't a string!");
	    return Str.slice(1, Str.size() - 1);
	  }

	  /// Get the identifier string for the current token, which should be an
	  /// identifier or a string. This gets the portion of the string which should
	  /// be used as the identifier, e.g., it does not include the quotes on
	  /// strings.
	  StringRef getIdentifier() const {
	    if (Kind == Identifier)
	      return getString();
	    // Anything else must be a String; getStringContents() asserts that.
	    return getStringContents();
	  }

	  /// Get the string for the current token, this includes all characters (for
	  /// example, the quotes on strings) in the token.
	  ///
	  /// The returned StringRef points into the source manager's memory buffer, and
	  /// is safe to store across calls to Lex().
	  StringRef getString() const { return Str; }

	  // FIXME: Don't compute this in advance, it makes every token larger, and is
	  // also not generally what we want (it is nicer for recovery etc. to lex 123br
	  // as a single token, then diagnose as an invalid number).
	  /// Get the value of an Integer token as an int64_t.
	  ///
	  /// Asserts that the token is an Integer. The stored value is read with
	  /// APInt::getZExtValue() and converted to int64_t.
	  int64_t getIntVal() const {
	    assert(Kind == Integer && "This token isn't an integer!");
	    return IntVal.getZExtValue();
	  }

	  /// Get the value of an Integer or BigNum token at its stored width.
	  ///
	  /// Asserts that the token is one of those two kinds.
	  APInt getAPIntVal() const {
	    assert((Kind == Integer || Kind == BigNum) &&
	           "This token isn't an integer!");
	    return IntVal;
	  }
	};

	/// A callback class which is notified of each comment in an assembly file as
	/// it is lexed.
	///
	/// A consumer is attached to a lexer with MCAsmLexer::setCommentConsumer(),
	/// which stores the raw pointer it is given.
	class AsmCommentConsumer {
	public:
	// Virtual so that implementations can be destroyed through a pointer to
	// this interface.
	virtual ~AsmCommentConsumer() = default;

	/// Callback function for when a comment is lexed. Loc is the start of the
	/// comment text (excluding the comment-start marker). CommentText is the text
	/// of the comment, excluding the comment start and end markers, and the
	/// newline for single-line comments.
	///
	/// \param Loc Start of the comment text.
	/// \param CommentText Text of the comment without its markers.
	virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
	};


	/// Generic assembler lexer interface, for use by target specific assembly
	/// lexers.
	///
	/// The base class holds the queue of pending tokens and the most recent
	/// error; subclasses supply the scanning itself through LexToken(),
	/// LexUntilEndOfStatement() and peekTokens().
	class MCAsmLexer {
	  /// The current token, stored in the base class for faster access. Tokens
	  /// pushed back with UnLex() are queued in front of it.
	  SmallVector<AsmToken, 1> CurTok;

	  /// The location and description of the current error
	  SMLoc ErrLoc;
	  std::string Err;

	protected: // Can only create subclasses.
	  const char *TokStart = nullptr;
	  bool SkipSpace = true;
	  // Given an in-class default, like its siblings, so that
	  // getAllowAtInIdentifier() never reads an indeterminate value.
	  bool AllowAtInIdentifier = false;
	  bool IsAtStartOfStatement = true;
	  AsmCommentConsumer *CommentConsumer = nullptr;

	  // Given an in-class default so that IsaAltMacroMode() never reads an
	  // indeterminate value.
	  bool AltMacroMode = false;
	  MCAsmLexer();

	  /// Lex and return the next token; implemented by subclasses.
	  virtual AsmToken LexToken() = 0;

	  /// Record \p err, located at \p errLoc, as the current error.
	  void SetError(SMLoc errLoc, const std::string &err) {
	    ErrLoc = errLoc;
	    Err = err;
	  }

	public:
	  MCAsmLexer(const MCAsmLexer &) = delete;
	  MCAsmLexer &operator=(const MCAsmLexer &) = delete;
	  virtual ~MCAsmLexer();

	  /// Check whether alternate macro mode is enabled.
	  bool IsaAltMacroMode() const {
	    return AltMacroMode;
	  }

	  /// Enable or disable alternate macro mode.
	  void SetAltMacroMode(bool AltMacroSet) {
	    AltMacroMode = AltMacroSet;
	  }

	  /// Consume the next token from the input stream and return it.
	  ///
	  /// The lexer will continuously return the end-of-file token once the end of
	  /// the main input file has been reached.
	  const AsmToken &Lex() {
	    assert(!CurTok.empty());
	    // Record whether the token being consumed is an EndOfStatement, i.e.
	    // whether the next token starts a new statement.
	    IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
	    CurTok.erase(CurTok.begin());
	    // LexToken may generate multiple tokens via UnLex but will always return
	    // the first one. Place returned value at head of CurTok vector. The token
	    // is lexed into a local first because LexToken() may itself modify CurTok.
	    if (CurTok.empty()) {
	      AsmToken T = LexToken();
	      CurTok.insert(CurTok.begin(), T);
	    }
	    return CurTok.front();
	  }

	  /// Push \p Token back so that it becomes the current token.
	  void UnLex(AsmToken const &Token) {
	    IsAtStartOfStatement = false;
	    CurTok.insert(CurTok.begin(), Token);
	  }

	  /// Check whether the last token consumed by Lex() was an EndOfStatement and
	  /// nothing has been pushed back with UnLex() since.
	  bool isAtStartOfStatement() const { return IsAtStartOfStatement; }

	  /// Implemented by subclasses.
	  virtual StringRef LexUntilEndOfStatement() = 0;

	  /// Get the current source location.
	  SMLoc getLoc() const;

	  /// Get the current (last) lexed token.
	  const AsmToken &getTok() const {
	    return CurTok[0];
	  }

	  /// Look ahead at the next token to be lexed.
	  ///
	  /// Calls peekTokens() with a one-element buffer and asserts that exactly one
	  /// token was read.
	  const AsmToken peekTok(bool ShouldSkipSpace = true) {
	    AsmToken Tok;

	    MutableArrayRef<AsmToken> Buf(Tok);
	    size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);

	    assert(ReadCount == 1);
	    (void)ReadCount; // Only used by the assert above.

	    return Tok;
	  }

	  /// Look ahead an arbitrary number of tokens.
	  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
	                            bool ShouldSkipSpace = true) = 0;

	  /// Get the current error location
	  SMLoc getErrLoc() const {
	    return ErrLoc;
	  }

	  /// Get the current error string
	  const std::string &getErr() const {
	    return Err;
	  }

	  /// Get the kind of current token.
	  AsmToken::TokenKind getKind() const { return getTok().getKind(); }

	  /// Check if the current token has kind \p K.
	  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }

	  /// Check if the current token does not have kind \p K.
	  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }

	  /// Set whether spaces should be ignored by the lexer
	  void setSkipSpace(bool val) { SkipSpace = val; }

	  /// Get whether '@' is allowed inside identifiers.
	  bool getAllowAtInIdentifier() const { return AllowAtInIdentifier; }

	  /// Set whether '@' is allowed inside identifiers.
	  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }

	  /// Set the consumer notified of comments; the pointer is stored as given.
	  void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
	    this->CommentConsumer = CommentConsumer;
	  }
	};

	} // end namespace llvm

	#endif // LLVM_MC_MCPARSER_MCASMLEXER_H