blob: 486784859bd71282dd1219a6b3a7fe79ab4ddc64 [file] [log] [blame]
//===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
struct RISCVOperand;
class RISCVAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
int Lower, int Upper, Twine Msg);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
bool parseOperand(OperandVector &Operands);
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "RISCVGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
};
static bool classifySymbolRef(const MCExpr *Expr,
RISCVMCExpr::VariantKind &Kind,
int64_t &Addend);
RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
};
/// RISCVOperand - Instances of this class represent a parsed machine
/// instruction
struct RISCVOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Register,
Immediate,
} Kind;
struct RegOp {
unsigned RegNum;
};
struct ImmOp {
const MCExpr *Val;
};
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
};
RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
public:
RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
case Register:
Reg = o.Reg;
break;
case Immediate:
Imm = o.Imm;
break;
case Token:
Tok = o.Tok;
break;
}
}
bool isToken() const override { return Kind == Token; }
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
bool isMem() const override { return false; }
bool evaluateConstantImm(int64_t &Imm, RISCVMCExpr::VariantKind &VK) const {
const MCExpr *Val = getImm();
bool Ret = false;
if (auto *RE = dyn_cast<RISCVMCExpr>(Val)) {
Ret = RE->evaluateAsConstant(Imm);
VK = RE->getKind();
} else if (auto CE = dyn_cast<MCConstantExpr>(Val)) {
Ret = true;
VK = RISCVMCExpr::VK_RISCV_None;
Imm = CE->getValue();
}
return Ret;
}
// True if operand is a symbol with no modifiers, or a constant with no
// modifiers and isShiftedInt<N-1, 1>(Op).
template <int N> bool isBareSimmNLsb0() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(Imm, VK);
bool IsValid;
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isShiftedInt<N - 1, 1>(Imm);
return IsValid && VK == RISCVMCExpr::VK_RISCV_None;
}
// Predicate methods for AsmOperands defined in RISCVInstrInfo.td
/// Return true if the operand is a valid for the fence instruction e.g.
/// ('iorw').
bool isFenceArg() const {
if (!isImm())
return false;
const MCExpr *Val = getImm();
auto *SVal = dyn_cast<MCSymbolRefExpr>(Val);
if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
return false;
StringRef Str = SVal->getSymbol().getName();
// Letters must be unique, taken from 'iorw', and in ascending order. This
// holds as long as each individual character is one of 'iorw' and is
// greater than the previous character.
char Prev = '\0';
for (char c : Str) {
if (c != 'i' && c != 'o' && c != 'r' && c != 'w')
return false;
if (c <= Prev)
return false;
Prev = c;
}
return true;
}
bool isUImm5() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm12() const {
RISCVMCExpr::VariantKind VK;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isInt<12>(Imm);
return IsValid &&
(VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
}
bool isUImm12() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(Imm, VK);
return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isUImm20() const {
RISCVMCExpr::VariantKind VK;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isUInt<20>(Imm);
return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
VK == RISCVMCExpr::VK_RISCV_HI ||
VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
}
bool isSImm21Lsb0() const { return isBareSimmNLsb0<21>(); }
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Gets location of the last token of this operand
SMLoc getEndLoc() const override { return EndLoc; }
unsigned getReg() const override {
assert(Kind == Register && "Invalid type access!");
return Reg.RegNum;
}
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
}
StringRef getToken() const {
assert(Kind == Token && "Invalid type access!");
return Tok;
}
void print(raw_ostream &OS) const override {
switch (Kind) {
case Immediate:
OS << *getImm();
break;
case Register:
OS << "<register x";
OS << getReg() << ">";
break;
case Token:
OS << "'" << getToken() << "'";
break;
}
}
static std::unique_ptr<RISCVOperand> createToken(StringRef Str, SMLoc S) {
auto Op = make_unique<RISCVOperand>(Token);
Op->Tok = Str;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
SMLoc E) {
auto Op = make_unique<RISCVOperand>(Register);
Op->Reg.RegNum = RegNo;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
static std::unique_ptr<RISCVOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
auto Op = make_unique<RISCVOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
bool IsConstant = false;
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
IsConstant = RE->evaluateAsConstant(Imm);
} else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
IsConstant = true;
Imm = CE->getValue();
}
if (IsConstant)
Inst.addOperand(MCOperand::createImm(Imm));
else
Inst.addOperand(MCOperand::createExpr(Expr));
}
// Used by the TableGen Code
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getReg()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
void addFenceArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// isFenceArg has validated the operand, meaning this cast is safe
auto SE = cast<MCSymbolRefExpr>(getImm());
unsigned Imm = 0;
for (char c : SE->getSymbol().getName()) {
switch (c) {
default: llvm_unreachable("FenceArg must contain only [iorw]");
case 'i': Imm |= RISCVFenceField::I; break;
case 'o': Imm |= RISCVFenceField::O; break;
case 'r': Imm |= RISCVFenceField::R; break;
case 'w': Imm |= RISCVFenceField::W; break;
}
}
Inst.addOperand(MCOperand::createImm(Imm));
}
};
} // end anonymous namespace.
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "RISCVGenAsmMatcher.inc"
bool RISCVAsmParser::generateImmOutOfRangeError(
OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper,
Twine Msg = "immediate must be an integer in the range") {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
default:
break;
case Match_Success:
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction mnemonic");
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
return Error(ErrorLoc, "invalid operand for instruction");
}
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
case Match_InvalidSImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11),
(1 << 11) - 1);
case Match_InvalidUImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm20:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
case Match_InvalidSImm21Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
ErrorLoc,
"operand must be formed of letters selected in-order from 'iorw'");
}
}
llvm_unreachable("Unknown match type detected!");
}
bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
if (!MatchRegisterName(Name) || !MatchRegisterAltName(Name)) {
getParser().Lex(); // Eat identifier token.
return false;
}
return Error(StartLoc, "invalid register name");
}
OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
unsigned RegNo = MatchRegisterName(Name);
if (RegNo == 0) {
RegNo = MatchRegisterAltName(Name);
if (RegNo == 0)
return MatchOperand_NoMatch;
}
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(RegNo, S, E));
}
return MatchOperand_Success;
}
OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
case AsmToken::String:
if (getParser().parseExpression(Res))
return MatchOperand_ParseFail;
break;
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
break;
}
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
Operands.push_back(RISCVOperand::createImm(Res, S, E));
return MatchOperand_Success;
}
OperandMatchResultTy
RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
if (getLexer().getKind() != AsmToken::Percent) {
Error(getLoc(), "expected '%' for operand modifier");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '%'
if (getLexer().getKind() != AsmToken::Identifier) {
Error(getLoc(), "expected valid identifier for operand modifier");
return MatchOperand_ParseFail;
}
StringRef Identifier = getParser().getTok().getIdentifier();
RISCVMCExpr::VariantKind VK = RISCVMCExpr::getVariantKindForName(Identifier);
if (VK == RISCVMCExpr::VK_RISCV_Invalid) {
Error(getLoc(), "unrecognized operand modifier");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat the identifier
if (getLexer().getKind() != AsmToken::LParen) {
Error(getLoc(), "expected '('");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '('
const MCExpr *SubExpr;
if (getParser().parseParenExpression(SubExpr, E)) {
return MatchOperand_ParseFail;
}
const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext());
Operands.push_back(RISCVOperand::createImm(ModExpr, S, E));
return MatchOperand_Success;
}
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
Error(getLoc(), "expected '('");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '('
Operands.push_back(RISCVOperand::createToken("(", getLoc()));
if (parseRegister(Operands) != MatchOperand_Success) {
Error(getLoc(), "expected register");
return MatchOperand_ParseFail;
}
if (getLexer().isNot(AsmToken::RParen)) {
Error(getLoc(), "expected ')'");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat ')'
Operands.push_back(RISCVOperand::createToken(")", getLoc()));
return MatchOperand_Success;
}
/// Looks at a token type and creates the relevant operand
/// from this information, adding to Operands.
/// If operand was parsed, returns false, else true.
bool RISCVAsmParser::parseOperand(OperandVector &Operands) {
// Attempt to parse token as register
if (parseRegister(Operands) == MatchOperand_Success)
return false;
// Attempt to parse token as an immediate
if (parseImmediate(Operands) == MatchOperand_Success) {
// Parse memory base register if present
if (getLexer().is(AsmToken::LParen))
return parseMemOpBaseReg(Operands) != MatchOperand_Success;
return false;
}
// Finally we have exhausted all options and must declare defeat.
Error(getLoc(), "unknown operand");
return true;
}
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// First operand is token for instruction
Operands.push_back(RISCVOperand::createToken(Name, NameLoc));
// If there are no more operands, then finish
if (getLexer().is(AsmToken::EndOfStatement))
return false;
// Parse first operand
if (parseOperand(Operands))
return true;
// Parse until end of statement, consuming commas between operands
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
// Parse next operand
if (parseOperand(Operands))
return true;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
getParser().eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
getParser().Lex(); // Consume the EndOfStatement.
return false;
}
bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
RISCVMCExpr::VariantKind &Kind,
int64_t &Addend) {
Kind = RISCVMCExpr::VK_RISCV_None;
Addend = 0;
if (const RISCVMCExpr *RE = dyn_cast<RISCVMCExpr>(Expr)) {
Kind = RE->getKind();
Expr = RE->getSubExpr();
}
// It's a simple symbol reference or constant with no addend.
if (isa<MCConstantExpr>(Expr) || isa<MCSymbolRefExpr>(Expr))
return true;
const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
if (!BE)
return false;
if (!isa<MCSymbolRefExpr>(BE->getLHS()))
return false;
if (BE->getOpcode() != MCBinaryExpr::Add &&
BE->getOpcode() != MCBinaryExpr::Sub)
return false;
// We are able to support the subtraction of two symbol references
if (BE->getOpcode() == MCBinaryExpr::Sub &&
isa<MCSymbolRefExpr>(BE->getRHS()))
return true;
// See if the addend is is a constant, otherwise there's more going
// on here than we can deal with.
auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
if (!AddendExpr)
return false;
Addend = AddendExpr->getValue();
if (BE->getOpcode() == MCBinaryExpr::Sub)
Addend = -Addend;
// It's some symbol reference + a constant addend
return Kind != RISCVMCExpr::VK_RISCV_Invalid;
}
bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
extern "C" void LLVMInitializeRISCVAsmParser() {
RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
}