utils/TableGen/VarLenCodeEmitterGen.cpp - llvm-project/llvm - Git at Google

 //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // The CodeEmitterGen component for variable-length instructions.
 //
 // The basic CodeEmitterGen is almost exclusively designed for fixed-
 // length instructions. A good analogy for its encoding scheme is how printf
 // works: The (immutable) formatting string represent the fixed values in the
 // encoded instruction. Placeholders (i.e. %something), on the other hand,
 // represent encoding for instruction operands.
 // ```
 // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
 //                               <encoded value for operand `dst`>);
 // ```
 // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
 // that works more like a C++ stream operator:
 // ```
 // OS << 0b1101;
 // if (Cond)
 //   OS << OperandEncoding0;
 // OS << 0b1001 << OperandEncoding1;
 // ```
 // You are free to concatenate arbitrary types (and sizes) of encoding
 // fragments on any bit position, bringing more flexibilities on defining
 // encoding for variable-length instructions.
 //
 // In a more specific way, instruction encoding is represented by a DAG type
 // `Inst` field. Here is an example:
 // ```
 // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
 //                     (operand "$dst", 4));
 // ```
 // It represents the following instruction encoding:
 // ```
 // MSB                                                     LSB
 // 1101<encoding for operand src>1001<encoding for operand dst>
 // ```
 // For more details about DAG operators in the above snippet, please
 // refer to \file include/llvm/Target/Target.td.
 //
 // VarLenCodeEmitter will convert the above DAG into the same helper function
 // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
 // for few details).
 //
 //===----------------------------------------------------------------------===//

 #include "VarLenCodeEmitterGen.h"
 #include "CodeGenHwModes.h"
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
 #include "InfoByHwMode.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"

 using namespace llvm;

 namespace {

 class VarLenCodeEmitterGen {
   RecordKeeper &Records;

   DenseMap<Record *, VarLenInst> VarLenInsts;

   // Emit based values (i.e. fixed bits in the encoded instructions)
   void emitInstructionBaseValues(
       raw_ostream &OS,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions,
       CodeGenTarget &Target, int HwMode = -1);

   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
                                             CodeGenTarget &Target);

 public:
   explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}

   void run(raw_ostream &OS);
 };
 } // end anonymous namespace

 // Get the name of custom encoder or decoder, if there is any.
 // Returns `{encoder name, decoder name}`.
 static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
   std::pair<StringRef, StringRef> Result;
   for (const auto *Arg : Args) {
     const auto *DI = dyn_cast<DagInit>(Arg);
     if (!DI)
       continue;
     const Init *Op = DI->getOperator();
     if (!isa<DefInit>(Op))
       continue;
     // syntax: `(<encoder | decoder> "function name")`
     StringRef OpName = cast<DefInit>(Op)->getDef()->getName();
     if (OpName != "encoder" && OpName != "decoder")
       continue;
     if (!DI->getNumArgs() || !isa<StringInit>(DI->getArg(0)))
       PrintFatalError("expected '" + OpName +
                       "' directive to be followed by a custom function name.");
     StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();
     if (OpName == "encoder")
       Result.first = FuncName;
     else
       Result.second = FuncName;
   }
   return Result;
 }

 VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
     : TheDef(TheDef), NumBits(0U) {
   buildRec(DI);
   for (const auto &S : Segments)
     NumBits += S.BitWidth;
 }

 void VarLenInst::buildRec(const DagInit *DI) {
   assert(TheDef && "The def record is nullptr ?");

   std::string Op = DI->getOperator()->getAsString();

   if (Op == "ascend" || Op == "descend") {
     bool Reverse = Op == "descend";
     int i = Reverse ? DI->getNumArgs() - 1 : 0;
     int e = Reverse ? -1 : DI->getNumArgs();
     int s = Reverse ? -1 : 1;
     for (; i != e; i += s) {
       const Init *Arg = DI->getArg(i);
       if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
         if (!BI->isComplete())
           PrintFatalError(TheDef->getLoc(),
                           "Expecting complete bits init in `" + Op + "`");
         Segments.push_back({BI->getNumBits(), BI});
       } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
         if (!BI->isConcrete())
           PrintFatalError(TheDef->getLoc(),
                           "Expecting concrete bit init in `" + Op + "`");
         Segments.push_back({1, BI});
       } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
         buildRec(SubDI);
       } else {
         PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
                                               Op + "`: " + Arg->getAsString());
       }
     }
   } else if (Op == "operand") {
     // (operand <operand name>, <# of bits>,
     //          [(encoder <custom encoder>)][, (decoder <custom decoder>)])
     if (DI->getNumArgs() < 2)
       PrintFatalError(TheDef->getLoc(),
                       "Expecting at least 2 arguments for `operand`");
     HasDynamicSegment = true;
     const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
     if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
       PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");

     auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
     if (NumBitsVal <= 0)
       PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");

     auto [CustomEncoder, CustomDecoder] =
         getCustomCoders(DI->getArgs().slice(2));
     Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
                         CustomEncoder, CustomDecoder});
   } else if (Op == "slice") {
     // (slice <operand name>, <high / low bit>, <low / high bit>,
     //        [(encoder <custom encoder>)][, (decoder <custom decoder>)])
     if (DI->getNumArgs() < 3)
       PrintFatalError(TheDef->getLoc(),
                       "Expecting at least 3 arguments for `slice`");
     HasDynamicSegment = true;
     Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
          *LoBit = DI->getArg(2);
     if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
         !isa<IntInit>(LoBit))
       PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");

     auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
          LoBitVal = cast<IntInit>(LoBit)->getValue();
     if (HiBitVal < 0 || LoBitVal < 0)
       PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
     bool NeedSwap = false;
     unsigned NumBits = 0U;
     if (HiBitVal < LoBitVal) {
       NeedSwap = true;
       NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
     } else {
       NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
     }

     auto [CustomEncoder, CustomDecoder] =
         getCustomCoders(DI->getArgs().slice(3));

     if (NeedSwap) {
       // Normalization: Hi bit should always be the second argument.
       Init *const NewArgs[] = {OperandName, LoBit, HiBit};
       Segments.push_back({NumBits,
                           DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
                           CustomEncoder, CustomDecoder});
     } else {
       Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
     }
   }
 }

 void VarLenCodeEmitterGen::run(raw_ostream &OS) {
   CodeGenTarget Target(Records);
   auto Insts = Records.getAllDerivedDefinitions("Instruction");

   auto NumberedInstructions = Target.getInstructionsByEnumValue();
   const CodeGenHwModes &HWM = Target.getHwModes();

   // The set of HwModes used by instruction encodings.
   std::set<unsigned> HwModes;
   for (const CodeGenInstruction *CGI : NumberedInstructions) {
     Record *R = CGI->TheDef;

     // Create the corresponding VarLenInst instance.
     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
         R->getValueAsBit("isPseudo"))
       continue;

     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
         for (auto &KV : EBM) {
           HwModes.insert(KV.first);
           Record *EncodingDef = KV.second;
           RecordVal *RV = EncodingDef->getValue("Inst");
           DagInit *DI = cast<DagInit>(RV->getValue());
           VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
         }
         continue;
       }
     }
     RecordVal *RV = R->getValue("Inst");
     DagInit *DI = cast<DagInit>(RV->getValue());
     VarLenInsts.insert({R, VarLenInst(DI, RV)});
   }

   // Emit function declaration
   OS << "void " << Target.getName()
      << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
      << "    SmallVectorImpl<MCFixup> &Fixups,\n"
      << "    APInt &Inst,\n"
      << "    APInt &Scratch,\n"
      << "    const MCSubtargetInfo &STI) const {\n";

   // Emit instruction base values
   if (HwModes.empty()) {
     emitInstructionBaseValues(OS, NumberedInstructions, Target);
   } else {
     for (unsigned HwMode : HwModes)
       emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
   }

   if (!HwModes.empty()) {
     OS << "  const unsigned **Index;\n";
     OS << "  const uint64_t *InstBits;\n";
     OS << "  unsigned HwMode = STI.getHwMode();\n";
     OS << "  switch (HwMode) {\n";
     OS << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
     for (unsigned I : HwModes) {
       OS << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
          << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
     }
     OS << "  };\n";
   }

   // Emit helper function to retrieve base values.
   OS << "  auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
      << "    unsigned NumBits = Index[Opcode][0];\n"
      << "    if (!NumBits)\n"
      << "      return APInt::getZeroWidth();\n"
      << "    unsigned Idx = Index[Opcode][1];\n"
      << "    ArrayRef<uint64_t> Data(&InstBits[Idx], "
      << "APInt::getNumWords(NumBits));\n"
      << "    return APInt(NumBits, Data);\n"
      << "  };\n";

   // Map to accumulate all the cases.
   std::map<std::string, std::vector<std::string>> CaseMap;

   // Construct all cases statement for each opcode
   for (Record *R : Insts) {
     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
         R->getValueAsBit("isPseudo"))
       continue;
     std::string InstName =
         (R->getValueAsString("Namespace") + "::" + R->getName()).str();
     std::string Case = getInstructionCase(R, Target);

     CaseMap[Case].push_back(std::move(InstName));
   }

   // Emit initial function code
   OS << "  const unsigned opcode = MI.getOpcode();\n"
      << "  switch (opcode) {\n";

   // Emit each case statement
   for (const auto &C : CaseMap) {
     const std::string &Case = C.first;
     const auto &InstList = C.second;

     ListSeparator LS("\n");
     for (const auto &InstName : InstList)
       OS << LS << "    case " << InstName << ":";

     OS << " {\n";
     OS << Case;
     OS << "      break;\n"
        << "    }\n";
   }
   // Default case: unhandled opcode
   OS << "  default:\n"
      << "    std::string msg;\n"
      << "    raw_string_ostream Msg(msg);\n"
      << "    Msg << \"Not supported instr: \" << MI;\n"
      << "    report_fatal_error(Msg.str().c_str());\n"
      << "  }\n";
   OS << "}\n\n";
 }

 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
                          unsigned &Index) {
   if (!Bits.getNumWords()) {
     IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
     return;
   }

   IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
                << "/*Index*/" << Index << "},";

   SS.indent(4);
   for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
     SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
 }

 void VarLenCodeEmitterGen::emitInstructionBaseValues(
     raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
     CodeGenTarget &Target, int HwMode) {
   std::string IndexArray, StorageArray;
   raw_string_ostream IS(IndexArray), SS(StorageArray);

   const CodeGenHwModes &HWM = Target.getHwModes();
   if (HwMode == -1) {
     IS << "  static const unsigned Index[][2] = {\n";
     SS << "  static const uint64_t InstBits[] = {\n";
   } else {
     StringRef Name = HWM.getMode(HwMode).Name;
     IS << "  static const unsigned Index_" << Name << "[][2] = {\n";
     SS << "  static const uint64_t InstBits_" << Name << "[] = {\n";
   }

   unsigned NumFixedValueWords = 0U;
   for (const CodeGenInstruction *CGI : NumberedInstructions) {
     Record *R = CGI->TheDef;

     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
         R->getValueAsBit("isPseudo")) {
       IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
       continue;
     }

     Record *EncodingDef = R;
     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
         if (EBM.hasMode(HwMode))
           EncodingDef = EBM.get(HwMode);
       }
     }

     auto It = VarLenInsts.find(EncodingDef);
     if (It == VarLenInsts.end())
       PrintFatalError(EncodingDef, "VarLenInst not found for this record");
     const VarLenInst &VLI = It->second;

     unsigned i = 0U, BitWidth = VLI.size();

     // Start by filling in fixed values.
     APInt Value(BitWidth, 0);
     auto SI = VLI.begin(), SE = VLI.end();
     // Scan through all the segments that have fixed-bits values.
     while (i < BitWidth && SI != SE) {
       unsigned SegmentNumBits = SI->BitWidth;
       if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
         for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
           auto *B = cast<BitInit>(BI->getBit(Idx));
           Value.setBitVal(i + Idx, B->getValue());
         }
       }
       if (const auto *BI = dyn_cast<BitInit>(SI->Value))
         Value.setBitVal(i, BI->getValue());

       i += SegmentNumBits;
       ++SI;
     }

     emitInstBits(IS, SS, Value, NumFixedValueWords);
     IS << '\t' << "// " << R->getName() << "\n";
     if (Value.getNumWords())
       SS << '\t' << "// " << R->getName() << "\n";
   }
   IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n  };\n";
   SS.indent(4) << "UINT64_C(0)\n  };\n";

   OS << IS.str() << SS.str();
 }

 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
                                                      CodeGenTarget &Target) {
   std::string Case;
   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
       const CodeGenHwModes &HWM = Target.getHwModes();
       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
       Case += "      switch (HwMode) {\n";
       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
       for (auto &KV : EBM) {
         Case += "      case " + itostr(KV.first) + ": {\n";
         Case += getInstructionCaseForEncoding(R, KV.second, Target);
         Case += "      break;\n";
         Case += "      }\n";
       }
       Case += "      }\n";
       return Case;
     }
   }
   return getInstructionCaseForEncoding(R, R, Target);
 }

 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
     Record *R, Record *EncodingDef, CodeGenTarget &Target) {
   auto It = VarLenInsts.find(EncodingDef);
   if (It == VarLenInsts.end())
     PrintFatalError(EncodingDef, "Parsed encoding record not found");
   const VarLenInst &VLI = It->second;
   size_t BitWidth = VLI.size();

   CodeGenInstruction &CGI = Target.getInstruction(R);

   std::string Case;
   raw_string_ostream SS(Case);
   // Resize the scratch buffer.
   if (BitWidth && !VLI.isFixedValueOnly())
     SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
   // Populate based value.
   SS.indent(6) << "Inst = getInstBits(opcode);\n";

   // Process each segment in VLI.
   size_t Offset = 0U;
   for (const auto &ES : VLI) {
     unsigned NumBits = ES.BitWidth;
     const Init *Val = ES.Value;
     // If it's a StringInit or DagInit, it's a reference to an operand
     // or part of an operand.
     if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
       StringRef OperandName;
       unsigned LoBit = 0U;
       if (const auto *SV = dyn_cast<StringInit>(Val)) {
         OperandName = SV->getValue();
       } else {
         // Normalized: (slice <operand name>, <high bit>, <low bit>)
         const auto *DV = cast<DagInit>(Val);
         OperandName = cast<StringInit>(DV->getArg(0))->getValue();
         LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
       }

       auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
       unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
       StringRef CustomEncoder =
           CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
       if (ES.CustomEncoder.size())
         CustomEncoder = ES.CustomEncoder;

       SS.indent(6) << "Scratch.clearAllBits();\n";
       SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
       if (CustomEncoder.empty())
         SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
                      << utostr(FlatOpIdx) << ")";
       else
         SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);

       SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";

       SS.indent(6) << "Inst.insertBits("
                    << "Scratch.extractBits(" << utostr(NumBits) << ", "
                    << utostr(LoBit) << ")"
                    << ", " << Offset << ");\n";
     }
     Offset += NumBits;
   }

   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
   if (!PostEmitter.empty())
     SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";

   return Case;
 }

 namespace llvm {

 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
   VarLenCodeEmitterGen(R).run(OS);
 }

 } // end namespace llvm
	//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// The CodeEmitterGen component for variable-length instructions.
	//
	// The basic CodeEmitterGen is almost exclusively designed for fixed-
	// length instructions. A good analogy for its encoding scheme is how printf
	// works: The (immutable) formatting string represent the fixed values in the
	// encoded instruction. Placeholders (i.e. %something), on the other hand,
	// represent encoding for instruction operands.
	// ```
	// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
	// <encoded value for operand `dst`>);
	// ```
	// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
	// that works more like a C++ stream operator:
	// ```
	// OS << 0b1101;
	// if (Cond)
	// OS << OperandEncoding0;
	// OS << 0b1001 << OperandEncoding1;
	// ```
	// You are free to concatenate arbitrary types (and sizes) of encoding
	// fragments on any bit position, bringing more flexibilities on defining
	// encoding for variable-length instructions.
	//
	// In a more specific way, instruction encoding is represented by a DAG type
	// `Inst` field. Here is an example:
	// ```
	// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
	// (operand "$dst", 4));
	// ```
	// It represents the following instruction encoding:
	// ```
	// MSB LSB
	// 1101<encoding for operand src>1001<encoding for operand dst>
	// ```
	// For more details about DAG operators in the above snippet, please
	// refer to \file include/llvm/Target/Target.td.
	//
	// VarLenCodeEmitter will convert the above DAG into the same helper function
	// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
	// for few details).
	//
	//===----------------------------------------------------------------------===//

	#include "VarLenCodeEmitterGen.h"
	#include "CodeGenHwModes.h"
	#include "CodeGenInstruction.h"
	#include "CodeGenTarget.h"
	#include "InfoByHwMode.h"
	#include "llvm/ADT/ArrayRef.h"
	#include "llvm/ADT/DenseMap.h"
	#include "llvm/Support/raw_ostream.h"
	#include "llvm/TableGen/Error.h"
	#include "llvm/TableGen/Record.h"

	using namespace llvm;

	namespace {

	class VarLenCodeEmitterGen {
	RecordKeeper &Records;

	DenseMap<Record *, VarLenInst> VarLenInsts;

	// Emit based values (i.e. fixed bits in the encoded instructions)
	void emitInstructionBaseValues(
	raw_ostream &OS,
	ArrayRef<const CodeGenInstruction *> NumberedInstructions,
	CodeGenTarget &Target, int HwMode = -1);

	std::string getInstructionCase(Record *R, CodeGenTarget &Target);
	std::string getInstructionCaseForEncoding(Record R, Record EncodingDef,
	CodeGenTarget &Target);

	public:
	explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}

	void run(raw_ostream &OS);
	};
	} // end anonymous namespace

	// Get the name of custom encoder or decoder, if there is any.
	// Returns `{encoder name, decoder name}`.
	static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
	std::pair<StringRef, StringRef> Result;
	for (const auto *Arg : Args) {
	const auto *DI = dyn_cast<DagInit>(Arg);
	if (!DI)
	continue;
	const Init *Op = DI->getOperator();
	if (!isa<DefInit>(Op))
	continue;
	// syntax: `(<encoder \| decoder> "function name")`
	StringRef OpName = cast<DefInit>(Op)->getDef()->getName();
	if (OpName != "encoder" && OpName != "decoder")
	continue;
	if (!DI->getNumArgs() \|\| !isa<StringInit>(DI->getArg(0)))
	PrintFatalError("expected '" + OpName +
	"' directive to be followed by a custom function name.");
	StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();
	if (OpName == "encoder")
	Result.first = FuncName;
	else
	Result.second = FuncName;
	}
	return Result;
	}

	VarLenInst::VarLenInst(const DagInit DI, const RecordVal TheDef)
	: TheDef(TheDef), NumBits(0U) {
	buildRec(DI);
	for (const auto &S : Segments)
	NumBits += S.BitWidth;
	}

	void VarLenInst::buildRec(const DagInit *DI) {
	assert(TheDef && "The def record is nullptr ?");

	std::string Op = DI->getOperator()->getAsString();

	if (Op == "ascend" \|\| Op == "descend") {
	bool Reverse = Op == "descend";
	int i = Reverse ? DI->getNumArgs() - 1 : 0;
	int e = Reverse ? -1 : DI->getNumArgs();
	int s = Reverse ? -1 : 1;
	for (; i != e; i += s) {
	const Init *Arg = DI->getArg(i);
	if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
	if (!BI->isComplete())
	PrintFatalError(TheDef->getLoc(),
	"Expecting complete bits init in `" + Op + "`");
	Segments.push_back({BI->getNumBits(), BI});
	} else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
	if (!BI->isConcrete())
	PrintFatalError(TheDef->getLoc(),
	"Expecting concrete bit init in `" + Op + "`");
	Segments.push_back({1, BI});
	} else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
	buildRec(SubDI);
	} else {
	PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
	Op + "`: " + Arg->getAsString());
	}
	}
	} else if (Op == "operand") {
	// (operand <operand name>, <# of bits>,
	// [(encoder <custom encoder>)][, (decoder <custom decoder>)])
	if (DI->getNumArgs() < 2)
	PrintFatalError(TheDef->getLoc(),
	"Expecting at least 2 arguments for `operand`");
	HasDynamicSegment = true;
	const Init OperandName = DI->getArg(0), NumBits = DI->getArg(1);
	if (!isa<StringInit>(OperandName) \|\| !isa<IntInit>(NumBits))
	PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");

	auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
	if (NumBitsVal <= 0)
	PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");

	auto [CustomEncoder, CustomDecoder] =
	getCustomCoders(DI->getArgs().slice(2));
	Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
	CustomEncoder, CustomDecoder});
	} else if (Op == "slice") {
	// (slice <operand name>, <high / low bit>, <low / high bit>,
	// [(encoder <custom encoder>)][, (decoder <custom decoder>)])
	if (DI->getNumArgs() < 3)
	PrintFatalError(TheDef->getLoc(),
	"Expecting at least 3 arguments for `slice`");
	HasDynamicSegment = true;
	Init OperandName = DI->getArg(0), HiBit = DI->getArg(1),
	*LoBit = DI->getArg(2);
	if (!isa<StringInit>(OperandName) \|\| !isa<IntInit>(HiBit) \|\|
	!isa<IntInit>(LoBit))
	PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");

	auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
	LoBitVal = cast<IntInit>(LoBit)->getValue();
	if (HiBitVal < 0 \|\| LoBitVal < 0)
	PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
	bool NeedSwap = false;
	unsigned NumBits = 0U;
	if (HiBitVal < LoBitVal) {
	NeedSwap = true;
	NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
	} else {
	NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
	}

	auto [CustomEncoder, CustomDecoder] =
	getCustomCoders(DI->getArgs().slice(3));

	if (NeedSwap) {
	// Normalization: Hi bit should always be the second argument.
	Init *const NewArgs[] = {OperandName, LoBit, HiBit};
	Segments.push_back({NumBits,
	DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
	CustomEncoder, CustomDecoder});
	} else {
	Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
	}
	}
	}

	void VarLenCodeEmitterGen::run(raw_ostream &OS) {
	CodeGenTarget Target(Records);
	auto Insts = Records.getAllDerivedDefinitions("Instruction");

	auto NumberedInstructions = Target.getInstructionsByEnumValue();
	const CodeGenHwModes &HWM = Target.getHwModes();

	// The set of HwModes used by instruction encodings.
	std::set<unsigned> HwModes;
	for (const CodeGenInstruction *CGI : NumberedInstructions) {
	Record *R = CGI->TheDef;

	// Create the corresponding VarLenInst instance.
	if (R->getValueAsString("Namespace") == "TargetOpcode" \|\|
	R->getValueAsBit("isPseudo"))
	continue;

	if (const RecordVal *RV = R->getValue("EncodingInfos")) {
	if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
	for (auto &KV : EBM) {
	HwModes.insert(KV.first);
	Record *EncodingDef = KV.second;
	RecordVal *RV = EncodingDef->getValue("Inst");
	DagInit *DI = cast<DagInit>(RV->getValue());
	VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
	}
	continue;
	}
	}
	RecordVal *RV = R->getValue("Inst");
	DagInit *DI = cast<DagInit>(RV->getValue());
	VarLenInsts.insert({R, VarLenInst(DI, RV)});
	}

	// Emit function declaration
	OS << "void " << Target.getName()
	<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
	<< " SmallVectorImpl<MCFixup> &Fixups,\n"
	<< " APInt &Inst,\n"
	<< " APInt &Scratch,\n"
	<< " const MCSubtargetInfo &STI) const {\n";

	// Emit instruction base values
	if (HwModes.empty()) {
	emitInstructionBaseValues(OS, NumberedInstructions, Target);
	} else {
	for (unsigned HwMode : HwModes)
	emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
	}

	if (!HwModes.empty()) {
	OS << " const unsigned **Index;\n";
	OS << " const uint64_t *InstBits;\n";
	OS << " unsigned HwMode = STI.getHwMode();\n";
	OS << " switch (HwMode) {\n";
	OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
	for (unsigned I : HwModes) {
	OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
	<< "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
	}
	OS << " };\n";
	}

	// Emit helper function to retrieve base values.
	OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
	<< " unsigned NumBits = Index[Opcode][0];\n"
	<< " if (!NumBits)\n"
	<< " return APInt::getZeroWidth();\n"
	<< " unsigned Idx = Index[Opcode][1];\n"
	<< " ArrayRef<uint64_t> Data(&InstBits[Idx], "
	<< "APInt::getNumWords(NumBits));\n"
	<< " return APInt(NumBits, Data);\n"
	<< " };\n";

	// Map to accumulate all the cases.
	std::map<std::string, std::vector<std::string>> CaseMap;

	// Construct all cases statement for each opcode
	for (Record *R : Insts) {
	if (R->getValueAsString("Namespace") == "TargetOpcode" \|\|
	R->getValueAsBit("isPseudo"))
	continue;
	std::string InstName =
	(R->getValueAsString("Namespace") + "::" + R->getName()).str();
	std::string Case = getInstructionCase(R, Target);

	CaseMap[Case].push_back(std::move(InstName));
	}

	// Emit initial function code
	OS << " const unsigned opcode = MI.getOpcode();\n"
	<< " switch (opcode) {\n";

	// Emit each case statement
	for (const auto &C : CaseMap) {
	const std::string &Case = C.first;
	const auto &InstList = C.second;

	ListSeparator LS("\n");
	for (const auto &InstName : InstList)
	OS << LS << " case " << InstName << ":";

	OS << " {\n";
	OS << Case;
	OS << " break;\n"
	<< " }\n";
	}
	// Default case: unhandled opcode
	OS << " default:\n"
	<< " std::string msg;\n"
	<< " raw_string_ostream Msg(msg);\n"
	<< " Msg << \"Not supported instr: \" << MI;\n"
	<< " report_fatal_error(Msg.str().c_str());\n"
	<< " }\n";
	OS << "}\n\n";
	}

	static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
	unsigned &Index) {
	if (!Bits.getNumWords()) {
	IS.indent(4) << "{/NumBits/0, /Index/0},";
	return;
	}

	IS.indent(4) << "{/NumBits/" << Bits.getBitWidth() << ", "
	<< "/Index/" << Index << "},";

	SS.indent(4);
	for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
	SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
	}

	void VarLenCodeEmitterGen::emitInstructionBaseValues(
	raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
	CodeGenTarget &Target, int HwMode) {
	std::string IndexArray, StorageArray;
	raw_string_ostream IS(IndexArray), SS(StorageArray);

	const CodeGenHwModes &HWM = Target.getHwModes();
	if (HwMode == -1) {
	IS << " static const unsigned Index[][2] = {\n";
	SS << " static const uint64_t InstBits[] = {\n";
	} else {
	StringRef Name = HWM.getMode(HwMode).Name;
	IS << " static const unsigned Index_" << Name << "[][2] = {\n";
	SS << " static const uint64_t InstBits_" << Name << "[] = {\n";
	}

	unsigned NumFixedValueWords = 0U;
	for (const CodeGenInstruction *CGI : NumberedInstructions) {
	Record *R = CGI->TheDef;

	if (R->getValueAsString("Namespace") == "TargetOpcode" \|\|
	R->getValueAsBit("isPseudo")) {
	IS.indent(4) << "{/NumBits/0, /Index/0},\n";
	continue;
	}

	Record *EncodingDef = R;
	if (const RecordVal *RV = R->getValue("EncodingInfos")) {
	if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
	if (EBM.hasMode(HwMode))
	EncodingDef = EBM.get(HwMode);
	}
	}

	auto It = VarLenInsts.find(EncodingDef);
	if (It == VarLenInsts.end())
	PrintFatalError(EncodingDef, "VarLenInst not found for this record");
	const VarLenInst &VLI = It->second;

	unsigned i = 0U, BitWidth = VLI.size();

	// Start by filling in fixed values.
	APInt Value(BitWidth, 0);
	auto SI = VLI.begin(), SE = VLI.end();
	// Scan through all the segments that have fixed-bits values.
	while (i < BitWidth && SI != SE) {
	unsigned SegmentNumBits = SI->BitWidth;
	if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
	for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
	auto *B = cast<BitInit>(BI->getBit(Idx));
	Value.setBitVal(i + Idx, B->getValue());
	}
	}
	if (const auto *BI = dyn_cast<BitInit>(SI->Value))
	Value.setBitVal(i, BI->getValue());

	i += SegmentNumBits;
	++SI;
	}

	emitInstBits(IS, SS, Value, NumFixedValueWords);
	IS << '\t' << "// " << R->getName() << "\n";
	if (Value.getNumWords())
	SS << '\t' << "// " << R->getName() << "\n";
	}
	IS.indent(4) << "{/NumBits/0, /Index/0}\n };\n";
	SS.indent(4) << "UINT64_C(0)\n };\n";

	OS << IS.str() << SS.str();
	}

	std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
	CodeGenTarget &Target) {
	std::string Case;
	if (const RecordVal *RV = R->getValue("EncodingInfos")) {
	if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
	const CodeGenHwModes &HWM = Target.getHwModes();
	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
	Case += " switch (HwMode) {\n";
	Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
	for (auto &KV : EBM) {
	Case += " case " + itostr(KV.first) + ": {\n";
	Case += getInstructionCaseForEncoding(R, KV.second, Target);
	Case += " break;\n";
	Case += " }\n";
	}
	Case += " }\n";
	return Case;
	}
	}
	return getInstructionCaseForEncoding(R, R, Target);
	}

	std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
	Record R, Record EncodingDef, CodeGenTarget &Target) {
	auto It = VarLenInsts.find(EncodingDef);
	if (It == VarLenInsts.end())
	PrintFatalError(EncodingDef, "Parsed encoding record not found");
	const VarLenInst &VLI = It->second;
	size_t BitWidth = VLI.size();

	CodeGenInstruction &CGI = Target.getInstruction(R);

	std::string Case;
	raw_string_ostream SS(Case);
	// Resize the scratch buffer.
	if (BitWidth && !VLI.isFixedValueOnly())
	SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
	// Populate based value.
	SS.indent(6) << "Inst = getInstBits(opcode);\n";

	// Process each segment in VLI.
	size_t Offset = 0U;
	for (const auto &ES : VLI) {
	unsigned NumBits = ES.BitWidth;
	const Init *Val = ES.Value;
	// If it's a StringInit or DagInit, it's a reference to an operand
	// or part of an operand.
	if (isa<StringInit>(Val) \|\| isa<DagInit>(Val)) {
	StringRef OperandName;
	unsigned LoBit = 0U;
	if (const auto *SV = dyn_cast<StringInit>(Val)) {
	OperandName = SV->getValue();
	} else {
	// Normalized: (slice <operand name>, <high bit>, <low bit>)
	const auto *DV = cast<DagInit>(Val);
	OperandName = cast<StringInit>(DV->getArg(0))->getValue();
	LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
	}

	auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
	unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
	StringRef CustomEncoder =
	CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
	if (ES.CustomEncoder.size())
	CustomEncoder = ES.CustomEncoder;

	SS.indent(6) << "Scratch.clearAllBits();\n";
	SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
	if (CustomEncoder.empty())
	SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
	<< utostr(FlatOpIdx) << ")";
	else
	SS.indent(6) << CustomEncoder << "(MI, /OpIdx=/" << utostr(FlatOpIdx);

	SS << ", /Pos=/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";

	SS.indent(6) << "Inst.insertBits("
	<< "Scratch.extractBits(" << utostr(NumBits) << ", "
	<< utostr(LoBit) << ")"
	<< ", " << Offset << ");\n";
	}
	Offset += NumBits;
	}

	StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
	if (!PostEmitter.empty())
	SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";

	return Case;
	}

	namespace llvm {

	void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
	VarLenCodeEmitterGen(R).run(OS);
	}

	} // end namespace llvm