//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The CodeEmitterGen component for variable-length instructions.
//
// The basic CodeEmitterGen is designed almost exclusively for fixed-length
// instructions. A good analogy for its encoding scheme is how printf works:
// the (immutable) format string represents the fixed values in the encoded
// instruction, while placeholders (i.e. %something) represent the encodings
// of instruction operands.
// ```
// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
//                               <encoded value for operand `dst`>);
// ```
// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
// that works more like a C++ stream operator:
// ```
// OS << 0b1101;
// if (Cond)
//   OS << OperandEncoding0;
// OS << 0b1001 << OperandEncoding1;
// ```
// You are free to concatenate encoding fragments of arbitrary types (and
// sizes) at any bit position, which gives more flexibility when defining
// encodings for variable-length instructions.
//
// In a more specific way, instruction encoding is represented by a DAG type
// `Inst` field. Here is an example:
// ```
// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
//                     (operand "$dst", 4));
// ```
// It represents the following instruction encoding:
// ```
// MSB                                                     LSB
// 1101<encoding for operand src>1001<encoding for operand dst>
// ```
// For more details about DAG operators in the above snippet, please
// refer to \file include/llvm/Target/Target.td.
//
// VarLenCodeEmitter converts the above DAG into the same helper function
// that CodeEmitter generates, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
//
//===----------------------------------------------------------------------===//

#include "VarLenCodeEmitterGen.h"
#include "CodeGenHwModes.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "InfoByHwMode.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"

#include <algorithm>

using namespace llvm;

namespace {

/// TableGen backend that emits `<Target>MCCodeEmitter::getBinaryCodeForInstr`
/// for instructions whose encoding is described by a DAG-typed `Inst` field.
class VarLenCodeEmitterGen {
  const RecordKeeper &Records;

  // Representation of alternative encodings used for HwModes.
  using AltEncodingTy = int;
  // Mode identifier when only one encoding is defined. This is a class-wide
  // constant, so it is a static constexpr member rather than per-object state.
  static constexpr AltEncodingTy Universal = -1;
  // The set of alternative instruction encodings with a descriptive
  // name suffix to improve readability of the generated code.
  std::map<AltEncodingTy, std::string> Modes;

  // Parsed encodings, keyed first by instruction record and then by mode.
  DenseMap<const Record *, DenseMap<AltEncodingTy, VarLenInst>> VarLenInsts;

  // Emit base values (i.e. fixed bits in the encoded instructions) for the
  // given mode as a pair of static tables.
  void emitInstructionBaseValues(
      raw_ostream &OS,
      ArrayRef<const CodeGenInstruction *> NumberedInstructions,
      const CodeGenTarget &Target, AltEncodingTy Mode);

  // Build the body of the `case` statement that encodes instruction `R`,
  // dispatching on the mode when the encoding is not universal.
  std::string getInstructionCases(const Record *R, const CodeGenTarget &Target);
  // Build the encoding statements for one specific encoding of `R`, indented
  // by `Indent` spaces.
  std::string getInstructionCaseForEncoding(const Record *R, AltEncodingTy Mode,
                                            const VarLenInst &VLI,
                                            const CodeGenTarget &Target,
                                            int Indent);

public:
  explicit VarLenCodeEmitterGen(const RecordKeeper &R) : Records(R) {}

  // Parse all instruction encodings and write the generated function to `OS`.
  void run(raw_ostream &OS);
};
} // end anonymous namespace

/// Scan \p Args for `(encoder "name")` / `(decoder "name")` directives and
/// return `{encoder name, decoder name}`. A name is left empty when the
/// corresponding directive is absent; if a directive is repeated, the last
/// occurrence wins.
static std::pair<StringRef, StringRef>
getCustomCoders(ArrayRef<const Init *> Args) {
  StringRef EncoderName, DecoderName;
  for (const Init *Arg : Args) {
    // Only DAG arguments whose operator is a def can be coder directives.
    const auto *Directive = dyn_cast<DagInit>(Arg);
    if (!Directive)
      continue;
    const auto *OperatorDef = dyn_cast<DefInit>(Directive->getOperator());
    if (!OperatorDef)
      continue;

    // syntax: `(<encoder | decoder> "function name")`
    const StringRef Kind = OperatorDef->getDef()->getName();
    const bool IsEncoder = Kind == "encoder";
    if (!IsEncoder && Kind != "decoder")
      continue;

    if (Directive->getNumArgs() == 0 || !isa<StringInit>(Directive->getArg(0)))
      PrintFatalError("expected '" + Kind +
                      "' directive to be followed by a custom function name.");

    const StringRef FuncName =
        cast<StringInit>(Directive->getArg(0))->getValue();
    if (IsEncoder)
      EncoderName = FuncName;
    else
      DecoderName = FuncName;
  }
  return {EncoderName, DecoderName};
}

/// Parse the encoding DAG \p DI into segments and record the total encoding
/// width. \p TheDef is the record value the DAG came from; its location is
/// used when reporting errors.
VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
    : TheDef(TheDef), NumBits(0U), HasDynamicSegment(false) {
  buildRec(DI);

  // The overall width is the sum of the widths of all collected segments.
  for (const auto &Segment : Segments)
    NumBits += Segment.BitWidth;
}

void VarLenInst::buildRec(const DagInit *DI) {
  assert(TheDef && "The def record is nullptr ?");

  std::string Op = DI->getOperator()->getAsString();

  if (Op == "ascend" || Op == "descend") {
    bool Reverse = Op == "descend";
    int i = Reverse ? DI->getNumArgs() - 1 : 0;
    int e = Reverse ? -1 : DI->getNumArgs();
    int s = Reverse ? -1 : 1;
    for (; i != e; i += s) {
      const Init *Arg = DI->getArg(i);
      if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
        if (!BI->isComplete())
          PrintFatalError(TheDef->getLoc(),
                          "Expecting complete bits init in `" + Op + "`");
        Segments.push_back({BI->getNumBits(), BI});
      } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
        if (!BI->isConcrete())
          PrintFatalError(TheDef->getLoc(),
                          "Expecting concrete bit init in `" + Op + "`");
        Segments.push_back({1, BI});
      } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
        buildRec(SubDI);
      } else {
        PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
                                              Op + "`: " + Arg->getAsString());
      }
    }
  } else if (Op == "operand") {
    // (operand <operand name>, <# of bits>,
    //          [(encoder <custom encoder>)][, (decoder <custom decoder>)])
    if (DI->getNumArgs() < 2)
      PrintFatalError(TheDef->getLoc(),
                      "Expecting at least 2 arguments for `operand`");
    HasDynamicSegment = true;
    const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
    if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");

    auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
    if (NumBitsVal <= 0)
      PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");

    auto [CustomEncoder, CustomDecoder] =
        getCustomCoders(DI->getArgs().slice(2));
    Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
                        CustomEncoder, CustomDecoder});
  } else if (Op == "slice") {
    // (slice <operand name>, <high / low bit>, <low / high bit>,
    //        [(encoder <custom encoder>)][, (decoder <custom decoder>)])
    if (DI->getNumArgs() < 3)
      PrintFatalError(TheDef->getLoc(),
                      "Expecting at least 3 arguments for `slice`");
    HasDynamicSegment = true;
    const Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
               *LoBit = DI->getArg(2);
    if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
        !isa<IntInit>(LoBit))
      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");

    auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
         LoBitVal = cast<IntInit>(LoBit)->getValue();
    if (HiBitVal < 0 || LoBitVal < 0)
      PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
    bool NeedSwap = false;
    unsigned NumBits = 0U;
    if (HiBitVal < LoBitVal) {
      NeedSwap = true;
      NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
    } else {
      NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
    }

    auto [CustomEncoder, CustomDecoder] =
        getCustomCoders(DI->getArgs().slice(3));

    if (NeedSwap) {
      // Normalization: Hi bit should always be the second argument.
      SmallVector<std::pair<const Init *, const StringInit *>> NewArgs(
          DI->getArgAndNames());
      std::swap(NewArgs[1], NewArgs[2]);
      Segments.push_back({NumBits, DagInit::get(DI->getOperator(), NewArgs),
                          CustomEncoder, CustomDecoder});
    } else {
      Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
    }
  }
}

/// Parse the encoding of every non-pseudo target instruction and write the
/// definition of `<Target>MCCodeEmitter::getBinaryCodeForInstr` to \p OS.
///
/// The generated function consists of, in order: the per-mode base value
/// tables, one `getInstBits<suffix>` lambda per mode, and a `switch` over the
/// opcode in which instructions sharing identical encoding code share a case
/// body.
void VarLenCodeEmitterGen::run(raw_ostream &OS) {
  CodeGenTarget Target(Records);

  auto NumberedInstructions = Target.getInstructions();

  // Parse the DAG-typed `Inst` field of an encoding record. A missing or
  // non-DAG field is reported as a TableGen error instead of being
  // dereferenced / cast unchecked.
  auto ParseEncoding = [](const Record *EncodingDef) {
    const RecordVal *InstVal = EncodingDef->getValue("Inst");
    if (!InstVal)
      PrintFatalError(EncodingDef, "Record is missing the `Inst` field");
    const auto *InstDag = dyn_cast<DagInit>(InstVal->getValue());
    if (!InstDag)
      PrintFatalError(EncodingDef,
                      "Expecting `Inst` to be a DAG for variable-length "
                      "instruction encoding");
    return VarLenInst(InstDag, InstVal);
  };

  for (const CodeGenInstruction *CGI : NumberedInstructions) {
    const Record *R = CGI->TheDef;
    // Target-independent opcodes and pseudo instructions have no encoding.
    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
        R->getValueAsBit("isPseudo"))
      continue;

    // Setup alternative encodings according to HwModes
    if (const Record *EncodingInfos =
            R->getValueAsOptionalDef("EncodingInfos")) {
      const CodeGenHwModes &HWM = Target.getHwModes();
      EncodingInfoByHwMode EBM(EncodingInfos, HWM);
      for (const auto &[Mode, EncodingDef] : EBM) {
        Modes.try_emplace(Mode, "_" + HWM.getMode(Mode).Name.str());
        VarLenInsts[R].try_emplace(Mode, ParseEncoding(EncodingDef));
      }
      continue;
    }

    // Create the corresponding VarLenInst instance for the single encoding.
    VarLenInsts[R].try_emplace(Universal, ParseEncoding(R));
  }

  if (Modes.empty())
    Modes.try_emplace(Universal, ""); // Base case, skip suffix.

  // Emit function declaration
  OS << "void " << Target.getName()
     << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
     << "    SmallVectorImpl<MCFixup> &Fixups,\n"
     << "    APInt &Inst,\n"
     << "    APInt &Scratch,\n"
     << "    const MCSubtargetInfo &STI) const {\n";

  // Emit instruction base values
  for (const auto &Mode : Modes)
    emitInstructionBaseValues(OS, NumberedInstructions, Target, Mode.first);

  // `Modes` contains either only `Universal` or only real HwModes. In the
  // latter case at least one instruction has a per-mode encoding, and its case
  // body is a `switch (Mode)`, so `Mode` must be declared even when there is
  // just a single HwMode.
  if (!Modes.count(Universal)) {
    OS << "  unsigned Mode = STI.getHwMode();\n";
  }

  for (const auto &Mode : Modes) {
    // Emit helper function to retrieve base values.
    OS << "  auto getInstBits" << Mode.second
       << " = [&](unsigned Opcode) -> APInt {\n"
       << "    unsigned NumBits = Index" << Mode.second << "[Opcode][0];\n"
       << "    if (!NumBits)\n"
       << "      return APInt::getZeroWidth();\n"
       << "    unsigned Idx = Index" << Mode.second << "[Opcode][1];\n"
       << "    ArrayRef<uint64_t> Data(&InstBits" << Mode.second << "[Idx], "
       << "APInt::getNumWords(NumBits));\n"
       << "    return APInt(NumBits, Data);\n"
       << "  };\n";
  }

  // Map to accumulate all the cases: generated case body -> instructions that
  // share it.
  std::map<std::string, std::vector<std::string>> CaseMap;

  // Construct all cases statement for each opcode
  for (const Record *R : Records.getAllDerivedDefinitions("Instruction")) {
    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
        R->getValueAsBit("isPseudo"))
      continue;
    std::string InstName =
        (R->getValueAsString("Namespace") + "::" + R->getName()).str();
    std::string Case = getInstructionCases(R, Target);

    CaseMap[Case].push_back(std::move(InstName));
  }

  // Emit initial function code
  OS << "  const unsigned opcode = MI.getOpcode();\n"
     << "  switch (opcode) {\n";

  // Emit each case statement
  for (const auto &C : CaseMap) {
    const std::string &Case = C.first;
    const auto &InstList = C.second;

    ListSeparator LS("\n");
    for (const auto &InstName : InstList)
      OS << LS << "    case " << InstName << ":";

    OS << " {\n";
    OS << Case;
    OS << "      break;\n"
       << "    }\n";
  }
  // Default case: unhandled opcode
  OS << "  default:\n"
     << "    reportUnsupportedInst(MI);\n"
     << "  }\n";
  OS << "}\n\n";
}

/// Emit one `{NumBits, Index}` entry to the index table stream \p IS and the
/// 64-bit words of \p Bits to the storage table stream \p SS.
///
/// \p Index is the position of the next free word in the storage table and is
/// advanced by the number of words written. When \p Bits has no words, only an
/// all-zero index entry is emitted and \p SS and \p Index are left untouched.
static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
                         unsigned &Index) {
  const unsigned WordCount = Bits.getNumWords();
  if (WordCount == 0) {
    IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
    return;
  }

  IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " << "/*Index*/"
               << Index << "},";

  SS.indent(4);
  for (const uint64_t Word : ArrayRef<uint64_t>(Bits.getRawData(), WordCount)) {
    SS << "UINT64_C(" << utostr(Word) << "),";
    ++Index;
  }
}

/// Emit the two static tables holding the fixed bits of every instruction for
/// encoding mode \p Mode:
///  - `Index<suffix>`: one `{NumBits, Index}` row per entry of
///    \p NumberedInstructions, plus a trailing all-zero row.
///  - `InstBits<suffix>`: the 64-bit words of the fixed values, plus a
///    trailing zero word.
/// Instructions without an encoding get an all-zero index row.
void VarLenCodeEmitterGen::emitInstructionBaseValues(
    raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
    const CodeGenTarget &Target, AltEncodingTy Mode) {
  const std::string &Suffix = Modes[Mode];

  // Both tables are built in memory so they can be written out one after the
  // other once all instructions have been visited.
  std::string IndexArray, StorageArray;
  raw_string_ostream IS(IndexArray), SS(StorageArray);

  IS << "  static const unsigned Index" << Suffix << "[][2] = {\n";
  SS << "  static const uint64_t InstBits" << Suffix << "[] = {\n";

  unsigned NumFixedValueWords = 0U;
  for (const CodeGenInstruction *CGI : NumberedInstructions) {
    const Record *Def = CGI->TheDef;

    const bool HasNoEncoding =
        Def->getValueAsString("Namespace") == "TargetOpcode" ||
        Def->getValueAsBit("isPseudo");
    if (HasNoEncoding) {
      IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
      continue;
    }

    const auto InstIt = VarLenInsts.find(Def);
    if (InstIt == VarLenInsts.end())
      PrintFatalError(Def, "VarLenInst not found for this record");

    // Prefer the encoding for the requested mode and fall back to the
    // universal one.
    const auto &Encodings = InstIt->second;
    auto ModeIt = Encodings.find(Mode);
    if (ModeIt == Encodings.end())
      ModeIt = Encodings.find(Universal);
    if (ModeIt == Encodings.end()) {
      IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\t" << "// " << Def->getName()
                   << " no encoding\n";
      continue;
    }

    const VarLenInst &VLI = ModeIt->second;
    const unsigned BitWidth = VLI.size();

    // Start by filling in fixed values. Segments that are not fixed bits
    // only advance the bit position.
    APInt Value(BitWidth, 0);
    unsigned Pos = 0U;
    for (const auto &Segment : VLI) {
      if (Pos >= BitWidth)
        break;

      const unsigned SegmentNumBits = Segment.BitWidth;
      if (const auto *Bits = dyn_cast<BitsInit>(Segment.Value)) {
        for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx)
          Value.setBitVal(Pos + Idx,
                          cast<BitInit>(Bits->getBit(Idx))->getValue());
      } else if (const auto *Bit = dyn_cast<BitInit>(Segment.Value)) {
        Value.setBitVal(Pos, Bit->getValue());
      }
      Pos += SegmentNumBits;
    }

    emitInstBits(IS, SS, Value, NumFixedValueWords);
    IS << '\t' << "// " << Def->getName() << "\n";
    if (Value.getNumWords())
      SS << '\t' << "// " << Def->getName() << "\n";
  }

  // Terminate both tables with a sentinel entry.
  IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n  };\n";
  SS.indent(4) << "UINT64_C(0)\n  };\n";

  OS << IndexArray << StorageArray;
}

std::string
VarLenCodeEmitterGen::getInstructionCases(const Record *R,
                                          const CodeGenTarget &Target) {
  auto It = VarLenInsts.find(R);
  if (It == VarLenInsts.end())
    PrintFatalError(R, "Parsed encoding record not found");
  const auto &Map = It->second;

  // Is this instructions encoding universal (same for all modes)?
  // Allways true if there is only one mode.
  if (Map.size() == 1 && Map.begin()->first == Universal) {
    // Universal, just pick the first mode.
    AltEncodingTy Mode = Modes.begin()->first;
    const auto &Encoding = Map.begin()->second;
    return getInstructionCaseForEncoding(R, Mode, Encoding, Target,
                                         /*Indent=*/6);
  }

  std::string Case;
  Case += "      switch (Mode) {\n";
  Case += "      default: llvm_unreachable(\"Unhandled Mode\");\n";
  for (const auto &Mode : Modes) {
    Case += "      case " + itostr(Mode.first) + ": {\n";
    const auto &It = Map.find(Mode.first);
    if (It == Map.end()) {
      Case +=
          "        llvm_unreachable(\"Undefined encoding in this mode\");\n";
    } else {
      Case += getInstructionCaseForEncoding(R, It->first, It->second, Target,
                                            /*Indent=*/8);
    }
    Case += "        break;\n";
    Case += "      }\n";
  }
  Case += "      }\n";
  return Case;
}

std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
    const Record *R, AltEncodingTy Mode, const VarLenInst &VLI,
    const CodeGenTarget &Target, int Indent) {
  const CodeGenInstruction &CGI = Target.getInstruction(R);

  std::string Case;
  raw_string_ostream SS(Case);
  // Populate based value.
  SS.indent(Indent) << "Inst = getInstBits" << Modes[Mode] << "(opcode);\n";

  // Process each segment in VLI.
  size_t Offset = 0U;
  unsigned HighScratchAccess = 0U;
  for (const auto &ES : VLI) {
    unsigned NumBits = ES.BitWidth;
    const Init *Val = ES.Value;
    // If it's a StringInit or DagInit, it's a reference to an operand
    // or part of an operand.
    if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
      StringRef OperandName;
      unsigned LoBit = 0U;
      if (const auto *SV = dyn_cast<StringInit>(Val)) {
        OperandName = SV->getValue();
      } else {
        // Normalized: (slice <operand name>, <high bit>, <low bit>)
        const auto *DV = cast<DagInit>(Val);
        OperandName = cast<StringInit>(DV->getArg(0))->getValue();
        LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
      }

      auto OpIdx = CGI.Operands.parseOperandName(OperandName);
      unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
      StringRef CustomEncoder =
          CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
      if (ES.CustomEncoder.size())
        CustomEncoder = ES.CustomEncoder;

      SS.indent(Indent) << "Scratch.clearAllBits();\n";
      SS.indent(Indent) << "// op: " << OperandName.drop_front(1) << "\n";
      if (CustomEncoder.empty())
        SS.indent(Indent) << "getMachineOpValue(MI, MI.getOperand("
                          << utostr(FlatOpIdx) << ")";
      else
        SS.indent(Indent) << CustomEncoder << "(MI, /*OpIdx=*/"
                          << utostr(FlatOpIdx);

      SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";

      SS.indent(Indent) << "Inst.insertBits("
                        << "Scratch.extractBits(" << utostr(NumBits) << ", "
                        << utostr(LoBit) << ")"
                        << ", " << Offset << ");\n";

      HighScratchAccess = std::max(HighScratchAccess, NumBits + LoBit);
    }
    Offset += NumBits;
  }

  StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
  if (!PostEmitter.empty())
    SS.indent(Indent) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";

  // Resize the scratch buffer if it's to small.
  std::string ScratchResizeStr;
  if (VLI.size() && !VLI.isFixedValueOnly()) {
    raw_string_ostream RS(ScratchResizeStr);
    RS.indent(Indent) << "if (Scratch.getBitWidth() < " << HighScratchAccess
                      << ") { Scratch = Scratch.zext(" << HighScratchAccess
                      << "); }\n";
  }

  return ScratchResizeStr + Case;
}

void llvm::emitVarLenCodeEmitter(const RecordKeeper &R, raw_ostream &OS) {
  VarLenCodeEmitterGen(R).run(OS);
}
