| //===-- Target.cpp ----------------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "../Target.h" |
| #include "../ParallelSnippetGenerator.h" |
| #include "../SerialSnippetGenerator.h" |
| #include "../SnippetGenerator.h" |
| |
| #include "MCTargetDesc/RISCVBaseInfo.h" |
| #include "MCTargetDesc/RISCVMCTargetDesc.h" |
| #include "MCTargetDesc/RISCVMatInt.h" |
| #include "RISCV.h" |
| #include "RISCVExegesisPasses.h" |
| #include "RISCVInstrInfo.h" |
| #include "RISCVRegisterInfo.h" |
| #include "llvm/Support/Regex.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
// Include computeAvailableFeatures and computeRequiredFeatures.
| #define GET_AVAILABLE_OPCODE_CHECKER |
| #include "RISCVGenInstrInfo.inc" |
| |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| |
| #include <vector> |
| |
| namespace llvm { |
| namespace exegesis { |
| |
| static cl::opt<bool> |
| OnlyUsesVLMAXForVL("riscv-vlmax-for-vl", |
| cl::desc("Only enumerate VLMAX for VL operand"), |
| cl::init(false), cl::Hidden); |
| |
| static cl::opt<bool> |
| EnumerateRoundingModes("riscv-enumerate-rounding-modes", |
| cl::desc("Enumerate different FRM and VXRM"), |
| cl::init(true), cl::Hidden); |
| |
| static cl::opt<std::string> |
| FilterConfig("riscv-filter-config", |
| cl::desc("Show only the configs matching this regex"), |
| cl::init(""), cl::Hidden); |
| |
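// RISCVGenExegesis.inc provides the tablegen-erated RISCVCpuPfmCounters used
// when registering this exegesis target below.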
| #include "RISCVGenExegesis.inc" |
| |
| namespace { |
| |
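// Snippet generator for RISC-V. BaseT is either SerialSnippetGenerator or
// ParallelSnippetGenerator. On top of the base behavior, this expands each
// RVV pseudo instruction into multiple code templates -- one per supported
// combination of SEW, policy, AVL, and rounding mode.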
| template <class BaseT> class RISCVSnippetGenerator : public BaseT { |
| static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) { |
| if (UsesVXRM) { |
| assert(RISCVVXRndMode::isValidRoundingMode(Val)); |
| OS << RISCVVXRndMode::roundingModeToString( |
| static_cast<RISCVVXRndMode::RoundingMode>(Val)); |
| } else { |
| assert(RISCVFPRndMode::isValidRoundingMode(Val)); |
| OS << RISCVFPRndMode::roundingModeToString( |
| static_cast<RISCVFPRndMode::RoundingMode>(Val)); |
| } |
| } |
| |
| static constexpr unsigned MinSEW = 8; |
  // ELEN is the maximum supported SEW, i.e. SEW_max.
| unsigned ELEN = 64; |
| |
  // We can't know the real min/max VLEN without a Function, so we use the
  // minimum VLEN implied by the Zvl* extensions instead.
| unsigned ZvlVLen = 32; |
| |
  /// Mask of registers that are NOT standalone registers; standalone
  /// registers are single registers like X0 and V0, as opposed to register
  /// tuples.
| BitVector AggregateRegisters; |
| |
  // Returns true when the opcode is available in any of the given feature
  // sets (FBs).
| static bool |
| isOpcodeAvailableIn(unsigned Opcode, |
| ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) { |
| FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode); |
| for (uint8_t FB : FBs) { |
| if (RequiredFeatures[FB]) |
| return true; |
| } |
| return false; |
| } |
| |
| static bool isRVVFloatingPointOp(unsigned Opcode) { |
| return isOpcodeAvailableIn(Opcode, |
| {RISCV_MC::Feature_HasVInstructionsAnyFBit}); |
| } |
| |
  // Get the element group width (EGW) for the given vector crypto extension
  // opcode.
| static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) { |
| using namespace RISCV_MC; |
| if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit, |
| Feature_HasStdExtZvknedBit, |
| Feature_HasStdExtZvksedBit})) |
| return 128U; |
| if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit})) |
| return 256U; |
| if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit})) |
| // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256. |
| // Otherwise it's 128. |
| return SEW == 64 ? 256U : 128U; |
| |
| llvm_unreachable("Unsupported opcode"); |
| } |
| |
| // A handy utility to multiply or divide an integer by LMUL. |
| template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) { |
| auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); |
| return IsFractional ? Val / LMul : Val * LMul; |
| } |
| |
  /// Return the denominator of the EEW fraction (i.e. the `x` in the .vfx
  /// suffix), or std::nullopt if BaseOpcode is not a vector sext/zext.
| static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) { |
| switch (BaseOpcode) { |
| case RISCV::VSEXT_VF2: |
| case RISCV::VZEXT_VF2: |
| return 2; |
| case RISCV::VSEXT_VF4: |
| case RISCV::VZEXT_VF4: |
| return 4; |
| case RISCV::VSEXT_VF8: |
| case RISCV::VZEXT_VF8: |
| return 8; |
| default: |
| return std::nullopt; |
| } |
| } |
| |
| void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr, |
| unsigned BaseOpcode, |
| const BitVector &ForbiddenRegisters, |
| std::vector<CodeTemplate> &Result) const; |
| |
| public: |
| RISCVSnippetGenerator(const LLVMState &State, |
| const SnippetGenerator::Options &Opts) |
| : BaseT(State, Opts), |
| AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) { |
    // Remove standalone registers from the aggregate-registers mask.
| const MCRegisterInfo &RegInfo = State.getRegInfo(); |
| const unsigned StandaloneRegClasses[] = { |
| RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID}; |
| |
| for (unsigned RegClassID : StandaloneRegClasses) |
| for (unsigned Reg : RegInfo.getRegClass(RegClassID)) |
| AggregateRegisters.reset(Reg); |
| |
| // Initialize ELEN and VLEN. |
| // FIXME: We could have obtained these two constants from RISCVSubtarget |
| // but in order to get that from TargetMachine, we need a Function. |
| const MCSubtargetInfo &STI = State.getSubtargetInfo(); |
| ELEN = STI.hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32; |
| |
| const unsigned ZvlFeatures[] = { |
| RISCV::FeatureStdExtZvl32b, RISCV::FeatureStdExtZvl64b, |
| RISCV::FeatureStdExtZvl128b, RISCV::FeatureStdExtZvl256b, |
| RISCV::FeatureStdExtZvl512b, RISCV::FeatureStdExtZvl1024b, |
| RISCV::FeatureStdExtZvl2048b, RISCV::FeatureStdExtZvl4096b, |
| RISCV::FeatureStdExtZvl8192b, RISCV::FeatureStdExtZvl16384b, |
| RISCV::FeatureStdExtZvl32768b, RISCV::FeatureStdExtZvl65536b}; |
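    // Zvl<N>b guarantees VLEN >= N. The features above start at Zvl32b
    // (Idx = 0) and double each step, hence `1u << (Idx + 5)`.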
| for (auto [Idx, Feature] : enumerate(ZvlFeatures)) { |
| if (STI.hasFeature(Feature)) |
| ZvlVLen = std::max(ZvlVLen, 1u << (Idx + 5)); |
| } |
| } |
| |
| Expected<std::vector<CodeTemplate>> |
| generateCodeTemplates(InstructionTemplate Variant, |
| const BitVector &ForbiddenRegisters) const override; |
| }; |
| |
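// Returns true if MaskedOp is the masked pseudo variant of UnmaskedOp.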
| static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) { |
| const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp); |
| return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp; |
| } |
| |
// There are primarily two kinds of opcodes that are not eligible
// for a serial snippet:
// (1) Those with a use operand that cannot overlap with the def operand
//     (i.e. early clobber).
// (2) Those where the register file of the only use operand is different
//     from that of the def operand, e.g. the use operand is a vector while
//     the result is a scalar.
| static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode, |
| const Instruction &I) { |
| if (llvm::any_of(I.Operands, |
| [](const Operand &Op) { return Op.isEarlyClobber(); })) |
| return true; |
| |
| switch (BaseOpcode) { |
| case RISCV::VCOMPRESS_VM: |
| case RISCV::VCPOP_M: |
| case RISCV::VCPOP_V: |
  // The permutation instructions listed below cannot have their destination
  // overlapping with the source.
| case RISCV::VRGATHEREI16_VV: |
| case RISCV::VRGATHER_VI: |
| case RISCV::VRGATHER_VV: |
| case RISCV::VRGATHER_VX: |
| case RISCV::VSLIDE1UP_VX: |
| case RISCV::VSLIDEUP_VI: |
| case RISCV::VSLIDEUP_VX: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
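// Widening/narrowing FP conversion opcodes covered by Zvfhmin and Zvfbfmin,
// which make SEW=16 valid even without full Zvfh (see the SEW filter below).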
| static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) { |
| switch (BaseOpcode) { |
| case RISCV::VFNCVT_F_F_W: |
| case RISCV::VFWCVT_F_F_V: |
| case RISCV::VFNCVTBF16_F_F_W: |
| case RISCV::VFWCVTBF16_F_F_V: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
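// Vector reduction opcodes have the form `vredop vd, vs2, vs1` and read only
// element 0 of vs1.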
| static bool isVectorReduction(unsigned BaseOpcode) { |
| switch (BaseOpcode) { |
| case RISCV::VREDAND_VS: |
| case RISCV::VREDMAXU_VS: |
| case RISCV::VREDMAX_VS: |
| case RISCV::VREDMINU_VS: |
| case RISCV::VREDMIN_VS: |
| case RISCV::VREDOR_VS: |
| case RISCV::VREDSUM_VS: |
| case RISCV::VREDXOR_VS: |
| case RISCV::VWREDSUMU_VS: |
| case RISCV::VWREDSUM_VS: |
| case RISCV::VFREDMAX_VS: |
| case RISCV::VFREDMIN_VS: |
| case RISCV::VFREDOSUM_VS: |
| case RISCV::VFREDUSUM_VS: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| template <class BaseT> |
| void RISCVSnippetGenerator<BaseT>::annotateWithVType( |
| const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode, |
| const BitVector &ForbiddenRegisters, |
| std::vector<CodeTemplate> &Result) const { |
| const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo(); |
| unsigned VPseudoOpcode = Instr.getOpcode(); |
| |
| bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>; |
| |
| const MCInstrDesc &MIDesc = Instr.Description; |
| const uint64_t TSFlags = MIDesc.TSFlags; |
| |
| RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); |
| |
| const size_t StartingResultSize = Result.size(); |
| |
| SmallPtrSet<const Operand *, 4> VTypeOperands; |
| std::optional<AliasingConfigurations> SelfAliasing; |
  // Exegesis sees instructions with tied operands as being inherently serial.
  // But for RVV instructions, those tied operands are passthru rather
  // than real read operands, so we manually create a dependency between the
  // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM
  // operands.
| auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) { |
| // Initialize SelfAliasing on first use. |
| if (!SelfAliasing.has_value()) { |
| BitVector ExcludeRegs = ForbiddenRegisters; |
| ExcludeRegs |= AggregateRegisters; |
| SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs); |
| bool EmptyUses = false; |
| for (auto &ARO : SelfAliasing->Configurations) { |
| auto &Uses = ARO.Uses; |
| for (auto ROA = Uses.begin(); ROA != Uses.end();) { |
| const Operand *Op = ROA->Op; |
| // Exclude tied operand(s). |
| if (Op->isTied()) { |
| ROA = Uses.erase(ROA); |
| continue; |
| } |
| |
          // Special handling for reduction operations: for a given reduction
          // `vredop vd, vs2, vs1`, we don't want vd to alias with vs1,
          // since we only read `vs1[0]` and many implementations optimize
          // for this case (e.g. chaining). Instead, we force it to create
          // an alias between vd and vs2.
| if (isVectorReduction(BaseOpcode) && |
| // vs1's operand index is always 3. |
| Op->getIndex() == 3) { |
| ROA = Uses.erase(ROA); |
| continue; |
| } |
| |
| // Exclude any special operands like SEW and VL -- we've already |
| // assigned values to them. |
| if (VTypeOperands.count(Op)) { |
| ROA = Uses.erase(ROA); |
| continue; |
| } |
| ++ROA; |
| } |
| |
        // If any of the use operand candidate lists is empty, there is
        // no point in assigning self-aliasing registers.
| if (Uses.empty()) { |
| EmptyUses = true; |
| break; |
| } |
| } |
| if (EmptyUses) |
| SelfAliasing->Configurations.clear(); |
| } |
| |
    // This is a self-aliasing instruction, so defs and uses come from the
    // same instance, hence IT is passed twice in the following call.
| if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing()) |
| setRandomAliasing(*SelfAliasing, IT, IT); |
| }; |
| |
  // We are going to create a CodeTemplate (configuration) for each supported
  // combination of SEW, policy, AVL, and rounding mode.
  // FIXME: Account for EEW and EMUL.
| SmallVector<std::optional<unsigned>, 4> Log2SEWs; |
| SmallVector<std::optional<unsigned>, 4> Policies; |
| SmallVector<std::optional<int>, 3> AVLs; |
| SmallVector<std::optional<unsigned>, 8> RoundingModes; |
| |
| bool HasSEWOp = RISCVII::hasSEWOp(TSFlags); |
| bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); |
| bool HasVLOp = RISCVII::hasVLOp(TSFlags); |
| bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags); |
| bool UsesVXRM = RISCVII::usesVXRM(TSFlags); |
| |
| if (HasSEWOp) { |
| const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)]; |
| VTypeOperands.insert(&SEWOp); |
| |
| if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) { |
| // If it's a mask-producing instruction, the SEW operand is always zero. |
| Log2SEWs.push_back(0); |
| } else { |
| SmallVector<unsigned, 4> SEWCandidates; |
| |
      // (RVV spec 3.4.2) For fractional LMUL, the supported SEW values are in
      // [SEW_min, LMUL * ELEN].
| unsigned SEWUpperBound = |
| VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN; |
| for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) { |
| SEWCandidates.push_back(SEW); |
| |
        // Some scheduling classes already integrate SEW; only put
        // their corresponding SEW values at the SEW operands.
        // NOTE: It is imperative to apply this check first; after the other
        // filtering rules below have been applied, it is difficult to tell
        // whether there is an integrated SEW.
| const auto *RVVBase = |
| RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW); |
| if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode || |
| isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) || |
| isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) { |
| // There is an integrated SEW, remove all but the SEW pushed last. |
| SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1); |
| break; |
| } |
| } |
| |
| // Filter out some candidates. |
| for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) { |
| // For floating point operations, only select SEW of the supported FLEN. |
| if (isRVVFloatingPointOp(VPseudoOpcode)) { |
| bool Supported = false; |
| Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16; |
| Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16; |
| Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32; |
| Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64; |
| if (!Supported) { |
| SEW = SEWCandidates.erase(SEW); |
| continue; |
| } |
| } |
| |
        // The EEW of the source operand in VSEXT and VZEXT is a fraction
        // of the SEW, hence only SEWs that lead to a valid EEW are allowed.
| if (auto Frac = isRVVSignZeroExtend(BaseOpcode)) |
| if (*SEW / *Frac < MinSEW) { |
| SEW = SEWCandidates.erase(SEW); |
| continue; |
| } |
| |
| // Most vector crypto 1.0 instructions only work on SEW=32. |
| using namespace RISCV_MC; |
| if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit, |
| Feature_HasStdExtZvknedBit, |
| Feature_HasStdExtZvknhaOrZvknhbBit, |
| Feature_HasStdExtZvksedBit, |
| Feature_HasStdExtZvkshBit})) { |
| if (*SEW != 32) |
| // Zvknhb supports SEW=64 as well. |
| if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) || |
| !isOpcodeAvailableIn(BaseOpcode, |
| {Feature_HasStdExtZvknhaOrZvknhbBit})) { |
| SEW = SEWCandidates.erase(SEW); |
| continue; |
| } |
| |
          // We're also enforcing the `LMUL * VLEN >= EGW` requirement here,
          // because some of the extensions have a SEW-dependent EGW.
| unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW); |
| if (multiplyLMul(ZvlVLen, VLMul) < EGW) { |
| SEW = SEWCandidates.erase(SEW); |
| continue; |
| } |
| } |
| |
| ++SEW; |
| } |
| |
      // We can't produce any result without at least one SEW candidate.
| if (SEWCandidates.empty()) |
| return; |
| |
| for (unsigned SEW : SEWCandidates) |
| Log2SEWs.push_back(Log2_32(SEW)); |
| } |
| } else { |
| Log2SEWs.push_back(std::nullopt); |
| } |
| |
| if (HasPolicyOp) { |
| VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]); |
| |
| Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC, |
| (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)}; |
| } else { |
| Policies.push_back(std::nullopt); |
| } |
| |
| if (HasVLOp) { |
| VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]); |
| |
| if (OnlyUsesVLMAXForVL) |
| AVLs.push_back(-1); |
| else |
| AVLs = {// 5-bit immediate value |
| 1, |
| // VLMAX |
| -1, |
| // Non-X0 register |
| 0}; |
| } else { |
| AVLs.push_back(std::nullopt); |
| } |
| |
| if (HasRMOp) { |
| VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]); |
| |
| if (UsesVXRM) { |
| // Use RNU as the default VXRM. |
| RoundingModes = {RISCVVXRndMode::RNU}; |
| if (EnumerateRoundingModes) |
| RoundingModes.append( |
| {RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD}); |
| } else { |
| if (EnumerateRoundingModes) |
| RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ, |
| RISCVFPRndMode::RDN, RISCVFPRndMode::RUP, |
| RISCVFPRndMode::RMM}; |
| else |
| // If we're not enumerating FRM, use DYN to instruct |
| // RISCVInsertReadWriteCSRPass to insert nothing. |
| RoundingModes = {RISCVFPRndMode::DYN}; |
| } |
| } else { |
| RoundingModes = {std::nullopt}; |
| } |
| |
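  // Collect all (RM, AVL, SEW, policy) combinations. Using a std::set gives
  // a deterministic iteration order over the configurations.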
| std::set<std::tuple<std::optional<unsigned>, std::optional<int>, |
| std::optional<unsigned>, std::optional<unsigned>>> |
| Combinations; |
| for (auto AVL : AVLs) { |
| for (auto Log2SEW : Log2SEWs) |
| for (auto Policy : Policies) { |
| for (auto RM : RoundingModes) |
| Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy)); |
| } |
| } |
| |
| std::string ConfigStr; |
| SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments; |
| for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) { |
| InstructionTemplate IT(&Instr); |
| |
| ListSeparator LS; |
| ConfigStr = "vtype = {"; |
| raw_string_ostream SS(ConfigStr); |
| |
| ValueAssignments.clear(); |
| |
| if (RM) { |
| const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]; |
| ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)}); |
| printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM, |
| UsesVXRM); |
| } |
| |
| if (AVL) { |
| MCOperand OpVal; |
| if (*AVL < 0) { |
| // VLMAX |
| OpVal = MCOperand::createImm(-1); |
| SS << LS << "AVL: VLMAX"; |
| } else if (*AVL == 0) { |
| // A register holding AVL. |
| // TODO: Generate a random register. |
| OpVal = MCOperand::createReg(RISCV::X5); |
| OpVal.print(SS << LS << "AVL: "); |
| } else { |
| // A 5-bit immediate. |
| // The actual value assignment is deferred to |
| // RISCVExegesisTarget::randomizeTargetMCOperand. |
| SS << LS << "AVL: simm5"; |
| } |
| if (OpVal.isValid()) { |
| const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)]; |
| ValueAssignments.push_back({&Op, OpVal}); |
| } |
| } |
| |
| if (Log2SEW) { |
| const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)]; |
| ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)}); |
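      // A Log2SEW of 0 comes from mask-producing instructions (see above);
      // print it as e8.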
| SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8); |
| } |
| |
| if (Policy) { |
| const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]; |
| ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)}); |
| SS << LS |
| << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu") |
| << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu"); |
| } |
| |
| SS << "}"; |
| |
| // Filter out some configurations, if needed. |
| if (!FilterConfig.empty()) { |
| if (!Regex(FilterConfig).match(ConfigStr)) |
| continue; |
| } |
| |
| CodeTemplate CT = OrigCT.clone(); |
| CT.Config = std::move(ConfigStr); |
| for (InstructionTemplate &IT : CT.Instructions) { |
| if (IsSerial) { |
| // Reset this template's value assignments and do it |
| // ourselves. |
| IT = InstructionTemplate(&Instr); |
| assignSerialRVVOperands(IT); |
| } |
| |
| for (const auto &[Op, OpVal] : ValueAssignments) |
| IT.getValueFor(*Op) = OpVal; |
| } |
| Result.push_back(std::move(CT)); |
| if (Result.size() - StartingResultSize >= |
| SnippetGenerator::Opts.MaxConfigsPerOpcode) |
| return; |
| } |
| } |
| |
| template <class BaseT> |
| Expected<std::vector<CodeTemplate>> |
| RISCVSnippetGenerator<BaseT>::generateCodeTemplates( |
| InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { |
| const Instruction &Instr = Variant.getInstr(); |
| |
| bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>; |
| |
| unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode()); |
| |
  // Bail out on ineligible opcodes before generating base code templates,
  // since the latter is quite expensive.
| if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, Instr)) |
| return std::vector<CodeTemplate>{}; |
| |
| auto BaseCodeTemplates = |
| BaseT::generateCodeTemplates(Variant, ForbiddenRegisters); |
| if (!BaseCodeTemplates) |
| return BaseCodeTemplates.takeError(); |
| |
| if (!BaseOpcode) |
| return BaseCodeTemplates; |
| |
| // Specialize for RVV pseudo. |
| std::vector<CodeTemplate> ExpandedTemplates; |
| for (const auto &BaseCT : *BaseCodeTemplates) |
| annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters, |
| ExpandedTemplates); |
| |
| return ExpandedTemplates; |
| } |
| |
// Materializes a constant value into a general-purpose (integer) register.
static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
                                      MCRegister Reg, const APInt &Value) {
  SmallVector<MCInst, 8> MCInstSeq;
  RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, Reg, MCInstSeq);
  return std::vector<MCInst>(MCInstSeq.begin(), MCInstSeq.end());
}
| |
| const MCPhysReg ScratchIntReg = RISCV::X30; // t5 |
| |
// Materializes constant bits into a floating-point register, by first loading
// them into an integer scratch register and then moving them with an FMV.
| static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI, |
| MCRegister Reg, const APInt &Bits, |
| unsigned FmvOpcode) { |
| std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits); |
| Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg)); |
| return Instrs; |
| } |
| |
// The main idea: we only support an APInt whose value, interpreted as a
// double, has a zero fractional part (1.0, 2.0, 3.0, ...). We can then write
// the integer to the scratch register t5 and convert it with FCVT. This is
// the only reliable approach in 32-bit mode; otherwise we would need to call
// __floatsidf.
| static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI, |
| MCRegister Reg, |
| const APInt &Bits) { |
| double D = Bits.bitsToDouble(); |
| double IPart; |
| double FPart = std::modf(D, &IPart); |
| |
| if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) { |
| errs() << "loadFP64RegBits32 is not implemented for doubles like " << D |
| << ", please remove fractional part\n"; |
| return {}; |
| } |
| |
| std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits); |
| Instrs.push_back(MCInstBuilder(RISCV::FCVT_D_W) |
| .addReg(Reg) |
| .addReg(ScratchIntReg) |
| .addImm(RISCVFPRndMode::RNE)); |
| return Instrs; |
| } |
| |
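// ExegesisTarget implementation for RISC-V, covering both riscv32 and
// riscv64.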
| class ExegesisRISCVTarget : public ExegesisTarget { |
  // NOTE: Alternatively, we could use a BitVector here, but RVV MC opcodes
  // make up only a small portion of the entire opcode space, so a BitVector
  // would be a waste of space.
| mutable SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos; |
| |
| public: |
| ExegesisRISCVTarget(); |
| |
| bool matchesArch(Triple::ArchType Arch) const override; |
| |
| std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, |
| const APInt &Value) const override; |
| |
| const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, |
| unsigned Opcode) const override { |
    // We don't want to support RVV instructions that depend on VTYPE, because
    // those instructions by themselves don't carry enough information for us
    // to set up the proper VTYPE environment via VSETVL instructions.
    // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires
    // LMUL and SEW, and enumerating those combinations is no better than the
    // ugly trick here that memorizes the MC opcodes of the RVV pseudos we
    // have processed previously. This works most of the time because RVV
    // pseudo opcodes are placed before any other RVV opcodes. Of course, it
    // doesn't work if we're asked to benchmark only a certain subset of
    // opcodes.
| if (RVVMCOpcodesWithPseudos.count(Opcode)) |
      return "The MC opcodes of RVV instructions are ignored";
| |
| // We want to support all RVV pseudos. |
| if (unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode)) { |
| RVVMCOpcodesWithPseudos.insert(MCOpcode); |
| return nullptr; |
| } |
| |
| return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode); |
| } |
| |
| MCRegister getDefaultLoopCounterRegister(const Triple &) const override; |
| |
| void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
| MachineBasicBlock &TargetMBB, |
| const MCInstrInfo &MII, |
| MCRegister LoopRegister) const override; |
| |
| MCRegister getScratchMemoryRegister(const Triple &TT) const override; |
| |
| void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg, |
| unsigned Offset) const override; |
| |
| ArrayRef<MCPhysReg> getUnavailableRegisters() const override; |
| |
| bool allowAsBackToBack(const Instruction &Instr) const override { |
| return !Instr.Description.isPseudo(); |
| } |
| |
| Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, |
| MCOperand &AssignedValue, |
| const BitVector &ForbiddenRegs) const override; |
| |
| std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator( |
| const LLVMState &State, |
| const SnippetGenerator::Options &Opts) const override { |
| return std::make_unique<RISCVSnippetGenerator<SerialSnippetGenerator>>( |
| State, Opts); |
| } |
| |
| std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator( |
| const LLVMState &State, |
| const SnippetGenerator::Options &Opts) const override { |
| return std::make_unique<RISCVSnippetGenerator<ParallelSnippetGenerator>>( |
| State, Opts); |
| } |
| |
| std::vector<InstructionTemplate> |
| generateInstructionVariants(const Instruction &Instr, |
| unsigned MaxConfigsPerOpcode) const override; |
| |
| void addTargetSpecificPasses(PassManagerBase &PM) const override { |
    // Turn AVL operands that are physical registers into virtual registers.
| PM.add(exegesis::createRISCVPreprocessingPass()); |
| PM.add(createRISCVInsertVSETVLIPass()); |
    // Set up the correct FRM.
| PM.add(createRISCVInsertReadWriteCSRPass()); |
| PM.add(createRISCVInsertWriteVXRMPass()); |
    // This will assign a physical register to the result of VSETVLI
    // instructions that produce VLMAX.
| PM.add(exegesis::createRISCVPostprocessingPass()); |
| // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand |
| // PseudoMovImm with RISCVPostRAExpandPseudoPass though. |
| PM.add(createRISCVPostRAExpandPseudoPass()); |
| } |
| }; |
| |
| ExegesisRISCVTarget::ExegesisRISCVTarget() |
| : ExegesisTarget(RISCVCpuPfmCounters, RISCV_MC::isOpcodeAvailable) {} |
| |
| bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const { |
| return Arch == Triple::riscv32 || Arch == Triple::riscv64; |
| } |
| |
| std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI, |
| MCRegister Reg, |
| const APInt &Value) const { |
| if (RISCV::GPRRegClass.contains(Reg)) |
| return loadIntReg(STI, Reg, Value); |
| if (RISCV::FPR16RegClass.contains(Reg)) |
| return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X); |
| if (RISCV::FPR32RegClass.contains(Reg)) |
| return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X); |
| if (RISCV::FPR64RegClass.contains(Reg)) { |
| if (STI.hasFeature(RISCV::Feature64Bit)) |
| return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X); |
| return loadFP64RegBits32(STI, Reg, Value); |
| } |
| // TODO: Emit proper code to initialize other kinds of registers. |
| return {}; |
| } |
| |
| const MCPhysReg DefaultLoopCounterReg = RISCV::X31; // t6 |
| const MCPhysReg ScratchMemoryReg = RISCV::X10; // a0 |
| |
| MCRegister |
| ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const { |
| return DefaultLoopCounterReg; |
| } |
| |
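// Emits `addi LoopRegister, LoopRegister, -1` followed by
// `bne LoopRegister, x0, TargetMBB`.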
| void ExegesisRISCVTarget::decrementLoopCounterAndJump( |
| MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, |
| const MCInstrInfo &MII, MCRegister LoopRegister) const { |
| BuildMI(&MBB, DebugLoc(), MII.get(RISCV::ADDI)) |
| .addDef(LoopRegister) |
| .addUse(LoopRegister) |
| .addImm(-1); |
| BuildMI(&MBB, DebugLoc(), MII.get(RISCV::BNE)) |
| .addUse(LoopRegister) |
| .addUse(RISCV::X0) |
| .addMBB(&TargetMBB); |
| } |
| |
| MCRegister |
| ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const { |
| return ScratchMemoryReg; // a0 |
| } |
| |
| void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT, |
| MCRegister Reg, |
| unsigned Offset) const { |
  // TODO: For now we ignore Offset because we have no way
  // to detect it in the instruction.
| auto &I = IT.getInstr(); |
| |
| auto MemOpIt = |
| find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); }); |
| assert(MemOpIt != I.Operands.end() && |
| "Instruction must have memory operands"); |
| |
| const Operand &MemOp = *MemOpIt; |
| |
| assert(MemOp.isReg() && "Memory operand expected to be register"); |
| |
| IT.getValueFor(MemOp) = MCOperand::createReg(Reg); |
| } |
| |
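// Registers llvm-exegesis reserves for its own use: the zero register, the
// default loop counter (t6), the integer scratch register (t5), and the
// scratch memory register (a0). Snippets must not use them.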
| const MCPhysReg UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg, |
| ScratchIntReg, ScratchMemoryReg}; |
| |
| ArrayRef<MCPhysReg> ExegesisRISCVTarget::getUnavailableRegisters() const { |
| return UnavailableRegisters; |
| } |
| |
| Error ExegesisRISCVTarget::randomizeTargetMCOperand( |
| const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, |
| const BitVector &ForbiddenRegs) const { |
| uint8_t OperandType = |
| Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType; |
| |
| switch (OperandType) { |
| case RISCVOp::OPERAND_FRMARG: |
| AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN); |
| break; |
| case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: |
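    // The smallest non-zero immediate with the four least significant bits
    // clear.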
| AssignedValue = MCOperand::createImm(0b1 << 4); |
| break; |
| case RISCVOp::OPERAND_SIMM6_NONZERO: |
| case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: |
| AssignedValue = MCOperand::createImm(1); |
| break; |
| case RISCVOp::OPERAND_SIMM5: |
| // 5-bit signed immediate value. |
| AssignedValue = MCOperand::createImm(randomIndex(31) - 16); |
| break; |
| case RISCVOp::OPERAND_AVL: |
| case RISCVOp::OPERAND_UIMM5: |
| // 5-bit unsigned immediate value. |
| AssignedValue = MCOperand::createImm(randomIndex(31)); |
| break; |
| default: |
| if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && |
| OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM) |
| AssignedValue = MCOperand::createImm(0); |
| } |
| return Error::success(); |
| } |
| |
| std::vector<InstructionTemplate> |
| ExegesisRISCVTarget::generateInstructionVariants( |
    const Instruction &Instr, unsigned MaxConfigsPerOpcode) const {
| InstructionTemplate IT{&Instr}; |
  for (const Operand &Op : Instr.Operands)
    if (Op.isMemory())
      IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg);
| return {IT}; |
| } |
| |
| } // anonymous namespace |
| |
| static ExegesisTarget *getTheRISCVExegesisTarget() { |
| static ExegesisRISCVTarget Target; |
| return &Target; |
| } |
| |
| void InitializeRISCVExegesisTarget() { |
| ExegesisTarget::registerTarget(getTheRISCVExegesisTarget()); |
| } |
| |
| } // namespace exegesis |
| } // namespace llvm |