| //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// Interface definition for SIInstrInfo. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |
| #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |
| |
| #include "AMDGPUMIRFormatter.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "SIRegisterInfo.h" |
| #include "Utils/AMDGPUBaseInfo.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetSchedule.h" |
| |
| #define GET_INSTRINFO_HEADER |
| #include "AMDGPUGenInstrInfo.inc" |
| |
| namespace llvm { |
| |
| class APInt; |
| class GCNSubtarget; |
| class LiveVariables; |
| class MachineDominatorTree; |
| class MachineRegisterInfo; |
| class RegScavenger; |
| class TargetRegisterClass; |
| class ScheduleHazardRecognizer; |
| |
| constexpr unsigned DefaultMemoryClusterDWordsLimit = 8; |
| |
| /// Mark the MMO of a uniform load if there are no potentially clobbering stores |
| /// on any path from the start of an entry function to this load. |
| static const MachineMemOperand::Flags MONoClobber = |
| MachineMemOperand::MOTargetFlag1; |
| |
| /// Mark the MMO of a load as the last use. |
| static const MachineMemOperand::Flags MOLastUse = |
| MachineMemOperand::MOTargetFlag2; |
| |
| /// Utility to store a worklist of machine instructions. |
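| /// |
| /// A minimal usage sketch (illustrative): |
| /// \code |
| ///   SIInstrWorklist Worklist; |
| ///   Worklist.insert(&MI); |
| ///   while (!Worklist.empty()) { |
| ///     MachineInstr *Top = Worklist.top(); |
| ///     Worklist.erase_top(); |
| ///     // ... process Top, possibly inserting further instructions ... |
| ///   } |
| /// \endcode |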
| struct SIInstrWorklist { |
| SIInstrWorklist() = default; |
| |
| void insert(MachineInstr *MI); |
| |
| MachineInstr *top() const { |
| const auto *iter = InstrList.begin(); |
| return *iter; |
| } |
| |
| void erase_top() { |
| const auto *iter = InstrList.begin(); |
| InstrList.erase(iter); |
| } |
| |
| bool empty() const { return InstrList.empty(); } |
| |
| void clear() { |
| InstrList.clear(); |
| DeferredList.clear(); |
| } |
| |
| bool isDeferred(MachineInstr *MI); |
| |
| SetVector<MachineInstr *> &getDeferredList() { return DeferredList; } |
| |
| private: |
| /// InstrList contains the MachineInstrs. |
| SetVector<MachineInstr *> InstrList; |
| /// DeferredList holds specific MachineInstrs that the insert method |
| /// defers rather than adding them to InstrList. |
| SetVector<MachineInstr *> DeferredList; |
| }; |
| |
| class SIInstrInfo final : public AMDGPUGenInstrInfo { |
| private: |
| const SIRegisterInfo RI; |
| const GCNSubtarget &ST; |
| TargetSchedModel SchedModel; |
| mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter; |
| |
| // The inverse predicate has the negated value, so a predicate can be |
| // inverted by negating it (e.g. SCC_TRUE = 1 and SCC_FALSE = -1). |
| enum BranchPredicate { |
| INVALID_BR = 0, |
| SCC_TRUE = 1, |
| SCC_FALSE = -1, |
| VCCNZ = 2, |
| VCCZ = -2, |
| EXECNZ = -3, |
| EXECZ = 3 |
| }; |
| |
| using SetVectorType = SmallSetVector<MachineInstr *, 32>; |
| |
| static unsigned getBranchOpcode(BranchPredicate Cond); |
| static BranchPredicate getBranchPredicate(unsigned Opcode); |
| |
| public: |
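| /// Extract subregister \p SubIdx of \p SuperReg into a freshly created |
| /// virtual register of class \p SubRC, inserting the copy before \p MI. |
| /// \returns the new virtual register. |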
| unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, |
| MachineRegisterInfo &MRI, |
| const MachineOperand &SuperReg, |
| const TargetRegisterClass *SuperRC, |
| unsigned SubIdx, |
| const TargetRegisterClass *SubRC) const; |
| MachineOperand buildExtractSubRegOrImm( |
| MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, |
| const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, |
| unsigned SubIdx, const TargetRegisterClass *SubRC) const; |
| |
| private: |
| void swapOperands(MachineInstr &Inst) const; |
| |
| std::pair<bool, MachineBasicBlock *> |
| moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| MachineDominatorTree *MDT = nullptr) const; |
| |
| void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| MachineDominatorTree *MDT = nullptr) const; |
| |
| void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
| |
| void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
| |
| void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| unsigned Opcode) const; |
| |
| void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| unsigned Opcode) const; |
| |
| void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| unsigned Opcode, bool Swap = false) const; |
| |
| void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| unsigned Opcode, |
| MachineDominatorTree *MDT = nullptr) const; |
| |
| void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| MachineDominatorTree *MDT) const; |
| |
| void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| MachineDominatorTree *MDT) const; |
| |
| void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| MachineDominatorTree *MDT = nullptr) const; |
| |
| void splitScalar64BitBCNT(SIInstrWorklist &Worklist, |
| MachineInstr &Inst) const; |
| void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
| void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
| unsigned Opcode, |
| MachineDominatorTree *MDT = nullptr) const; |
| void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI, |
| MachineInstr &Inst) const; |
| |
| void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI, |
| SIInstrWorklist &Worklist) const; |
| |
| void addSCCDefUsersToVALUWorklist(MachineOperand &Op, |
| MachineInstr &SCCDefInst, |
| SIInstrWorklist &Worklist, |
| Register NewCond = Register()) const; |
| void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, |
| SIInstrWorklist &Worklist) const; |
| |
| const TargetRegisterClass * |
| getDestEquivalentVGPRClass(const MachineInstr &Inst) const; |
| |
| bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, |
| const MachineInstr &MIb) const; |
| |
| Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; |
| |
| bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI, |
| StringRef &ErrInfo) const; |
| |
| protected: |
| /// If the specific machine instruction is an instruction that moves/copies |
| /// a value from one register to another, return the destination and source |
| /// registers as machine operands. |
| std::optional<DestSourcePair> |
| isCopyInstrImpl(const MachineInstr &MI) const override; |
| |
| bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, |
| AMDGPU::OpName Src0OpName, MachineOperand &Src1, |
| AMDGPU::OpName Src1OpName) const; |
| bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, |
| const MachineOperand *fromMO, unsigned toIdx, |
| const MachineOperand *toMO) const; |
| MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
| unsigned OpIdx0, |
| unsigned OpIdx1) const override; |
| |
| public: |
| enum TargetOperandFlags { |
| MO_MASK = 0xf, |
| |
| MO_NONE = 0, |
| // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. |
| MO_GOTPCREL = 1, |
| // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. |
| MO_GOTPCREL32 = 2, |
| MO_GOTPCREL32_LO = 2, |
| // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. |
| MO_GOTPCREL32_HI = 3, |
| // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. |
| MO_REL32 = 4, |
| MO_REL32_LO = 4, |
| // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. |
| MO_REL32_HI = 5, |
| |
| MO_FAR_BRANCH_OFFSET = 6, |
| |
| MO_ABS32_LO = 8, |
| MO_ABS32_HI = 9, |
| }; |
| |
| explicit SIInstrInfo(const GCNSubtarget &ST); |
| |
| const SIRegisterInfo &getRegisterInfo() const { |
| return RI; |
| } |
| |
| const GCNSubtarget &getSubtarget() const { |
| return ST; |
| } |
| |
| bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; |
| |
| bool isIgnorableUse(const MachineOperand &MO) const override; |
| |
| bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, |
| MachineCycleInfo *CI) const override; |
| |
| bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, |
| int64_t &Offset1) const override; |
| |
| bool isGlobalMemoryObject(const MachineInstr *MI) const override; |
| |
| bool getMemOperandsWithOffsetWidth( |
| const MachineInstr &LdSt, |
| SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, |
| bool &OffsetIsScalable, LocationSize &Width, |
| const TargetRegisterInfo *TRI) const final; |
| |
| bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, |
| int64_t Offset1, bool OffsetIsScalable1, |
| ArrayRef<const MachineOperand *> BaseOps2, |
| int64_t Offset2, bool OffsetIsScalable2, |
| unsigned ClusterSize, |
| unsigned NumBytes) const override; |
| |
| bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, |
| int64_t Offset1, unsigned NumLoads) const override; |
| |
| void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
| const DebugLoc &DL, Register DestReg, Register SrcReg, |
| bool KillSrc, bool RenamableDest = false, |
| bool RenamableSrc = false) const override; |
| |
| const TargetRegisterClass *getPreferredSelectRegClass( |
| unsigned Size) const; |
| |
| Register insertNE(MachineBasicBlock *MBB, |
| MachineBasicBlock::iterator I, const DebugLoc &DL, |
| Register SrcReg, int Value) const; |
| |
| Register insertEQ(MachineBasicBlock *MBB, |
| MachineBasicBlock::iterator I, const DebugLoc &DL, |
| Register SrcReg, int Value) const; |
| |
| bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, |
| int64_t &ImmVal) const override; |
| |
| void storeRegToStackSlot( |
| MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, |
| bool isKill, int FrameIndex, const TargetRegisterClass *RC, |
| const TargetRegisterInfo *TRI, Register VReg, |
| MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; |
| |
| void loadRegFromStackSlot( |
| MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, |
| int FrameIndex, const TargetRegisterClass *RC, |
| const TargetRegisterInfo *TRI, Register VReg, |
| MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; |
| |
| bool expandPostRAPseudo(MachineInstr &MI) const override; |
| |
| void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
| Register DestReg, unsigned SubIdx, |
| const MachineInstr &Orig, |
| const TargetRegisterInfo &TRI) const override; |
| |
| // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp |
| // instructions. Returns a pair of generated instructions. |
| // Can split either post-RA with physical registers or pre-RA with |
| // virtual registers. In the latter case the IR needs to be in SSA form |
| // and a REG_SEQUENCE is produced to define the original register. |
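| // |
| // A pre-RA sketch (illustrative; operands elided): |
| //   %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %0:vreg_64, ... |
| // expands into |
| //   %3:vgpr_32 = v_mov_b32_dpp %0.sub0, ... |
| //   %4:vgpr_32 = v_mov_b32_dpp %0.sub1, ... |
| //   %2 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 |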
| std::pair<MachineInstr*, MachineInstr*> |
| expandMovDPP64(MachineInstr &MI) const; |
| |
| // Returns an opcode that can be used to move a value to a \p DstRC |
| // register. If there is no hardware instruction that can store to \p |
| // DstRC, then AMDGPU::COPY is returned. |
| unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; |
| |
| const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, |
| unsigned EltSize, |
| bool IsSGPR) const; |
| |
| const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize, |
| bool IsIndirectSrc) const; |
| LLVM_READONLY |
| int commuteOpcode(unsigned Opc) const; |
| |
| LLVM_READONLY |
| inline int commuteOpcode(const MachineInstr &MI) const { |
| return commuteOpcode(MI.getOpcode()); |
| } |
| |
| bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, |
| unsigned &SrcOpIdx1) const override; |
| |
| bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0, |
| unsigned &SrcOpIdx1) const; |
| |
| bool isBranchOffsetInRange(unsigned BranchOpc, |
| int64_t BrOffset) const override; |
| |
| MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; |
| |
| /// Return whether the block terminates with a divergent branch. |
| /// Note this only works before lowering the pseudo control flow |
| /// instructions. |
| bool hasDivergentBranch(const MachineBasicBlock *MBB) const; |
| |
| void insertIndirectBranch(MachineBasicBlock &MBB, |
| MachineBasicBlock &NewDestBB, |
| MachineBasicBlock &RestoreBB, const DebugLoc &DL, |
| int64_t BrOffset, RegScavenger *RS) const override; |
| |
| bool analyzeBranchImpl(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, |
| MachineBasicBlock *&TBB, |
| MachineBasicBlock *&FBB, |
| SmallVectorImpl<MachineOperand> &Cond, |
| bool AllowModify) const; |
| |
| bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
| MachineBasicBlock *&FBB, |
| SmallVectorImpl<MachineOperand> &Cond, |
| bool AllowModify = false) const override; |
| |
| unsigned removeBranch(MachineBasicBlock &MBB, |
| int *BytesRemoved = nullptr) const override; |
| |
| unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, |
| MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, |
| const DebugLoc &DL, |
| int *BytesAdded = nullptr) const override; |
| |
| bool reverseBranchCondition( |
| SmallVectorImpl<MachineOperand> &Cond) const override; |
| |
| bool canInsertSelect(const MachineBasicBlock &MBB, |
| ArrayRef<MachineOperand> Cond, Register DstReg, |
| Register TrueReg, Register FalseReg, int &CondCycles, |
| int &TrueCycles, int &FalseCycles) const override; |
| |
| void insertSelect(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, const DebugLoc &DL, |
| Register DstReg, ArrayRef<MachineOperand> Cond, |
| Register TrueReg, Register FalseReg) const override; |
| |
| void insertVectorSelect(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, const DebugLoc &DL, |
| Register DstReg, ArrayRef<MachineOperand> Cond, |
| Register TrueReg, Register FalseReg) const; |
| |
| bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
| Register &SrcReg2, int64_t &CmpMask, |
| int64_t &CmpValue) const override; |
| |
| bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, |
| Register SrcReg2, int64_t CmpMask, int64_t CmpValue, |
| const MachineRegisterInfo *MRI) const override; |
| |
| bool |
| areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, |
| const MachineInstr &MIb) const override; |
| |
| static bool isFoldableCopy(const MachineInstr &MI); |
| |
| void removeModOperands(MachineInstr &MI) const; |
| |
| /// Return the extracted immediate value in a subregister use from a constant |
| /// materialized in a super register. |
| /// |
| /// e.g. %imm = S_MOV_B64 K[0:63] |
| /// USE %imm.sub1 |
| /// This will return K[32:63] |
| static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal, |
| unsigned SubRegIndex); |
| |
| bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, |
| MachineRegisterInfo *MRI) const final; |
| |
| unsigned getMachineCSELookAheadLimit() const override { return 500; } |
| |
| MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, |
| LiveIntervals *LIS) const override; |
| |
| bool isSchedulingBoundary(const MachineInstr &MI, |
| const MachineBasicBlock *MBB, |
| const MachineFunction &MF) const override; |
| |
| static bool isSALU(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SALU; |
| } |
| |
| bool isSALU(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SALU; |
| } |
| |
| static bool isVALU(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VALU; |
| } |
| |
| bool isVALU(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VALU; |
| } |
| |
| static bool isImage(const MachineInstr &MI) { |
| return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI); |
| } |
| |
| bool isImage(uint16_t Opcode) const { |
| return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode); |
| } |
| |
| static bool isVMEM(const MachineInstr &MI) { |
| return isMUBUF(MI) || isMTBUF(MI) || isImage(MI); |
| } |
| |
| bool isVMEM(uint16_t Opcode) const { |
| return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode); |
| } |
| |
| static bool isSOP1(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SOP1; |
| } |
| |
| bool isSOP1(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SOP1; |
| } |
| |
| static bool isSOP2(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SOP2; |
| } |
| |
| bool isSOP2(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SOP2; |
| } |
| |
| static bool isSOPC(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SOPC; |
| } |
| |
| bool isSOPC(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SOPC; |
| } |
| |
| static bool isSOPK(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SOPK; |
| } |
| |
| bool isSOPK(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SOPK; |
| } |
| |
| static bool isSOPP(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SOPP; |
| } |
| |
| bool isSOPP(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SOPP; |
| } |
| |
| static bool isPacked(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; |
| } |
| |
| bool isPacked(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsPacked; |
| } |
| |
| static bool isVOP1(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VOP1; |
| } |
| |
| bool isVOP1(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VOP1; |
| } |
| |
| static bool isVOP2(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VOP2; |
| } |
| |
| bool isVOP2(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VOP2; |
| } |
| |
| static bool isVOP3(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VOP3; |
| } |
| |
| bool isVOP3(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VOP3; |
| } |
| |
| static bool isSDWA(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SDWA; |
| } |
| |
| bool isSDWA(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SDWA; |
| } |
| |
| static bool isVOPC(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VOPC; |
| } |
| |
| bool isVOPC(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VOPC; |
| } |
| |
| static bool isMUBUF(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; |
| } |
| |
| bool isMUBUF(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::MUBUF; |
| } |
| |
| static bool isMTBUF(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; |
| } |
| |
| bool isMTBUF(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::MTBUF; |
| } |
| |
| static bool isSMRD(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SMRD; |
| } |
| |
| bool isSMRD(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SMRD; |
| } |
| |
| bool isBufferSMRD(const MachineInstr &MI) const; |
| |
| static bool isDS(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::DS; |
| } |
| |
| bool isDS(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::DS; |
| } |
| |
| static bool isLDSDMA(const MachineInstr &MI) { |
| return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI)); |
| } |
| |
| bool isLDSDMA(uint16_t Opcode) const { |
| return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode)); |
| } |
| |
| static bool isGWS(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::GWS; |
| } |
| |
| bool isGWS(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::GWS; |
| } |
| |
| bool isAlwaysGDS(uint16_t Opcode) const; |
| |
| static bool isMIMG(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::MIMG; |
| } |
| |
| bool isMIMG(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::MIMG; |
| } |
| |
| static bool isVIMAGE(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE; |
| } |
| |
| bool isVIMAGE(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VIMAGE; |
| } |
| |
| static bool isVSAMPLE(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE; |
| } |
| |
| bool isVSAMPLE(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE; |
| } |
| |
| static bool isGather4(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::Gather4; |
| } |
| |
| bool isGather4(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::Gather4; |
| } |
| |
| static bool isFLAT(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FLAT; |
| } |
| |
| // Returns true for a FLAT encoded instruction which accesses a specific |
| // segment, i.e. global_* or scratch_*. |
| static bool isSegmentSpecificFLAT(const MachineInstr &MI) { |
| auto Flags = MI.getDesc().TSFlags; |
| return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); |
| } |
| |
| bool isSegmentSpecificFLAT(uint16_t Opcode) const { |
| auto Flags = get(Opcode).TSFlags; |
| return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); |
| } |
| |
| static bool isFLATGlobal(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal; |
| } |
| |
| bool isFLATGlobal(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal; |
| } |
| |
| static bool isFLATScratch(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch; |
| } |
| |
| bool isFLATScratch(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FlatScratch; |
| } |
| |
| // Any FLAT encoded instruction, including global_* and scratch_*. |
| bool isFLAT(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FLAT; |
| } |
| |
| static bool isEXP(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::EXP; |
| } |
| |
| static bool isDualSourceBlendEXP(const MachineInstr &MI) { |
| if (!isEXP(MI)) |
| return false; |
| unsigned Target = MI.getOperand(0).getImm(); |
| return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 || |
| Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; |
| } |
| |
| bool isEXP(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::EXP; |
| } |
| |
| static bool isAtomicNoRet(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet; |
| } |
| |
| bool isAtomicNoRet(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet; |
| } |
| |
| static bool isAtomicRet(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet; |
| } |
| |
| bool isAtomicRet(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet; |
| } |
| |
| static bool isAtomic(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet | |
| SIInstrFlags::IsAtomicNoRet); |
| } |
| |
| bool isAtomic(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet | |
| SIInstrFlags::IsAtomicNoRet); |
| } |
| |
| static bool mayWriteLDSThroughDMA(const MachineInstr &MI) { |
| return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; |
| } |
| |
| static bool isWQM(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::WQM; |
| } |
| |
| bool isWQM(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::WQM; |
| } |
| |
| static bool isDisableWQM(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; |
| } |
| |
| bool isDisableWQM(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; |
| } |
| |
| // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of |
| // SGPRs spilling to VGPRs: they are SGPR spills but implemented with VALU |
| // instructions. They therefore need an explicit check, since checking only |
| // whether the Spill bit is set and what instruction type it came from would |
| // misclassify them. |
| static bool isVGPRSpill(const MachineInstr &MI) { |
| return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR && |
| MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR && |
| (isSpill(MI) && isVALU(MI)); |
| } |
| |
| bool isVGPRSpill(uint16_t Opcode) const { |
| return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR && |
| Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR && |
| (isSpill(Opcode) && isVALU(Opcode)); |
| } |
| |
| static bool isSGPRSpill(const MachineInstr &MI) { |
| return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR || |
| MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR || |
| (isSpill(MI) && isSALU(MI)); |
| } |
| |
| bool isSGPRSpill(uint16_t Opcode) const { |
| return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR || |
| Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR || |
| (isSpill(Opcode) && isSALU(Opcode)); |
| } |
| |
| bool isSpill(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::Spill; |
| } |
| |
| static bool isSpill(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::Spill; |
| } |
| |
| static bool isWWMRegSpillOpcode(uint16_t Opcode) { |
| return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || |
| Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE || |
| Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE || |
| Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE; |
| } |
| |
| static bool isChainCallOpcode(uint64_t Opcode) { |
| return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 || |
| Opcode == AMDGPU::SI_CS_CHAIN_TC_W64; |
| } |
| |
| static bool isDPP(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::DPP; |
| } |
| |
| bool isDPP(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::DPP; |
| } |
| |
| static bool isTRANS(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::TRANS; |
| } |
| |
| bool isTRANS(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::TRANS; |
| } |
| |
| static bool isVOP3P(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; |
| } |
| |
| bool isVOP3P(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VOP3P; |
| } |
| |
| static bool isVINTRP(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; |
| } |
| |
| bool isVINTRP(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VINTRP; |
| } |
| |
| static bool isMAI(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsMAI; |
| } |
| |
| bool isMAI(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsMAI; |
| } |
| |
| static bool isMFMA(const MachineInstr &MI) { |
| return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && |
| MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; |
| } |
| |
| static bool isDOT(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; |
| } |
| |
| static bool isWMMA(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; |
| } |
| |
| bool isWMMA(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; |
| } |
| |
| static bool isMFMAorWMMA(const MachineInstr &MI) { |
| return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI); |
| } |
| |
| static bool isSWMMAC(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC; |
| } |
| |
| bool isSWMMAC(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC; |
| } |
| |
| bool isDOT(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::IsDOT; |
| } |
| |
| bool isXDL(const MachineInstr &MI) const; |
| |
| static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); } |
| |
| static bool isLDSDIR(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; |
| } |
| |
| bool isLDSDIR(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; |
| } |
| |
| static bool isVINTERP(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; |
| } |
| |
| bool isVINTERP(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::VINTERP; |
| } |
| |
| static bool isScalarUnit(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); |
| } |
| |
| static bool usesVM_CNT(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; |
| } |
| |
| static bool usesLGKM_CNT(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; |
| } |
| |
| // Most SOPK instructions treat the immediate as a signed 16-bit value; |
| // however, some use it as unsigned. |
| static bool sopkIsZext(unsigned Opcode) { |
| return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 || |
| Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 || |
| Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 || |
| Opcode == AMDGPU::S_GETREG_B32; |
| } |
| |
| /// \returns true if this is an s_store_dword* instruction. This is more |
| /// specific than isSMEM && mayStore. |
| static bool isScalarStore(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; |
| } |
| |
| bool isScalarStore(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; |
| } |
| |
| static bool isFixedSize(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; |
| } |
| |
| bool isFixedSize(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; |
| } |
| |
| static bool hasFPClamp(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; |
| } |
| |
| bool hasFPClamp(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FPClamp; |
| } |
| |
| static bool hasIntClamp(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; |
| } |
| |
| uint64_t getClampMask(const MachineInstr &MI) const { |
| const uint64_t ClampFlags = SIInstrFlags::FPClamp | |
| SIInstrFlags::IntClamp | |
| SIInstrFlags::ClampLo | |
| SIInstrFlags::ClampHi; |
| return MI.getDesc().TSFlags & ClampFlags; |
| } |
| |
| static bool usesFPDPRounding(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; |
| } |
| |
| bool usesFPDPRounding(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; |
| } |
| |
| static bool isFPAtomic(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; |
| } |
| |
| bool isFPAtomic(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; |
| } |
| |
| static bool isNeverUniform(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform; |
| } |
| |
| // Check whether the opcode starts a barrier. Pre-gfx12 this is just |
| // S_BARRIER, but after the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT |
| // the barrier start to check for is S_BARRIER_SIGNAL*. |
| bool isBarrierStart(unsigned Opcode) const { |
| return Opcode == AMDGPU::S_BARRIER || |
| Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 || |
| Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 || |
| Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM || |
| Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM; |
| } |
| |
| bool isBarrier(unsigned Opcode) const { |
| return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT || |
| Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER; |
| } |
| |
| static bool isF16PseudoScalarTrans(unsigned Opcode) { |
| return Opcode == AMDGPU::V_S_EXP_F16_e64 || |
| Opcode == AMDGPU::V_S_LOG_F16_e64 || |
| Opcode == AMDGPU::V_S_RCP_F16_e64 || |
| Opcode == AMDGPU::V_S_RSQ_F16_e64 || |
| Opcode == AMDGPU::V_S_SQRT_F16_e64; |
| } |
| |
| static bool doesNotReadTiedSource(const MachineInstr &MI) { |
| return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; |
| } |
| |
| bool doesNotReadTiedSource(uint16_t Opcode) const { |
| return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; |
| } |
| |
| bool isIGLP(unsigned Opcode) const { |
| return Opcode == AMDGPU::SCHED_BARRIER || |
| Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT; |
| } |
| |
| bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); } |
| |
| static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) { |
| switch (Opcode) { |
| case AMDGPU::S_WAITCNT_soft: |
| return AMDGPU::S_WAITCNT; |
| case AMDGPU::S_WAITCNT_VSCNT_soft: |
| return AMDGPU::S_WAITCNT_VSCNT; |
| case AMDGPU::S_WAIT_LOADCNT_soft: |
| return AMDGPU::S_WAIT_LOADCNT; |
| case AMDGPU::S_WAIT_STORECNT_soft: |
| return AMDGPU::S_WAIT_STORECNT; |
| case AMDGPU::S_WAIT_SAMPLECNT_soft: |
| return AMDGPU::S_WAIT_SAMPLECNT; |
| case AMDGPU::S_WAIT_BVHCNT_soft: |
| return AMDGPU::S_WAIT_BVHCNT; |
| case AMDGPU::S_WAIT_DSCNT_soft: |
| return AMDGPU::S_WAIT_DSCNT; |
| case AMDGPU::S_WAIT_KMCNT_soft: |
| return AMDGPU::S_WAIT_KMCNT; |
| default: |
| return Opcode; |
| } |
| } |
| |
| bool isWaitcnt(unsigned Opcode) const { |
| switch (getNonSoftWaitcntOpcode(Opcode)) { |
| case AMDGPU::S_WAITCNT: |
| case AMDGPU::S_WAITCNT_VSCNT: |
| case AMDGPU::S_WAITCNT_VMCNT: |
| case AMDGPU::S_WAITCNT_EXPCNT: |
| case AMDGPU::S_WAITCNT_LGKMCNT: |
| case AMDGPU::S_WAIT_LOADCNT: |
| case AMDGPU::S_WAIT_LOADCNT_DSCNT: |
| case AMDGPU::S_WAIT_STORECNT: |
| case AMDGPU::S_WAIT_STORECNT_DSCNT: |
| case AMDGPU::S_WAIT_SAMPLECNT: |
| case AMDGPU::S_WAIT_BVHCNT: |
| case AMDGPU::S_WAIT_EXPCNT: |
| case AMDGPU::S_WAIT_DSCNT: |
| case AMDGPU::S_WAIT_KMCNT: |
| case AMDGPU::S_WAIT_IDLE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool isVGPRCopy(const MachineInstr &MI) const { |
| assert(isCopyInstr(MI)); |
| Register Dest = MI.getOperand(0).getReg(); |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| return !RI.isSGPRReg(MRI, Dest); |
| } |
| |
| bool hasVGPRUses(const MachineInstr &MI) const { |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| return llvm::any_of(MI.explicit_uses(), |
| [&MRI, this](const MachineOperand &MO) { |
| return MO.isReg() && RI.isVGPR(MRI, MO.getReg()); |
| }); |
| } |
| |
| /// Return true if the instruction modifies the mode register. |
| static bool modifiesModeRegister(const MachineInstr &MI); |
| |
| /// This function is used to determine if an instruction can be safely |
| /// executed under EXEC = 0 without hardware error, indeterminate results, |
| /// and/or visible effects on future vector execution or outside the shader. |
| /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is |
| /// used in removing branches over short EXEC = 0 sequences. |
| /// As such it embeds certain assumptions which may not apply to every case |
| /// of EXEC = 0 execution. |
| bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; |
| |
| /// Returns true if the instruction could potentially depend on the value of |
| /// exec. If false, exec dependencies may safely be ignored. |
| bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; |
| |
| bool isInlineConstant(const APInt &Imm) const; |
| |
| bool isInlineConstant(const APFloat &Imm) const; |
| |
| // Returns true if this non-register operand definitely does not need to be |
| // encoded as a 32-bit literal. Note that this function handles all kinds of |
| // operands, not just immediates. |
| // |
| // Some operands like FrameIndexes could resolve to an inline immediate value |
| // that will not require an additional 4-bytes; this function assumes that it |
| // will. |
| bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { |
| assert(!MO.isReg() && "isInlineConstant called on register operand!"); |
| if (!MO.isImm()) |
| return false; |
| return isInlineConstant(MO.getImm(), OperandType); |
| } |
| bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const; |
| |
| bool isInlineConstant(const MachineOperand &MO, |
| const MCOperandInfo &OpInfo) const { |
| return isInlineConstant(MO, OpInfo.OperandType); |
| } |
| |
| /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it |
| /// would be an inline immediate. |
| bool isInlineConstant(const MachineInstr &MI, |
| const MachineOperand &UseMO, |
| const MachineOperand &DefMO) const { |
| assert(UseMO.getParent() == &MI); |
| int OpIdx = UseMO.getOperandNo(); |
| if (OpIdx >= MI.getDesc().NumOperands) |
| return false; |
| |
| return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]); |
| } |
| |
| /// \returns true if the operand \p OpIdx in \p MI is a valid inline |
| /// immediate. |
| bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { |
| const MachineOperand &MO = MI.getOperand(OpIdx); |
| return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType); |
| } |
| |
| bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, |
| int64_t ImmVal) const { |
| if (OpIdx >= MI.getDesc().NumOperands) |
| return false; |
| |
| if (isCopyInstr(MI)) { |
| unsigned Size = getOpSize(MI, OpIdx); |
| assert(Size == 8 || Size == 4); |
| |
| uint8_t OpType = (Size == 8) ? |
| AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32; |
| return isInlineConstant(ImmVal, OpType); |
| } |
| |
| return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType); |
| } |
| |
| bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, |
| const MachineOperand &MO) const { |
| return isInlineConstant(MI, OpIdx, MO.getImm()); |
| } |
| |
| bool isInlineConstant(const MachineOperand &MO) const { |
| return isInlineConstant(*MO.getParent(), MO.getOperandNo()); |
| } |
| |
| bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, |
| const MachineOperand &MO) const; |
| |
| /// Return true if this 64-bit VALU instruction has a 32-bit encoding. |
| /// This function will return false if you pass it a 32-bit instruction. |
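| /// e.g. V_ADD_F32_e64 would typically map to the 32-bit V_ADD_F32_e32 |
| /// encoding (an illustrative pair). |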
| bool hasVALU32BitEncoding(unsigned Opcode) const; |
| |
| /// Returns true if this operand uses the constant bus. |
| bool usesConstantBus(const MachineRegisterInfo &MRI, |
| const MachineOperand &MO, |
| const MCOperandInfo &OpInfo) const; |
| |
| bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI, |
| int OpIdx) const { |
| return usesConstantBus(MRI, MI.getOperand(OpIdx), |
| MI.getDesc().operands()[OpIdx]); |
| } |
| |
| /// Return true if this instruction has any modifiers. |
| /// e.g. src[012]_mod, omod, clamp. |
| bool hasModifiers(unsigned Opcode) const; |
| |
| bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const; |
| bool hasAnyModifiersSet(const MachineInstr &MI) const; |
| |
| bool canShrink(const MachineInstr &MI, |
| const MachineRegisterInfo &MRI) const; |
| |
| MachineInstr *buildShrunkInst(MachineInstr &MI, |
| unsigned NewOpcode) const; |
| |
| bool verifyInstruction(const MachineInstr &MI, |
| StringRef &ErrInfo) const override; |
| |
| unsigned getVALUOp(const MachineInstr &MI) const; |
| |
| void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| const DebugLoc &DL, Register Reg, bool IsSCCLive, |
| SlotIndexes *Indexes = nullptr) const; |
| |
| void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, const DebugLoc &DL, |
| Register Reg, SlotIndexes *Indexes = nullptr) const; |
| |
| /// Return the correct register class for \p OpNo. For target-specific |
| /// instructions, this will return the register class that has been defined |
| /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return |
| /// the register class of its machine operand, using the other operands to |
| /// infer the correct register class. |
| const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, |
| unsigned OpNo) const; |
| |
| /// Return the size in bytes of the operand \p OpNo on the given |
| /// instruction opcode. |
| unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { |
| const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo]; |
| |
| if (OpInfo.RegClass == -1) { |
| // If this is an immediate operand, this must be a 32-bit literal. |
| assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); |
| return 4; |
| } |
| |
| return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; |
| } |
| |
| /// This form should usually be preferred since it handles operands |
| /// with unknown register classes. |
| unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { |
| const MachineOperand &MO = MI.getOperand(OpNo); |
| if (MO.isReg()) { |
| if (unsigned SubReg = MO.getSubReg()) { |
| return RI.getSubRegIdxSize(SubReg) / 8; |
| } |
| } |
| return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; |
| } |
| |
| /// Legalize the \p OpIndex operand of this instruction by inserting |
| /// a MOV. For example: |
| /// ADD_I32_e32 VGPR0, 15 |
| /// to |
| /// MOV VGPR1, 15 |
| /// ADD_I32_e32 VGPR0, VGPR1 |
| /// |
| /// If the operand being legalized is a register, then a COPY will be used |
| /// instead of MOV. |
| void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; |
| |
| /// Check if \p MO would be a legal operand if it were the \p OpIdx operand |
| /// of \p MI. |
| bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, |
| const MachineOperand *MO = nullptr) const; |
| |
| /// Check if \p MO would be a valid operand for the given operand |
| /// definition \p OpInfo. Note this does not attempt to validate constant bus |
| /// restrictions (e.g. literal constant usage). |
| bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, |
| const MCOperandInfo &OpInfo, |
| const MachineOperand &MO) const; |
| |
| /// Check if \p MO (a register operand) is a legal register for the |
| /// given operand description or operand index. |
| /// The operand-index version performs more legality checks. |
| bool isLegalRegOperand(const MachineRegisterInfo &MRI, |
| const MCOperandInfo &OpInfo, |
| const MachineOperand &MO) const; |
| bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, |
| const MachineOperand &MO) const; |
| /// Legalize operands in \p MI by either commuting it or inserting a |
| /// copy of src1. |
| void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
| |
| /// Fix operands in \p MI to satisfy constant bus requirements. |
| void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
| |
| /// Copy a value from a VGPR (\p SrcReg) to an SGPR. The desired register |
| /// class for the dst register (\p DstRC) can be optionally supplied. This |
| /// function can only be used when it is known that the value in \p SrcReg is |
| /// the same across all threads in the wave. |
| /// \returns The SGPR register that \p SrcReg was copied to. |
| Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, |
| MachineRegisterInfo &MRI, |
| const TargetRegisterClass *DstRC = nullptr) const; |
| |
| void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
| void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
| |
| void legalizeGenericOperand(MachineBasicBlock &InsertMBB, |
| MachineBasicBlock::iterator I, |
| const TargetRegisterClass *DstRC, |
| MachineOperand &Op, MachineRegisterInfo &MRI, |
| const DebugLoc &DL) const; |
| |
| /// Legalize all operands in this instruction. This function may create new |
| /// instructions and control-flow around \p MI. If present, \p MDT is |
| /// updated. |
| /// \returns A new basic block that contains \p MI if new blocks were created. |
| MachineBasicBlock * |
| legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; |
| |
| /// Change the SADDR form of a FLAT \p Inst to its VADDR form if the saddr |
| /// operand was moved to a VGPR. \returns true if it succeeded. |
| bool moveFlatAddrToVGPR(MachineInstr &Inst) const; |
| |
| /// Replace each instruction's opcode with the equivalent VALU opcode. This |
| /// function will also move the users of the MachineInstrs in the |
| /// \p Worklist to the VALU if necessary. If present, \p MDT is updated. |
| void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const; |
| |
| void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, |
| MachineInstr &Inst) const; |
| |
| void insertNoop(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MI) const override; |
| |
| void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
| unsigned Quantity) const override; |
| |
| void insertReturn(MachineBasicBlock &MBB) const; |
| |
| /// Build instructions that simulate the behavior of an `s_trap 2` |
| /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode. There, |
| /// s_trap is interpreted as a nop. |
| MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI, |
| MachineBasicBlock &MBB, |
| MachineInstr &MI, |
| const DebugLoc &DL) const; |
| |
| /// Return the number of wait states that result from executing this |
| /// instruction. |
| static unsigned getNumWaitStates(const MachineInstr &MI); |
| |
| /// Returns the operand named \p OperandName. If \p MI does not have an |
| /// operand named \p OperandName, this function returns nullptr. |
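| /// |
| /// e.g. getNamedOperand(MI, AMDGPU::OpName::src0) returns MI's src0 operand |
| /// if present, otherwise nullptr. |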
| LLVM_READONLY |
| MachineOperand *getNamedOperand(MachineInstr &MI, |
| AMDGPU::OpName OperandName) const; |
| |
| LLVM_READONLY |
| const MachineOperand *getNamedOperand(const MachineInstr &MI, |
| AMDGPU::OpName OperandName) const { |
| return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName); |
| } |
| |
| /// Get required immediate operand |
| int64_t getNamedImmOperand(const MachineInstr &MI, |
| AMDGPU::OpName OperandName) const { |
| int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); |
| return MI.getOperand(Idx).getImm(); |
| } |
| |
| uint64_t getDefaultRsrcDataFormat() const; |
| uint64_t getScratchRsrcWords23() const; |
| |
| bool isLowLatencyInstruction(const MachineInstr &MI) const; |
| bool isHighLatencyDef(int Opc) const override; |
| |
| /// Return the descriptor of the target-specific machine instruction |
| /// that corresponds to the specified pseudo or native opcode. |
| const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { |
| return get(pseudoToMCOpcode(Opcode)); |
| } |
| |
| unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; |
| unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; |
| |
| Register isLoadFromStackSlot(const MachineInstr &MI, |
| int &FrameIndex) const override; |
| Register isStoreToStackSlot(const MachineInstr &MI, |
| int &FrameIndex) const override; |
| |
| unsigned getInstBundleSize(const MachineInstr &MI) const; |
| unsigned getInstSizeInBytes(const MachineInstr &MI) const override; |
| |
| bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; |
| |
| std::pair<unsigned, unsigned> |
| decomposeMachineOperandsTargetFlags(unsigned TF) const override; |
| |
| ArrayRef<std::pair<int, const char *>> |
| getSerializableTargetIndices() const override; |
| |
| ArrayRef<std::pair<unsigned, const char *>> |
| getSerializableDirectMachineOperandTargetFlags() const override; |
| |
| ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> |
| getSerializableMachineMemOperandTargetFlags() const override; |
| |
| ScheduleHazardRecognizer * |
| CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
| const ScheduleDAG *DAG) const override; |
| |
| ScheduleHazardRecognizer * |
| CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; |
| |
| ScheduleHazardRecognizer * |
| CreateTargetMIHazardRecognizer(const InstrItineraryData *II, |
| const ScheduleDAGMI *DAG) const override; |
| |
| unsigned getLiveRangeSplitOpcode(Register Reg, |
| const MachineFunction &MF) const override; |
| |
| bool isBasicBlockPrologue(const MachineInstr &MI, |
| Register Reg = Register()) const override; |
| |
| MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsPt, |
| const DebugLoc &DL, Register Src, |
| Register Dst) const override; |
| |
| MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsPt, |
| const DebugLoc &DL, Register Src, |
| unsigned SrcSubReg, |
| Register Dst) const override; |
| |
| bool isWave32() const; |
| |
| /// Return a partially built integer add instruction without carry. |
| /// The caller must add the source operands. |
| /// For pre-GFX9 it will generate an unused carry destination operand. |
| /// TODO: After GFX9 it should return a no-carry operation. |
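| /// |
| /// A pre-GFX9 sketch of the partially built result (illustrative; operands |
| /// elided): |
| ///   %DestReg, %unused_carry = V_ADD_CO_U32_e64 ... |
| /// where the caller then appends the source operands. |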
| MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, |
| const DebugLoc &DL, |
| Register DestReg) const; |
| |
| MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, |
| const DebugLoc &DL, |
| Register DestReg, |
| RegScavenger &RS) const; |
| |
| static bool isKillTerminator(unsigned Opcode); |
| const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; |
| |
| bool isLegalMUBUFImmOffset(unsigned Imm) const; |
| |
| static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST); |
| |
| bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, |
| Align Alignment = Align(4)) const; |
| |
| /// Returns true if \p Offset is legal for the subtarget as the offset to a |
| /// FLAT encoded instruction of the given \p FlatVariant in the given |
| /// \p AddrSpace. |
| bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, |
| uint64_t FlatVariant) const; |
| |
| /// Split \p COffsetVal into {immediate offset field, remainder offset} |
| /// values. |
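| /// |
| /// e.g. if the immediate field could hold offsets in [0, 4095], a |
| /// \p COffsetVal of 5000 might split into {904, 4096} (illustrative numbers; |
| /// the actual range depends on the subtarget and \p FlatVariant). |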
| std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal, |
| unsigned AddrSpace, |
| uint64_t FlatVariant) const; |
| |
| /// Returns true if negative offsets are allowed for the given \p FlatVariant. |
| bool allowNegativeFlatOffset(uint64_t FlatVariant) const; |
| |
| /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. |
| /// Return -1 if the target-specific opcode for the pseudo instruction does |
| /// not exist. If Opcode is not a pseudo instruction, it is returned |
| /// unchanged. |
| int pseudoToMCOpcode(int Opcode) const; |
| |
| /// \brief Check if this instruction should only be used by assembler. |
| /// Return true if this opcode should not be used by codegen. |
| bool isAsmOnlyOpcode(int MCOp) const; |
| |
| const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, |
| const TargetRegisterInfo *TRI, |
| const MachineFunction &MF) |
| const override; |
| |
| void fixImplicitOperands(MachineInstr &MI) const; |
| |
| MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, |
| ArrayRef<unsigned> Ops, |
| MachineBasicBlock::iterator InsertPt, |
| int FrameIndex, |
| LiveIntervals *LIS = nullptr, |
| VirtRegMap *VRM = nullptr) const override; |
| |
| unsigned getInstrLatency(const InstrItineraryData *ItinData, |
| const MachineInstr &MI, |
| unsigned *PredCost = nullptr) const override; |
| |
| InstructionUniformity |
| getInstructionUniformity(const MachineInstr &MI) const override final; |
| |
| InstructionUniformity |
| getGenericInstructionUniformity(const MachineInstr &MI) const; |
| |
| const MIRFormatter *getMIRFormatter() const override { |
| if (!Formatter) |
| Formatter = std::make_unique<AMDGPUMIRFormatter>(); |
| return Formatter.get(); |
| } |
| |
| static unsigned getDSShaderTypeValue(const MachineFunction &MF); |
| |
| const TargetSchedModel &getSchedModel() const { return SchedModel; } |
| |
| // Enforce even alignment of the operand \p OpName if required by the |
| // target. This is used if an operand is a 32-bit register but needs to be |
| // aligned regardless. |
| void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const; |
| }; |
| |
| /// \brief Returns true if the reg:subreg pair \p P is of the register class |
| /// \p TRC. |
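| /// |
| /// e.g. (a sketch) isOfRegClass(getRegSubRegPair(MO), |
| /// AMDGPU::VGPR_32RegClass, MRI) tests whether a register operand MO refers |
| /// to a 32-bit VGPR. |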
| inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, |
| const TargetRegisterClass &TRC, |
| MachineRegisterInfo &MRI) { |
| auto *RC = MRI.getRegClass(P.Reg); |
| if (!P.SubReg) |
| return RC == &TRC; |
| auto *TRI = MRI.getTargetRegisterInfo(); |
| return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg); |
| } |
| |
| /// \brief Create RegSubRegPair from a register MachineOperand |
| inline |
| TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { |
| assert(O.isReg()); |
| return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); |
| } |
| |
| /// \brief Return the source reg:subreg pair for the \p SubReg component of a |
| /// REG_SEQUENCE \p MI. |
| TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, |
| unsigned SubReg); |
| |
| /// \brief Return the defining instruction for a given reg:subreg pair |
| /// skipping copy like instructions and subreg-manipulation pseudos. |
| /// Following another subreg of a reg:subreg isn't supported. |
| MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, |
| MachineRegisterInfo &MRI); |
| |
| /// \brief Return false if EXEC is not changed between the def of \p VReg at \p |
| /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not |
| /// attempt to track between blocks. |
| bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, |
| Register VReg, |
| const MachineInstr &DefMI, |
| const MachineInstr &UseMI); |
| |
| /// \brief Return false if EXEC is not changed between the def of \p VReg at \p |
| /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to |
| /// track between blocks. |
| bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, |
| Register VReg, |
| const MachineInstr &DefMI); |
| |
| namespace AMDGPU { |
| |
| LLVM_READONLY |
| int getVOPe64(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getVOPe32(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getSDWAOp(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getDPPOp32(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getDPPOp64(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getBasicFromSDWAOp(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getCommuteRev(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getCommuteOrig(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getAddr64Inst(uint16_t Opcode); |
| |
| /// Check if \p Opcode is an Addr64 opcode. |
| /// |
| /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1. |
| LLVM_READONLY |
| int getIfAddr64Inst(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getSOPKOp(uint16_t Opcode); |
| |
| /// \returns SADDR form of a FLAT Global instruction given an \p Opcode |
| /// of a VADDR form. |
| LLVM_READONLY |
| int getGlobalSaddrOp(uint16_t Opcode); |
| |
| /// \returns VADDR form of a FLAT Global instruction given an \p Opcode |
| /// of a SADDR form. |
| LLVM_READONLY |
| int getGlobalVaddrOp(uint16_t Opcode); |
| |
| LLVM_READONLY |
| int getVCMPXNoSDstOp(uint16_t Opcode); |
| |
| /// \returns ST form with only immediate offset of a FLAT Scratch instruction |
| /// given an \p Opcode of an SS (SADDR) form. |
| LLVM_READONLY |
| int getFlatScratchInstSTfromSS(uint16_t Opcode); |
| |
| /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode |
| /// of an SVS (SADDR + VADDR) form. |
| LLVM_READONLY |
| int getFlatScratchInstSVfromSVS(uint16_t Opcode); |
| |
| /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode |
| /// of an SV (VADDR) form. |
| LLVM_READONLY |
| int getFlatScratchInstSSfromSV(uint16_t Opcode); |
| |
| /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode |
| /// of an SS (SADDR) form. |
| LLVM_READONLY |
| int getFlatScratchInstSVfromSS(uint16_t Opcode); |
| |
| /// \returns the earlyclobber version of a MAC MFMA if it exists. |
| LLVM_READONLY |
| int getMFMAEarlyClobberOp(uint16_t Opcode); |
| |
| /// \returns Version of an MFMA instruction which uses AGPRs for srcC and |
| /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst. |
| LLVM_READONLY |
| int getMFMASrcCVDstAGPROp(uint16_t Opcode); |
| |
| /// \returns v_cmpx version of a v_cmp instruction. |
| LLVM_READONLY |
| int getVCMPXOpFromVCMP(uint16_t Opcode); |
| |
| const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; |
| const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); |
| const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); |
| const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); |
| |
| } // end namespace AMDGPU |
| |
| namespace AMDGPU { |
| enum AsmComments { |
| // For sgpr to vgpr spill instructions |
| SGPR_SPILL = MachineInstr::TAsmComments |
| }; |
| } // namespace AMDGPU |
| |
| namespace SI { |
| namespace KernelInputOffsets { |
| |
| /// Offsets in bytes from the start of the input buffer |
| enum Offsets { |
| NGROUPS_X = 0, |
| NGROUPS_Y = 4, |
| NGROUPS_Z = 8, |
| GLOBAL_SIZE_X = 12, |
| GLOBAL_SIZE_Y = 16, |
| GLOBAL_SIZE_Z = 20, |
| LOCAL_SIZE_X = 24, |
| LOCAL_SIZE_Y = 28, |
| LOCAL_SIZE_Z = 32 |
| }; |
| |
| } // end namespace KernelInputOffsets |
| } // end namespace SI |
| |
| } // end namespace llvm |
| |
| #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |