//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetRegisterInfo
// class.
//
//===----------------------------------------------------------------------===//
#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64InstPrinter.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
using namespace llvm;
#define GET_CC_REGISTER_LISTS
#include "AArch64GenCallingConv.inc"
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode)
: AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) {
AArch64_MC::initLLVMToCVRegMapping(this);
}
/// Return whether the register needs a CFI entry. Not all unwinders may know
/// about SVE registers, so we assume the lowest common denominator, i.e. the
/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
/// lower 64 bits (d8-d15) need to be saved. The lower 64-bit subreg is
/// returned in \p RegToUseForCFI.
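///
/// For example, \p Reg == AArch64::Z8 returns true with \p RegToUseForCFI set
/// to d8 (d8-d15 are in the base-ABI callee-save list), whereas z16 returns
/// false because d16 is not a base-ABI callee-save.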
bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg,
MCRegister &RegToUseForCFI) const {
if (AArch64::PPRRegClass.contains(Reg))
return false;
if (AArch64::ZPRRegClass.contains(Reg)) {
RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
return true;
}
return false;
}
RegToUseForCFI = Reg;
return true;
}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
    // The GHC set of callee-saved regs is empty, as all those regs are
    // used for passing STG regs around.
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveNone)
return CSR_AArch64_NoneRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
return CSR_Win_AArch64_Arm64EC_Thunk_SaveList;
// Darwin has its own CSR_AArch64_AAPCS_SaveList, which means most CSR save
// lists depending on that will need to have their Darwin variant as well.
if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
return getDarwinCalleeSavedRegs(MF);
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows()) {
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_Win_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_Win_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SVE_VectorCall)
return CSR_Win_AArch64_SVE_AAPCS_SaveList;
if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
return CSR_Win_AArch64_SVE_AAPCS_SaveList;
return CSR_Win_AArch64_AAPCS_SaveList;
}
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
return CSR_AArch64_SVE_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is only "
"supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
"only supported to improve calls to SME ACLE __arm_get_current_vg "
"function, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
return CSR_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
const MCPhysReg *
AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
"Invalid subtarget for getDarwinCalleeSavedRegs");
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
report_fatal_error(
"Calling convention CFGuard_Check is unsupported on Darwin.");
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_Darwin_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
report_fatal_error(
"Calling convention SVE_VectorCall is unsupported on Darwin.");
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
"only supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
"only supported to improve calls to SME ACLE __arm_get_current_vg "
"function, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
"Calling convention "
"AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR()
? CSR_Darwin_AArch64_CXX_TLS_PE_SaveList
: CSR_Darwin_AArch64_CXX_TLS_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
return CSR_Darwin_AArch64_SVE_AAPCS_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
}
const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
return CSR_Darwin_AArch64_CXX_TLS_ViaCopy_SaveList;
return nullptr;
}
void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
MachineFunction &MF) const {
const MCPhysReg *CSRs = getCalleeSavedRegs(&MF);
SmallVector<MCPhysReg, 32> UpdatedCSRs;
for (const MCPhysReg *I = CSRs; *I; ++I)
UpdatedCSRs.push_back(*I);
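  // Append any X registers the subtarget marks as custom callee-saved; these
  // are driven by target features (the call-saved-x# family, assuming the
  // spelling used in AArch64.td).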
for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
}
}
// Register lists are zero-terminated.
UpdatedCSRs.push_back(0);
MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
}
const TargetRegisterClass *
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const {
// edge case for GPR/FPR register classes
if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub)
return &AArch64::FPR32RegClass;
else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub)
return &AArch64::FPR64RegClass;
// Forward to TableGen's default version.
return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}
const uint32_t *
AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
assert(MF.getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
"Invalid subtarget for getDarwinCallPreservedMask");
if (CC == CallingConv::CXX_FAST_TLS)
return CSR_Darwin_AArch64_CXX_TLS_RegMask;
if (CC == CallingConv::AArch64_VectorCall)
return CSR_Darwin_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
return CSR_Darwin_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
report_fatal_error(
"Calling convention CFGuard_Check is unsupported on Darwin.");
if (MF.getSubtarget<AArch64Subtarget>()
.getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask;
if (CC == CallingConv::SwiftTail)
return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask;
if (CC == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
if (CC == CallingConv::PreserveAll)
return CSR_Darwin_AArch64_RT_AllRegs_RegMask;
return CSR_Darwin_AArch64_AAPCS_RegMask;
}
const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
bool SCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
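  // The *_SCS regmask variants chosen below additionally treat X18, which
  // holds the shadow call stack pointer, as callee-saved.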
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::PreserveNone)
return SCS ? CSR_AArch64_NoneRegs_SCS_RegMask
: CSR_AArch64_NoneRegs_RegMask;
if (CC == CallingConv::AnyReg)
return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask;
// All the following calling conventions are handled differently on Darwin.
if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
if (SCS)
report_fatal_error("ShadowCallStack attribute not supported on Darwin.");
return getDarwinCallPreservedMask(MF, CC);
}
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
: CSR_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask
: CSR_AArch64_AAPCS_SwiftError_RegMask;
if (CC == CallingConv::SwiftTail) {
if (SCS)
report_fatal_error("ShadowCallStack attribute not supported with swifttail");
return CSR_AArch64_AAPCS_SwiftTail_RegMask;
}
if (CC == CallingConv::PreserveMost)
return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
: CSR_AArch64_RT_MostRegs_RegMask;
if (CC == CallingConv::PreserveAll)
return SCS ? CSR_AArch64_RT_AllRegs_SCS_RegMask
: CSR_AArch64_RT_AllRegs_RegMask;
return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
}
const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
const MachineFunction &MF) const {
if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
return CSR_AArch64_AAPCS_RegMask;
return nullptr;
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_Darwin_AArch64_TLS_RegMask;
assert(TT.isOSBinFormatELF() && "Invalid target");
return CSR_AArch64_TLS_ELF_RegMask;
}
void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
const uint32_t **Mask) const {
uint32_t *UpdatedMask = MF.allocateRegMask();
unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
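  // Layout assumed below: one bit per physical register, with bit R stored in
  // word R / 32 at position R % 32; a set bit means the register is preserved
  // across the call.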
for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
for (MCPhysReg SubReg :
subregs_inclusive(AArch64::GPR64commonRegClass.getRegister(i))) {
// See TargetRegisterInfo::getCallPreservedMask for how to interpret the
// register mask.
UpdatedMask[SubReg / 32] |= 1u << (SubReg % 32);
}
}
}
*Mask = UpdatedMask;
}
const uint32_t *AArch64RegisterInfo::getSMStartStopCallPreservedMask() const {
return CSR_AArch64_SMStartStop_RegMask;
}
const uint32_t *
AArch64RegisterInfo::SMEABISupportRoutinesCallPreservedMaskFromX0() const {
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
}
const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
return CSR_AArch64_NoRegs_RegMask;
}
const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
// single i64 return value)
//
// In case that the calling convention does not use the same register for
// both, the function should return NULL (does not currently apply)
assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin())
return CSR_Darwin_AArch64_AAPCS_ThisReturn_RegMask;
return CSR_AArch64_AAPCS_ThisReturn_RegMask;
}
const uint32_t *AArch64RegisterInfo::getWindowsStackProbePreservedMask() const {
return CSR_AArch64_StackProbe_Windows_RegMask;
}
std::optional<std::string>
AArch64RegisterInfo::explainReservedReg(const MachineFunction &MF,
MCRegister PhysReg) const {
if (hasBasePointer(MF) && MCRegisterInfo::regsOverlap(PhysReg, AArch64::X19))
return std::string("X19 is used as the frame base pointer register.");
if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) {
bool warn = false;
if (MCRegisterInfo::regsOverlap(PhysReg, AArch64::X13) ||
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X14) ||
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X23) ||
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X24) ||
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X28))
warn = true;
for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i)
if (MCRegisterInfo::regsOverlap(PhysReg, i))
warn = true;
if (warn)
return std::string(AArch64InstPrinter::getRegisterName(PhysReg)) +
" is clobbered by asynchronous signals when using Arm64EC.";
}
return {};
}
BitVector
AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
markSuperRegs(Reserved, AArch64::WSP);
markSuperRegs(Reserved, AArch64::WZR);
if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) {
// x13, x14, x23, x24, x28, and v16-v31 are clobbered by asynchronous
// signals, so we can't ever use them.
markSuperRegs(Reserved, AArch64::W13);
markSuperRegs(Reserved, AArch64::W14);
markSuperRegs(Reserved, AArch64::W23);
markSuperRegs(Reserved, AArch64::W24);
markSuperRegs(Reserved, AArch64::W28);
for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i)
markSuperRegs(Reserved, i);
}
for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
}
if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
// SLH uses register W16/X16 as the taint register.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
markSuperRegs(Reserved, AArch64::W16);
// FFR is modelled as global state that cannot be allocated.
if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
Reserved.set(AArch64::FFR);
// SME tiles are not allocatable.
if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
Reserved.set(SubReg);
}
// VG cannot be allocated
Reserved.set(AArch64::VG);
if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
SubReg.isValid(); ++SubReg)
Reserved.set(*SubReg);
}
markSuperRegs(Reserved, AArch64::FPCR);
markSuperRegs(Reserved, AArch64::FPMR);
markSuperRegs(Reserved, AArch64::FPSR);
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
markSuperRegs(Reserved, AArch64::X27);
markSuperRegs(Reserved, AArch64::X28);
markSuperRegs(Reserved, AArch64::W27);
markSuperRegs(Reserved, AArch64::W28);
}
assert(checkAllSuperRegsMarked(Reserved));
// Add _HI registers after checkAllSuperRegsMarked as this check otherwise
// becomes considerably more expensive.
Reserved.set(AArch64::WSP_HI);
Reserved.set(AArch64::WZR_HI);
static_assert(AArch64::W30_HI - AArch64::W0_HI == 30,
"Unexpected order of registers");
Reserved.set(AArch64::W0_HI, AArch64::W30_HI);
static_assert(AArch64::B31_HI - AArch64::B0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::B0_HI, AArch64::B31_HI);
static_assert(AArch64::H31_HI - AArch64::H0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::H0_HI, AArch64::H31_HI);
static_assert(AArch64::S31_HI - AArch64::S0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::S0_HI, AArch64::S31_HI);
static_assert(AArch64::D31_HI - AArch64::D0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::D0_HI, AArch64::D31_HI);
static_assert(AArch64::Q31_HI - AArch64::Q0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::Q0_HI, AArch64::Q31_HI);
return Reserved;
}
BitVector
AArch64RegisterInfo::getUserReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
// ReserveXRegister is set for registers manually reserved
// through +reserve-x#i.
if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
}
return Reserved;
}
BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReservedForRA(i))
markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
}
if (MF.getSubtarget<AArch64Subtarget>().isLRReservedForRA()) {
// In order to prevent the register allocator from using LR, we need to
// mark it as reserved. However we don't want to keep it reserved throughout
// the pipeline since it prevents other infrastructure from reasoning about
    // its liveness. We use the NoVRegs property instead of IsSSA because
// IsSSA is removed before VirtRegRewriter runs.
if (!MF.getProperties().hasNoVRegs())
markSuperRegs(Reserved, AArch64::LR);
}
assert(checkAllSuperRegsMarked(Reserved));
// Handle strictlyReservedRegs separately to avoid re-evaluating the assert,
  // which becomes considerably more expensive when considering the _HI
  // registers.
Reserved |= getStrictlyReservedRegs(MF);
return Reserved;
}
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
MCRegister Reg) const {
return getReservedRegs(MF)[Reg];
}
bool AArch64RegisterInfo::isUserReservedReg(const MachineFunction &MF,
MCRegister Reg) const {
return getUserReservedRegs(MF)[Reg];
}
bool AArch64RegisterInfo::isStrictlyReservedReg(const MachineFunction &MF,
MCRegister Reg) const {
return getStrictlyReservedRegs(MF)[Reg];
}
bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
return isStrictlyReservedReg(MF, r);
});
}
void AArch64RegisterInfo::emitReservedArgRegCallError(
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
  F.getContext().diagnose(DiagnosticInfoUnsupported{
      F, "AArch64 doesn't support function calls if any of the argument "
         "registers is reserved."});
}
bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const {
  // SLH uses register X16 as the taint register but it will fall back to a
  // different method if the user clobbers it. So X16 is not reserved for
  // inline asm but is for normal codegen.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening) &&
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16))
return true;
// ZA/ZT0 registers are reserved but may be permitted in the clobber list.
if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0)
return true;
return !isReservedReg(MF, PhysReg);
}
const TargetRegisterClass *
AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
return &AArch64::GPR64spRegClass;
}
const TargetRegisterClass *
AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &AArch64::CCRRegClass)
return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
return RC;
}
unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
// In the presence of variable sized objects or funclets, if the fixed stack
// size is large enough that referencing from the FP won't result in things
// being in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
//
  // Furthermore, if variable sized objects are present and the stack needs to
  // be dynamically re-aligned, the base pointer is the only reliable way to
  // reference the locals.
if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (hasStackRealignment(MF))
return true;
auto &ST = MF.getSubtarget<AArch64Subtarget>();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (ST.hasSVE() || ST.isStreaming()) {
      // Frames that have variable sized objects and scalable SVE objects
      // should always use a base pointer.
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
return true;
}
// Frames with hazard padding can have a large offset between the frame
// pointer and GPR locals, which includes the emergency spill slot. If the
// emergency spill slot is not within range of the load/store instructions
// (which have a signed 9-bit range), we will fail to compile if it is used.
// Since hasBasePointer() is called before we know if we have hazard padding
// or an emergency spill slot we need to enable the basepointer
// conservatively.
if (ST.getStreamingHazardSize() &&
!AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody()) {
return true;
}
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
// space, so it's all more likely to be within range of the frame pointer.
// If it's wrong, we'll materialize the constant and still get to the
// object; it's just suboptimal. Negative offsets use the unscaled
// load/store instructions, which have a 9-bit signed immediate.
return MFI.getLocalFrameSize() >= 256;
}
return false;
}
bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
MCRegister Reg) const {
CallingConv::ID CC = MF.getFunction().getCallingConv();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
MF.getFunction().isVarArg());
auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
return llvm::is_contained(RegList, Reg);
};
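  // HasReg linearly scans the TableGen-generated argument-register tables
  // pulled in via GET_CC_REGISTER_LISTS; e.g. CC_AArch64_AAPCS_ArgRegs
  // roughly covers x0-x7 and v0-v7 in their various widths.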
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::GHC:
return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
case CallingConv::PreserveNone:
if (!MF.getFunction().isVarArg())
return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
[[fallthrough]];
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::PreserveAll:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (STI.isTargetWindows()) {
if (IsVarArg)
return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
switch (CC) {
default:
return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
case CallingConv::Swift:
case CallingConv::SwiftTail:
return HasReg(CC_AArch64_Win64PCS_Swift_ArgRegs, Reg) ||
HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
}
}
if (!STI.isTargetDarwin()) {
switch (CC) {
default:
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
case CallingConv::Swift:
case CallingConv::SwiftTail:
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
}
}
if (!IsVarArg) {
switch (CC) {
default:
return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
case CallingConv::Swift:
case CallingConv::SwiftTail:
return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
}
}
if (STI.isTargetILP32())
return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
case CallingConv::Win64:
if (IsVarArg)
      return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
case CallingConv::CFGuard_Check:
return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
if (STI.isTargetWindows())
return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
}
}
Register
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
}
bool AArch64RegisterInfo::requiresRegisterScavenging(
const MachineFunction &MF) const {
return true;
}
bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
const MachineFunction &MF) const {
return true;
}
bool
AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
  // This function indicates whether the emergency spill slot should be placed
  // close to the beginning of the stack frame (closer to FP) or the end
// (closer to SP).
//
// The beginning works most reliably if we have a frame pointer.
// In the presence of any non-constant space between FP and locals,
// (e.g. in case of stack realignment or a scalable SVE area), it is
// better to use SP or BP.
const AArch64FrameLowering &TFI = *getFrameLowering(MF);
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
!AFI->hasStackHazardSlotIndex();
}
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
const MachineFunction &MF) const {
return true;
}
bool
AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
return true;
return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
}
/// needsFrameBaseReg - Returns true if the instruction's frame index
/// reference would be better served by a base register other than FP
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
/// references it should create new base registers for.
bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
int64_t Offset) const {
for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
assert(i < MI->getNumOperands() &&
"Instr doesn't have FrameIndex operand!");
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
// based on the size of the local frame and some conservative assumptions
// about the rest of the stack frame (note, this is pre-regalloc, so
// we don't know everything for certain yet) whether this offset is likely
// to be out of range of the immediate. Return true if so.
// We only generate virtual base registers for loads and stores, so
// return false for everything else.
if (!MI->mayLoad() && !MI->mayStore())
return false;
  // Without a virtual base register, if the function has variable sized
  // objects, all fixed-size local references will be via the frame pointer.
  // Approximate the offset and see if it's legal for the instruction.
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
// Conservatively assume all GPR callee-saved registers get pushed.
// FP, LR, X19-X28, D8-D15. 64-bits each.
int64_t FPOffset = Offset - 16 * 20;
// Estimate an offset from the stack pointer.
  // The incoming offset is relative to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
Offset += MFI.getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
Offset += 128; // 128 bytes of spill slots
// If there is a frame pointer, try using it.
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
// FIXME: This (and the code that resolves the references) can be improved
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// If even offset 0 is illegal, we don't want a virtual base register.
if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
return true;
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
assert(MI && "Unable to get the legal offset for nil instruction.");
StackOffset SaveOffset = StackOffset::getFixed(Offset);
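  // isAArch64FrameOffsetLegal returns a bitmask of AArch64FrameOffset* status
  // flags; only the IsLegal bit matters here, hence the bitwise AND.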
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
Register
AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
int FrameIdx,
int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
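  // Emit "BaseReg = ADDXri <FrameIdx>, Offset, LSL #0". The frame-index
  // operand is resolved to a real base register and offset later, when frame
  // indices are eliminated.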
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx)
.addImm(Offset)
.addImm(Shifter);
return BaseReg;
}
void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
  // AArch64 doesn't need the general 64-bit offsets
StackOffset Off = StackOffset::getFixed(Offset);
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
const MachineFunction *MF = MI.getParent()->getParent();
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
}
// Create a scratch register for the frame index elimination in an instruction.
// This function has special handling of stack tagging loop pseudos, in which
// case it can also change the instruction opcode.
static Register
createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
const AArch64InstrInfo *TII) {
// ST*Gloop have a reserved scratch register in operand 1. Use it, and also
// replace the instruction with the writeback variant because it will now
// satisfy the operand constraints for it.
Register ScratchReg;
if (MI.getOpcode() == AArch64::STGloop ||
MI.getOpcode() == AArch64::STZGloop) {
assert(FIOperandNum == 3 &&
"Wrong frame index operand for STGloop/STZGloop");
unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
: AArch64::STZGloop_wback;
ScratchReg = MI.getOperand(1).getReg();
MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
MI.setDesc(TII->get(Op));
MI.tieOperands(1, 3);
} else {
ScratchReg =
MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
MI.getOperand(FIOperandNum)
.ChangeToRegister(ScratchReg, false, false, true);
}
return ScratchReg;
}
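// A sketch of the DWARF expression built below (assuming the usual DWARF
// register number for VG): a StackOffset of 16 fixed + 2 scalable bytes
// becomes
//   DW_OP_plus_uconst 16, DW_OP_constu 1, DW_OP_bregx VG 0, DW_OP_mul,
//   DW_OP_plus
// i.e. <base> + 16 + 1 * VG.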
void AArch64RegisterInfo::getOffsetOpcodes(
const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
  // The smallest scalable elements supported by scaled SVE addressing modes
  // are predicates, which are 2 scalable bytes in size. So the scalable byte
  // offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
// Add fixed-sized offset using existing DIExpression interface.
DIExpression::appendOffset(Ops, Offset.getFixed());
unsigned VG = getDwarfRegNum(AArch64::VG, true);
int64_t VGSized = Offset.getScalable() / 2;
if (VGSized > 0) {
Ops.push_back(dwarf::DW_OP_constu);
Ops.push_back(VGSized);
Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
Ops.push_back(dwarf::DW_OP_mul);
Ops.push_back(dwarf::DW_OP_plus);
} else if (VGSized < 0) {
Ops.push_back(dwarf::DW_OP_constu);
Ops.push_back(-VGSized);
Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
Ops.push_back(dwarf::DW_OP_mul);
Ops.push_back(dwarf::DW_OP_minus);
}
}
bool AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
bool Tagged =
MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
Register FrameReg;
  // Special handling of dbg_value, stackmap, patchpoint, and statepoint
  // instructions.
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT ||
MI.getOpcode() == TargetOpcode::STATEPOINT) {
StackOffset Offset =
TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
/*PreferFP=*/true,
/*ForSimm=*/false);
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
return false;
}
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
assert(!Offset.getScalable() &&
"Frame offsets with a scalable component are not supported");
FI.ChangeToImmediate(Offset.getFixed());
return false;
}
StackOffset Offset;
if (MI.getOpcode() == AArch64::TAGPstack) {
// TAGPstack must use the virtual frame register in its 3rd operand.
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
AFI->getTaggedBasePointerOffset());
} else if (Tagged) {
StackOffset SPOffset = StackOffset::getFixed(
MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
if (MFI.hasVarSizedObjects() ||
isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
(AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
// Can't update to SP + offset in place. Precalculate the tagged pointer
// in a scratch register.
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
Register ScratchReg =
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
TII);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(0);
MI.getOperand(FIOperandNum)
.ChangeToRegister(ScratchReg, false, false, true);
return false;
}
FrameReg = AArch64::SP;
Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
(int64_t)MFI.getStackSize());
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
}
// Modify MI as necessary to handle as much of 'Offset' as possible
if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return true;
assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
"Emergency spill slot is out of reach");
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
Register ScratchReg =
createScratchRegisterForInstruction(MI, FIOperandNum, TII);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
return false;
}
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
switch (RC->getID()) {
default:
return 0;
case AArch64::GPR32RegClassID:
case AArch64::GPR32spRegClassID:
case AArch64::GPR32allRegClassID:
case AArch64::GPR64spRegClassID:
case AArch64::GPR64allRegClassID:
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
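    // e.g. a Darwin function with both a frame pointer and a base pointer,
    // and no user-reserved X registers, gets 32 - 1 - 1 - 0 - 1 = 29.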
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
case AArch64::FPR64RegClassID:
case AArch64::FPR128RegClassID:
return 32;
case AArch64::MatrixIndexGPR32_8_11RegClassID:
case AArch64::MatrixIndexGPR32_12_15RegClassID:
return 4;
case AArch64::DDRegClassID:
case AArch64::DDDRegClassID:
case AArch64::DDDDRegClassID:
case AArch64::QQRegClassID:
case AArch64::QQQRegClassID:
case AArch64::QQQQRegClassID:
return 32;
case AArch64::FPR128_loRegClassID:
case AArch64::FPR64_loRegClassID:
case AArch64::FPR16_loRegClassID:
return 16;
case AArch64::FPR128_0to7RegClassID:
return 8;
}
}
// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation
// where a consecutive multi-vector tuple is constructed from the same indices
// of multiple strided loads. This may still result in unnecessary copies
// between the loads and the tuple. Here we try to return a hint to assign the
// contiguous ZPRMulReg starting at the same register as the first operand of
// the pseudo, which should be a subregister of the first strided load.
//
// For example, if the first strided load has been assigned $z16_z20_z24_z28
// and the operands of the pseudo are each accessing subregister zsub2, we
// should look through Order to find a contiguous register which
// begins with $z24 (i.e. $z24_z25_z26_z27).
//
bool AArch64RegisterInfo::getRegAllocationHints(
Register VirtReg, ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
auto &ST = MF.getSubtarget<AArch64Subtarget>();
if (!ST.hasSME() || !ST.isStreaming())
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
VRM);
// The SVE calling convention preserves registers Z8-Z23. As a result, there
// are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
// callee-saved registers and so by default these will be pushed to the back
// of the allocation order for the ZPRStridedOrContiguous classes.
// If any of the instructions which define VirtReg are used by the
// FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
// instructions over reducing the number of clobbered callee-save registers,
// so we add the strided registers as a hint.
const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
continue;
unsigned UseOps = Use.getNumOperands() - 1;
const TargetRegisterClass *StridedRC;
switch (RegID) {
case AArch64::ZPR2StridedOrContiguousRegClassID:
StridedRC = &AArch64::ZPR2StridedRegClass;
break;
case AArch64::ZPR4StridedOrContiguousRegClassID:
StridedRC = &AArch64::ZPR4StridedRegClass;
break;
default:
llvm_unreachable("Unexpected RegID");
}
SmallVector<MCPhysReg, 4> StridedOrder;
for (MCPhysReg Reg : Order)
if (StridedRC->contains(Reg))
StridedOrder.push_back(Reg);
int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
assert(OpIdx != -1 && "Expected operand index from register use.");
unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
TupleID == AArch64::ZPR4Mul4RegClassID;
const MachineOperand *AssignedRegOp = llvm::find_if(
make_range(Use.operands_begin() + 1, Use.operands_end()),
[&VRM](const MachineOperand &Op) {
return VRM->hasPhys(Op.getReg());
});
// Example:
//
// When trying to find a suitable register allocation for VirtReg %v2 in:
//
// %v0:zpr2stridedorcontiguous = ld1 p0/z, [...]
// %v1:zpr2stridedorcontiguous = ld1 p0/z, [...]
// %v2:zpr2stridedorcontiguous = ld1 p0/z, [...]
// %v3:zpr2stridedorcontiguous = ld1 p0/z, [...]
// %v4:zpr4mul4 = FORM_TRANSPOSED_X4 %v0:0, %v1:0, %v2:0, %v3:0
//
// One such suitable allocation would be:
//
// { z0, z8 } = ld1 p0/z, [...]
// { z1, z9 } = ld1 p0/z, [...]
// { z2, z10 } = ld1 p0/z, [...]
// { z3, z11 } = ld1 p0/z, [...]
// { z0, z1, z2, z3 } =
// FORM_TRANSPOSED_X4 {z0, z8}:0, {z1, z9}:0, {z2, z10}:0, {z3, z11}:0
//
// Below we distinguish two cases when trying to find a register:
// * None of the registers used by FORM_TRANSPOSED_X4 have been assigned
      //   yet. In this case the code must ensure that there are at least
      //   UseOps free consecutive registers. If IsMulZPR is true, the first
      //   of the registers must also be a multiple of UseOps, e.g.
      //   { z0, z1, z2, z3 }
// is valid but { z1, z2, z3, z5 } is not.
// * One or more of the registers used by FORM_TRANSPOSED_X4 is already
// assigned a physical register, which means only checking that a
// consecutive range of free tuple registers exists which includes
// the assigned register.
// e.g. in the example above, if { z0, z8 } is already allocated for
// %v0, we just need to ensure that { z1, z9 }, { z2, z10 } and
// { z3, z11 } are also free. If so, we add { z2, z10 }.
if (AssignedRegOp == Use.operands_end()) {
// There are no registers already assigned to any of the pseudo
// operands. Look for a valid starting register for the group.
for (unsigned I = 0; I < StridedOrder.size(); ++I) {
MCPhysReg Reg = StridedOrder[I];
// If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
// register of the first load should be a multiple of 2 or 4.
unsigned SubRegIdx = Use.getOperand(OpIdx).getSubReg();
if (IsMulZPR && (getSubReg(Reg, SubRegIdx) - AArch64::Z0) % UseOps !=
((unsigned)OpIdx - 1))
continue;
// In the example above, if VirtReg is the third operand of the
// tuple (%v2) and Reg == Z2_Z10, then we need to make sure that
// Z0_Z8, Z1_Z9 and Z3_Z11 are also available.
auto IsFreeConsecutiveReg = [&](unsigned UseOp) {
unsigned R = Reg - (OpIdx - 1) + UseOp;
return StridedRC->contains(R) &&
(UseOp == 0 ||
((getSubReg(R, AArch64::zsub0) - AArch64::Z0) ==
(getSubReg(R - 1, AArch64::zsub0) - AArch64::Z0) + 1)) &&
!Matrix->isPhysRegUsed(R);
};
if (all_of(iota_range<unsigned>(0U, UseOps, /*Inclusive=*/false),
IsFreeConsecutiveReg))
Hints.push_back(Reg);
}
} else {
// At least one operand already has a physical register assigned.
// Find the starting sub-register of this and use it to work out the
// correct strided register to suggest based on the current op index.
MCPhysReg TargetStartReg =
getSubReg(VRM->getPhys(AssignedRegOp->getReg()), AArch64::zsub0) +
(OpIdx - AssignedRegOp->getOperandNo());
for (unsigned I = 0; I < StridedOrder.size(); ++I)
if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
Hints.push_back(StridedOrder[I]);
}
if (!Hints.empty())
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
MF, VRM);
}
}
for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
MF, VRM);
unsigned FirstOpSubReg = MI.getOperand(1).getSubReg();
switch (FirstOpSubReg) {
case AArch64::zsub0:
case AArch64::zsub1:
case AArch64::zsub2:
case AArch64::zsub3:
break;
default:
continue;
}
// Look up the physical register mapped to the first operand of the pseudo.
Register FirstOpVirtReg = MI.getOperand(1).getReg();
if (!VRM->hasPhys(FirstOpVirtReg))
continue;
MCRegister TupleStartReg =
getSubReg(VRM->getPhys(FirstOpVirtReg), FirstOpSubReg);
for (unsigned I = 0; I < Order.size(); ++I)
if (MCRegister R = getSubReg(Order[I], AArch64::zsub0))
if (R == TupleStartReg)
Hints.push_back(Order[I]);
}
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
VRM);
}
unsigned AArch64RegisterInfo::getLocalAddressRegister(
const MachineFunction &MF) const {
const auto &MFI = MF.getFrameInfo();
if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects())
return AArch64::SP;
else if (hasStackRealignment(MF))
return getBaseRegister();
return getFrameRegister(MF);
}
/// SrcRC and DstRC will be morphed into NewRC if this returns true
bool AArch64RegisterInfo::shouldCoalesce(
MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
const TargetRegisterClass *DstRC, unsigned DstSubReg,
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
if (MI->isCopy() &&
((DstRC->getID() == AArch64::GPR64RegClassID) ||
(DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
// Do not coalesce in the case of a 32-bit subregister copy
// which implements a 32 to 64 bit zero extension
// which relies on the upper 32 bits being zeroed.
return false;
auto IsCoalescerBarrier = [](const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AArch64::COALESCER_BARRIER_FPR16:
case AArch64::COALESCER_BARRIER_FPR32:
case AArch64::COALESCER_BARRIER_FPR64:
case AArch64::COALESCER_BARRIER_FPR128:
return true;
default:
return false;
}
};
// For calls that temporarily have to toggle streaming mode as part of the
// call-sequence, we need to be more careful when coalescing copy instructions
// so that we don't end up coalescing the NEON/FP result or argument register
// with a whole Z-register, such that after coalescing the register allocator
// will try to spill/reload the entire Z register.
//
// We do this by checking if the node has any defs/uses that are
// COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to
// instruct the coalescer to avoid coalescing the copy.
if (MI->isCopy() && SubReg != DstSubReg &&
(AArch64::ZPRRegClass.hasSubClassEq(DstRC) ||
AArch64::ZPRRegClass.hasSubClassEq(SrcRC))) {
unsigned SrcReg = MI->getOperand(1).getReg();
if (any_of(MRI.def_instructions(SrcReg), IsCoalescerBarrier))
return false;
unsigned DstReg = MI->getOperand(0).getReg();
if (any_of(MRI.use_nodbg_instructions(DstReg), IsCoalescerBarrier))
return false;
}
return true;
}
bool AArch64RegisterInfo::shouldAnalyzePhysregInMachineLoopInfo(
MCRegister R) const {
return R == AArch64::VG;
}