//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//

#include "AArch64CallingConvention.h"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

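// Registers available for passing block arguments, following the AAPCS64 and
// Darwin PCS rules used below: up to eight registers of each data class
// (X0-X7, H0-H7, S0-S7, D0-D7, Q0-Q7, SVE Z0-Z7) and four SVE predicate
// registers (P0-P3).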
static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
                                     AArch64::X3, AArch64::X4, AArch64::X5,
                                     AArch64::X6, AArch64::X7};
static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
                                     AArch64::H3, AArch64::H4, AArch64::H5,
                                     AArch64::H6, AArch64::H7};
static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
                                     AArch64::S3, AArch64::S4, AArch64::S5,
                                     AArch64::S6, AArch64::S7};
static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
                                     AArch64::D3, AArch64::D4, AArch64::D5,
                                     AArch64::D6, AArch64::D7};
static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                     AArch64::Q6, AArch64::Q7};
static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
                                     AArch64::Z3, AArch64::Z4, AArch64::Z5,
                                     AArch64::Z6, AArch64::Z7};
static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
                                     AArch64::P3};

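/// Finalize a block of pending [N x Ty] members that could not be assigned a
/// contiguous run of registers. Scalable-vector blocks are re-run through the
/// generated CCAssignFn (with all remaining Z/P registers temporarily marked
/// as allocated) so the tuple is passed indirectly; everything else is given
/// consecutive stack slots.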
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
                             CCState &State, Align SlotAlign) {
  if (LocVT.isScalableVector()) {
    const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
        State.getMachineFunction().getSubtarget());
    const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();

    // We are about to reinvoke the CCAssignFn auto-generated handler. If we
    // don't unset these flags we will get stuck in an infinite loop, forever
    // re-entering the custom handler.
    ArgFlags.setInConsecutiveRegs(false);
    ArgFlags.setInConsecutiveRegsLast(false);

    // The calling convention for passing SVE tuples states that in the event
    // we cannot allocate enough registers for the tuple we should still leave
    // any remaining registers unallocated. However, when we call the
    // CCAssignFn again we want it to behave as if all remaining registers are
    // allocated. This will force the code to pass the tuple indirectly in
    // accordance with the PCS.
    bool ZRegsAllocated[8];
    for (int I = 0; I < 8; I++) {
      ZRegsAllocated[I] = State.isAllocated(ZRegList[I]);
      State.AllocateReg(ZRegList[I]);
    }
    // The same applies to P registers.
    bool PRegsAllocated[4];
    for (int I = 0; I < 4; I++) {
      PRegsAllocated[I] = State.isAllocated(PRegList[I]);
      State.AllocateReg(PRegList[I]);
    }

    auto &It = PendingMembers[0];
    CCAssignFn *AssignFn =
        TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
    // FIXME: Get the correct original type.
    Type *OrigTy = EVT(It.getValVT()).getTypeForEVT(State.getContext());
    if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
                 ArgFlags, OrigTy, State))
      llvm_unreachable("Call operand has unhandled type");

    // Return the flags to how they were before.
    ArgFlags.setInConsecutiveRegs(true);
    ArgFlags.setInConsecutiveRegsLast(true);

    // Return the register state back to how it was before, leaving any
    // unallocated registers available for other smaller types.
    for (int I = 0; I < 8; I++)
      if (!ZRegsAllocated[I])
        State.DeallocateReg(ZRegList[I]);
    for (int I = 0; I < 4; I++)
      if (!PRegsAllocated[I])
        State.DeallocateReg(PRegList[I]);

    // All pending members have now been allocated.
    PendingMembers.clear();
    return true;
  }

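  // Non-scalable case: give each pending member its own stack slot. Only the
  // first slot needs the block's alignment; the remaining members are laid
  // out contiguously after it.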
  unsigned Size = LocVT.getSizeInBits() / 8;
  for (auto &It : PendingMembers) {
    It.convertToMem(State.AllocateStack(Size, SlotAlign));
    State.addLoc(It);
    SlotAlign = Align(1);
  }

  // All pending members have now been allocated.
  PendingMembers.clear();
  return true;
}

/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots.
/// An [N x Ty] type must still be contiguous in memory, though.
static bool CC_AArch64_Custom_Stack_Block(
    unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // Add the argument to the list to be allocated once we know the size of the
  // block.
  PendingMembers.push_back(
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
}

/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
/// registers. If no such sequence is available, mark the rest of the registers
/// of that type as used and place the argument on the stack.
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
      State.getMachineFunction().getSubtarget());
  bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();

  // Try to allocate a contiguous block of registers, each of the correct
  // size to hold one member.
  ArrayRef<MCPhysReg> RegList;
  if (LocVT.SimpleTy == MVT::i64 ||
      (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
    RegList = XRegList;
  else if (LocVT.SimpleTy == MVT::f16 || LocVT.SimpleTy == MVT::bf16)
    RegList = HRegList;
  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
    RegList = SRegList;
  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
    RegList = DRegList;
  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
    RegList = QRegList;
  else if (LocVT.isScalableVector()) {
    // Scalable masks should be passed in predicate registers.
    if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
        LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
        LocVT == MVT::aarch64svcount)
      RegList = PRegList;
    else
      RegList = ZRegList;
  } else {
    // Not an array we want to split up after all.
    return false;
  }

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // Add the argument to the list to be allocated once we know the size of the
  // block.
  PendingMembers.push_back(
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
  // because that's how the armv7k Clang front-end emits small structs.
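  // For example, a [3 x i32] block occupies two X registers: member 0 is
  // zero-extended into the low half of the first register, member 1 goes in
  // its upper half, and member 2 takes the low half of the second register.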
  unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
  ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock(
      RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
  if (!RegResult.empty() && EltsPerReg == 1) {
    for (const auto &[It, Reg] : zip(PendingMembers, RegResult)) {
      It.convertToReg(Reg);
      State.addLoc(It);
    }
    PendingMembers.clear();
    return true;
  } else if (!RegResult.empty()) {
    assert(EltsPerReg == 2 && "unexpected ABI");
    bool UseHigh = false;
    CCValAssign::LocInfo Info;
    unsigned RegIdx = 0;
    for (auto &It : PendingMembers) {
      Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
      State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32,
                                       RegResult[RegIdx], MVT::i64, Info));
      UseHigh = !UseHigh;
      if (!UseHigh)
        ++RegIdx;
    }
    PendingMembers.clear();
    return true;
  }

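  // No contiguous run was available. For scalable vectors, finishStackBlock
  // does its own register bookkeeping (temporarily exhausting the Z/P
  // registers and restoring them afterwards), so the class is only exhausted
  // here for the non-scalable case.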
  if (!LocVT.isScalableVector()) {
    // Mark all regs in the class as unavailable.
    for (auto Reg : RegList)
      State.AllocateReg(Reg);
  }

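  // Stack members use the natural alignment of the member type, capped at the
  // target's stack alignment. Non-Darwin targets additionally align each slot
  // to at least 8 bytes, per the AAPCS64; the Darwin PCS permits tighter
  // packing.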
  const MaybeAlign StackAlign =
      State.getMachineFunction().getDataLayout().getStackAlignment();
  assert(StackAlign && "data layout string is missing stack alignment");
  const Align MemAlign = ArgFlags.getNonZeroMemAlign();
  Align SlotAlign = std::min(MemAlign, *StackAlign);
  if (!Subtarget.isTargetDarwin())
    SlotAlign = std::max(SlotAlign, Align(8));

  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
}

// TableGen provides definitions of the calling convention analysis entry
// points.
#include "AArch64GenCallingConv.inc"