| //===-- AArch64Arm64ECCallLowering.cpp - Lower Arm64EC calls ----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file contains the IR transform to lower external or indirect calls for |
| /// the ARM64EC calling convention. Such calls must go through the runtime, so |
| /// we can translate the calling convention for calls into the emulator. |
| /// |
| /// This subsumes Control Flow Guard handling. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallString.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Mangler.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Object/COFF.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/TargetParser/Triple.h" |
| |
| using namespace llvm; |
| using namespace llvm::COFF; |
| |
| using OperandBundleDef = OperandBundleDefT<Value *>; |
| |
| #define DEBUG_TYPE "arm64eccalllowering" |
| |
| STATISTIC(Arm64ECCallsLowered, "Number of Arm64EC calls lowered"); |
| |
| static cl::opt<bool> LowerDirectToIndirect("arm64ec-lower-direct-to-indirect", |
| cl::Hidden, cl::init(true)); |
| static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden, |
| cl::init(true)); |
| |
| namespace { |
| |
| class AArch64Arm64ECCallLowering : public ModulePass { |
| public: |
| static char ID; |
| AArch64Arm64ECCallLowering() : ModulePass(ID) { |
| initializeAArch64Arm64ECCallLoweringPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| Function *buildExitThunk(FunctionType *FnTy, AttributeList Attrs); |
| Function *buildEntryThunk(Function *F); |
| void lowerCall(CallBase *CB); |
| Function *buildGuestExitThunk(Function *F); |
| bool processFunction(Function &F, SetVector<Function *> &DirectCalledFns); |
| bool runOnModule(Module &M) override; |
| |
| private: |
| int cfguard_module_flag = 0; |
| FunctionType *GuardFnType = nullptr; |
| PointerType *GuardFnPtrType = nullptr; |
| Constant *GuardFnCFGlobal = nullptr; |
| Constant *GuardFnGlobal = nullptr; |
| Module *M = nullptr; |
| |
| Type *PtrTy; |
| Type *I64Ty; |
| Type *VoidTy; |
| |
| void getThunkType(FunctionType *FT, AttributeList AttrList, |
| Arm64ECThunkType TT, raw_ostream &Out, |
| FunctionType *&Arm64Ty, FunctionType *&X64Ty); |
| void getThunkRetType(FunctionType *FT, AttributeList AttrList, |
| raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy, |
| SmallVectorImpl<Type *> &Arm64ArgTypes, |
| SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr); |
| void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, |
| Arm64ECThunkType TT, raw_ostream &Out, |
| SmallVectorImpl<Type *> &Arm64ArgTypes, |
| SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr); |
| void canonicalizeThunkType(Type *T, Align Alignment, bool Ret, |
| uint64_t ArgSizeBytes, raw_ostream &Out, |
| Type *&Arm64Ty, Type *&X64Ty); |
| }; |
| |
| } // end anonymous namespace |
| |
| void AArch64Arm64ECCallLowering::getThunkType( |
| FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, |
| raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty) { |
| Out << (TT == Arm64ECThunkType::Entry ? "$ientry_thunk$cdecl$" |
| : "$iexit_thunk$cdecl$"); |
| |
| Type *Arm64RetTy; |
| Type *X64RetTy; |
| |
| SmallVector<Type *> Arm64ArgTypes; |
| SmallVector<Type *> X64ArgTypes; |
| |
| // The first argument to a thunk is the called function, stored in x9. |
| // For exit thunks, we pass the called function down to the emulator; |
| // for entry/guest exit thunks, we just call the Arm64 function directly. |
| if (TT == Arm64ECThunkType::Exit) |
| Arm64ArgTypes.push_back(PtrTy); |
| X64ArgTypes.push_back(PtrTy); |
| |
| bool HasSretPtr = false; |
| getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes, |
| X64ArgTypes, HasSretPtr); |
| |
| getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes, |
| HasSretPtr); |
| |
| Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false); |
| |
| X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false); |
| } |
| |
| void AArch64Arm64ECCallLowering::getThunkArgTypes( |
| FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, |
| raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, |
| SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) { |
| |
| Out << "$"; |
| if (FT->isVarArg()) { |
| // We treat the variadic function's thunk as a normal function |
| // with the following type on the ARM side: |
| // rettype exitthunk( |
| // ptr x9, ptr x0, i64 x1, i64 x2, i64 x3, ptr x4, i64 x5) |
| // |
| // that can coverage all types of variadic function. |
| // x9 is similar to normal exit thunk, store the called function. |
| // x0-x3 is the arguments be stored in registers. |
| // x4 is the address of the arguments on the stack. |
| // x5 is the size of the arguments on the stack. |
| // |
| // On the x64 side, it's the same except that x5 isn't set. |
| // |
| // If both the ARM and X64 sides are sret, there are only three |
| // arguments in registers. |
| // |
| // If the X64 side is sret, but the ARM side isn't, we pass an extra value |
| // to/from the X64 side, and let SelectionDAG transform it into a memory |
| // location. |
| Out << "varargs"; |
| |
| // x0-x3 |
| for (int i = HasSretPtr ? 1 : 0; i < 4; i++) { |
| Arm64ArgTypes.push_back(I64Ty); |
| X64ArgTypes.push_back(I64Ty); |
| } |
| |
| // x4 |
| Arm64ArgTypes.push_back(PtrTy); |
| X64ArgTypes.push_back(PtrTy); |
| // x5 |
| Arm64ArgTypes.push_back(I64Ty); |
| if (TT != Arm64ECThunkType::Entry) { |
| // FIXME: x5 isn't actually used by the x64 side; revisit once we |
| // have proper isel for varargs |
| X64ArgTypes.push_back(I64Ty); |
| } |
| return; |
| } |
| |
| unsigned I = 0; |
| if (HasSretPtr) |
| I++; |
| |
| if (I == FT->getNumParams()) { |
| Out << "v"; |
| return; |
| } |
| |
| for (unsigned E = FT->getNumParams(); I != E; ++I) { |
| #if 0 |
| // FIXME: Need more information about argument size; see |
| // https://reviews.llvm.org/D132926 |
| uint64_t ArgSizeBytes = AttrList.getParamArm64ECArgSizeBytes(I); |
| Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne(); |
| #else |
| uint64_t ArgSizeBytes = 0; |
| Align ParamAlign = Align(); |
| #endif |
| Type *Arm64Ty, *X64Ty; |
| canonicalizeThunkType(FT->getParamType(I), ParamAlign, |
| /*Ret*/ false, ArgSizeBytes, Out, Arm64Ty, X64Ty); |
| Arm64ArgTypes.push_back(Arm64Ty); |
| X64ArgTypes.push_back(X64Ty); |
| } |
| } |
| |
| void AArch64Arm64ECCallLowering::getThunkRetType( |
| FunctionType *FT, AttributeList AttrList, raw_ostream &Out, |
| Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes, |
| SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr) { |
| Type *T = FT->getReturnType(); |
| #if 0 |
| // FIXME: Need more information about argument size; see |
| // https://reviews.llvm.org/D132926 |
| uint64_t ArgSizeBytes = AttrList.getRetArm64ECArgSizeBytes(); |
| #else |
| int64_t ArgSizeBytes = 0; |
| #endif |
| if (T->isVoidTy()) { |
| if (FT->getNumParams()) { |
| auto SRetAttr = AttrList.getParamAttr(0, Attribute::StructRet); |
| auto InRegAttr = AttrList.getParamAttr(0, Attribute::InReg); |
| if (SRetAttr.isValid() && InRegAttr.isValid()) { |
| // sret+inreg indicates a call that returns a C++ class value. This is |
| // actually equivalent to just passing and returning a void* pointer |
| // as the first argument. Translate it that way, instead of trying |
| // to model "inreg" in the thunk's calling convention, to simplify |
| // the rest of the code. |
| Out << "i8"; |
| Arm64RetTy = I64Ty; |
| X64RetTy = I64Ty; |
| return; |
| } |
| if (SRetAttr.isValid()) { |
| // FIXME: Sanity-check the sret type; if it's an integer or pointer, |
| // we'll get screwy mangling/codegen. |
| // FIXME: For large struct types, mangle as an integer argument and |
| // integer return, so we can reuse more thunks, instead of "m" syntax. |
| // (MSVC mangles this case as an integer return with no argument, but |
| // that's a miscompile.) |
| Type *SRetType = SRetAttr.getValueAsType(); |
| Align SRetAlign = AttrList.getParamAlignment(0).valueOrOne(); |
| Type *Arm64Ty, *X64Ty; |
| canonicalizeThunkType(SRetType, SRetAlign, /*Ret*/ true, ArgSizeBytes, |
| Out, Arm64Ty, X64Ty); |
| Arm64RetTy = VoidTy; |
| X64RetTy = VoidTy; |
| Arm64ArgTypes.push_back(FT->getParamType(0)); |
| X64ArgTypes.push_back(FT->getParamType(0)); |
| HasSretPtr = true; |
| return; |
| } |
| } |
| |
| Out << "v"; |
| Arm64RetTy = VoidTy; |
| X64RetTy = VoidTy; |
| return; |
| } |
| |
| canonicalizeThunkType(T, Align(), /*Ret*/ true, ArgSizeBytes, Out, Arm64RetTy, |
| X64RetTy); |
| if (X64RetTy->isPointerTy()) { |
| // If the X64 type is canonicalized to a pointer, that means it's |
| // passed/returned indirectly. For a return value, that means it's an |
| // sret pointer. |
| X64ArgTypes.push_back(X64RetTy); |
| X64RetTy = VoidTy; |
| } |
| } |
| |
| void AArch64Arm64ECCallLowering::canonicalizeThunkType( |
| Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes, raw_ostream &Out, |
| Type *&Arm64Ty, Type *&X64Ty) { |
| if (T->isFloatTy()) { |
| Out << "f"; |
| Arm64Ty = T; |
| X64Ty = T; |
| return; |
| } |
| |
| if (T->isDoubleTy()) { |
| Out << "d"; |
| Arm64Ty = T; |
| X64Ty = T; |
| return; |
| } |
| |
| if (T->isFloatingPointTy()) { |
| report_fatal_error( |
| "Only 32 and 64 bit floating points are supported for ARM64EC thunks"); |
| } |
| |
| auto &DL = M->getDataLayout(); |
| |
| if (auto *StructTy = dyn_cast<StructType>(T)) |
| if (StructTy->getNumElements() == 1) |
| T = StructTy->getElementType(0); |
| |
| if (T->isArrayTy()) { |
| Type *ElementTy = T->getArrayElementType(); |
| uint64_t ElementCnt = T->getArrayNumElements(); |
| uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8; |
| uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes; |
| if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) { |
| Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes; |
| if (Alignment.value() >= 16 && !Ret) |
| Out << "a" << Alignment.value(); |
| Arm64Ty = T; |
| if (TotalSizeBytes <= 8) { |
| // Arm64 returns small structs of float/double in float registers; |
| // X64 uses RAX. |
| X64Ty = llvm::Type::getIntNTy(M->getContext(), TotalSizeBytes * 8); |
| } else { |
| // Struct is passed directly on Arm64, but indirectly on X64. |
| X64Ty = PtrTy; |
| } |
| return; |
| } else if (T->isFloatingPointTy()) { |
| report_fatal_error("Only 32 and 64 bit floating points are supported for " |
| "ARM64EC thunks"); |
| } |
| } |
| |
| if ((T->isIntegerTy() || T->isPointerTy()) && DL.getTypeSizeInBits(T) <= 64) { |
| Out << "i8"; |
| Arm64Ty = I64Ty; |
| X64Ty = I64Ty; |
| return; |
| } |
| |
| unsigned TypeSize = ArgSizeBytes; |
| if (TypeSize == 0) |
| TypeSize = DL.getTypeSizeInBits(T) / 8; |
| Out << "m"; |
| if (TypeSize != 4) |
| Out << TypeSize; |
| if (Alignment.value() >= 16 && !Ret) |
| Out << "a" << Alignment.value(); |
| // FIXME: Try to canonicalize Arm64Ty more thoroughly? |
| Arm64Ty = T; |
| if (TypeSize == 1 || TypeSize == 2 || TypeSize == 4 || TypeSize == 8) { |
| // Pass directly in an integer register |
| X64Ty = llvm::Type::getIntNTy(M->getContext(), TypeSize * 8); |
| } else { |
| // Passed directly on Arm64, but indirectly on X64. |
| X64Ty = PtrTy; |
| } |
| } |
| |
| // This function builds the "exit thunk", a function which translates |
| // arguments and return values when calling x64 code from AArch64 code. |
| Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, |
| AttributeList Attrs) { |
| SmallString<256> ExitThunkName; |
| llvm::raw_svector_ostream ExitThunkStream(ExitThunkName); |
| FunctionType *Arm64Ty, *X64Ty; |
| getThunkType(FT, Attrs, Arm64ECThunkType::Exit, ExitThunkStream, Arm64Ty, |
| X64Ty); |
| if (Function *F = M->getFunction(ExitThunkName)) |
| return F; |
| |
| Function *F = Function::Create(Arm64Ty, GlobalValue::LinkOnceODRLinkage, 0, |
| ExitThunkName, M); |
| F->setCallingConv(CallingConv::ARM64EC_Thunk_Native); |
| F->setSection(".wowthk$aa"); |
| F->setComdat(M->getOrInsertComdat(ExitThunkName)); |
| // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.) |
| F->addFnAttr("frame-pointer", "all"); |
| // Only copy sret from the first argument. For C++ instance methods, clang can |
| // stick an sret marking on a later argument, but it doesn't actually affect |
| // the ABI, so we can omit it. This avoids triggering a verifier assertion. |
| if (FT->getNumParams()) { |
| auto SRet = Attrs.getParamAttr(0, Attribute::StructRet); |
| auto InReg = Attrs.getParamAttr(0, Attribute::InReg); |
| if (SRet.isValid() && !InReg.isValid()) |
| F->addParamAttr(1, SRet); |
| } |
| // FIXME: Copy anything other than sret? Shouldn't be necessary for normal |
| // C ABI, but might show up in other cases. |
| BasicBlock *BB = BasicBlock::Create(M->getContext(), "", F); |
| IRBuilder<> IRB(BB); |
| Value *CalleePtr = |
| M->getOrInsertGlobal("__os_arm64x_dispatch_call_no_redirect", PtrTy); |
| Value *Callee = IRB.CreateLoad(PtrTy, CalleePtr); |
| auto &DL = M->getDataLayout(); |
| SmallVector<Value *> Args; |
| |
| // Pass the called function in x9. |
| Args.push_back(F->arg_begin()); |
| |
| Type *RetTy = Arm64Ty->getReturnType(); |
| if (RetTy != X64Ty->getReturnType()) { |
| // If the return type is an array or struct, translate it. Values of size |
| // 8 or less go into RAX; bigger values go into memory, and we pass a |
| // pointer. |
| if (DL.getTypeStoreSize(RetTy) > 8) { |
| Args.push_back(IRB.CreateAlloca(RetTy)); |
| } |
| } |
| |
| for (auto &Arg : make_range(F->arg_begin() + 1, F->arg_end())) { |
| // Translate arguments from AArch64 calling convention to x86 calling |
| // convention. |
| // |
| // For simple types, we don't need to do any translation: they're |
| // represented the same way. (Implicit sign extension is not part of |
| // either convention.) |
| // |
| // The big thing we have to worry about is struct types... but |
| // fortunately AArch64 clang is pretty friendly here: the cases that need |
| // translation are always passed as a struct or array. (If we run into |
| // some cases where this doesn't work, we can teach clang to mark it up |
| // with an attribute.) |
| // |
| // The first argument is the called function, stored in x9. |
| if (Arg.getType()->isArrayTy() || Arg.getType()->isStructTy() || |
| DL.getTypeStoreSize(Arg.getType()) > 8) { |
| Value *Mem = IRB.CreateAlloca(Arg.getType()); |
| IRB.CreateStore(&Arg, Mem); |
| if (DL.getTypeStoreSize(Arg.getType()) <= 8) { |
| Type *IntTy = IRB.getIntNTy(DL.getTypeStoreSizeInBits(Arg.getType())); |
| Args.push_back(IRB.CreateLoad(IntTy, IRB.CreateBitCast(Mem, PtrTy))); |
| } else |
| Args.push_back(Mem); |
| } else { |
| Args.push_back(&Arg); |
| } |
| } |
| // FIXME: Transfer necessary attributes? sret? anything else? |
| |
| Callee = IRB.CreateBitCast(Callee, PtrTy); |
| CallInst *Call = IRB.CreateCall(X64Ty, Callee, Args); |
| Call->setCallingConv(CallingConv::ARM64EC_Thunk_X64); |
| |
| Value *RetVal = Call; |
| if (RetTy != X64Ty->getReturnType()) { |
| // If we rewrote the return type earlier, convert the return value to |
| // the proper type. |
| if (DL.getTypeStoreSize(RetTy) > 8) { |
| RetVal = IRB.CreateLoad(RetTy, Args[1]); |
| } else { |
| Value *CastAlloca = IRB.CreateAlloca(RetTy); |
| IRB.CreateStore(Call, IRB.CreateBitCast(CastAlloca, PtrTy)); |
| RetVal = IRB.CreateLoad(RetTy, CastAlloca); |
| } |
| } |
| |
| if (RetTy->isVoidTy()) |
| IRB.CreateRetVoid(); |
| else |
| IRB.CreateRet(RetVal); |
| return F; |
| } |
| |
| // This function builds the "entry thunk", a function which translates |
| // arguments and return values when calling AArch64 code from x64 code. |
| Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { |
| SmallString<256> EntryThunkName; |
| llvm::raw_svector_ostream EntryThunkStream(EntryThunkName); |
| FunctionType *Arm64Ty, *X64Ty; |
| getThunkType(F->getFunctionType(), F->getAttributes(), |
| Arm64ECThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty); |
| if (Function *F = M->getFunction(EntryThunkName)) |
| return F; |
| |
| Function *Thunk = Function::Create(X64Ty, GlobalValue::LinkOnceODRLinkage, 0, |
| EntryThunkName, M); |
| Thunk->setCallingConv(CallingConv::ARM64EC_Thunk_X64); |
| Thunk->setSection(".wowthk$aa"); |
| Thunk->setComdat(M->getOrInsertComdat(EntryThunkName)); |
| // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.) |
| Thunk->addFnAttr("frame-pointer", "all"); |
| |
| auto &DL = M->getDataLayout(); |
| BasicBlock *BB = BasicBlock::Create(M->getContext(), "", Thunk); |
| IRBuilder<> IRB(BB); |
| |
| Type *RetTy = Arm64Ty->getReturnType(); |
| Type *X64RetType = X64Ty->getReturnType(); |
| |
| bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy(); |
| unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1; |
| unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size(); |
| |
| // Translate arguments to call. |
| SmallVector<Value *> Args; |
| for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) { |
| Value *Arg = Thunk->getArg(i); |
| Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset); |
| if (ArgTy->isArrayTy() || ArgTy->isStructTy() || |
| DL.getTypeStoreSize(ArgTy) > 8) { |
| // Translate array/struct arguments to the expected type. |
| if (DL.getTypeStoreSize(ArgTy) <= 8) { |
| Value *CastAlloca = IRB.CreateAlloca(ArgTy); |
| IRB.CreateStore(Arg, IRB.CreateBitCast(CastAlloca, PtrTy)); |
| Arg = IRB.CreateLoad(ArgTy, CastAlloca); |
| } else { |
| Arg = IRB.CreateLoad(ArgTy, IRB.CreateBitCast(Arg, PtrTy)); |
| } |
| } |
| Args.push_back(Arg); |
| } |
| |
| if (F->isVarArg()) { |
| // The 5th argument to variadic entry thunks is used to model the x64 sp |
| // which is passed to the thunk in x4, this can be passed to the callee as |
| // the variadic argument start address after skipping over the 32 byte |
| // shadow store. |
| |
| // The EC thunk CC will assign any argument marked as InReg to x4. |
| Thunk->addParamAttr(5, Attribute::InReg); |
| Value *Arg = Thunk->getArg(5); |
| Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20)); |
| Args.push_back(Arg); |
| |
| // Pass in a zero variadic argument size (in x5). |
| Args.push_back(IRB.getInt64(0)); |
| } |
| |
| // Call the function passed to the thunk. |
| Value *Callee = Thunk->getArg(0); |
| Callee = IRB.CreateBitCast(Callee, PtrTy); |
| Value *Call = IRB.CreateCall(Arm64Ty, Callee, Args); |
| |
| Value *RetVal = Call; |
| if (TransformDirectToSRet) { |
| IRB.CreateStore(RetVal, IRB.CreateBitCast(Thunk->getArg(1), PtrTy)); |
| } else if (X64RetType != RetTy) { |
| Value *CastAlloca = IRB.CreateAlloca(X64RetType); |
| IRB.CreateStore(Call, IRB.CreateBitCast(CastAlloca, PtrTy)); |
| RetVal = IRB.CreateLoad(X64RetType, CastAlloca); |
| } |
| |
| // Return to the caller. Note that the isel has code to translate this |
| // "ret" to a tail call to __os_arm64x_dispatch_ret. (Alternatively, we |
| // could emit a tail call here, but that would require a dedicated calling |
| // convention, which seems more complicated overall.) |
| if (X64RetType->isVoidTy()) |
| IRB.CreateRetVoid(); |
| else |
| IRB.CreateRet(RetVal); |
| |
| return Thunk; |
| } |
| |
| // Builds the "guest exit thunk", a helper to call a function which may or may |
| // not be an exit thunk. (We optimistically assume non-dllimport function |
| // declarations refer to functions defined in AArch64 code; if the linker |
| // can't prove that, we use this routine instead.) |
| Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { |
| llvm::raw_null_ostream NullThunkName; |
| FunctionType *Arm64Ty, *X64Ty; |
| getThunkType(F->getFunctionType(), F->getAttributes(), |
| Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty); |
| auto MangledName = getArm64ECMangledFunctionName(F->getName().str()); |
| assert(MangledName && "Can't guest exit to function that's already native"); |
| std::string ThunkName = *MangledName; |
| if (ThunkName[0] == '?' && ThunkName.find("@") != std::string::npos) { |
| ThunkName.insert(ThunkName.find("@"), "$exit_thunk"); |
| } else { |
| ThunkName.append("$exit_thunk"); |
| } |
| Function *GuestExit = |
| Function::Create(Arm64Ty, GlobalValue::WeakODRLinkage, 0, ThunkName, M); |
| GuestExit->setComdat(M->getOrInsertComdat(ThunkName)); |
| GuestExit->setSection(".wowthk$aa"); |
| GuestExit->setMetadata( |
| "arm64ec_unmangled_name", |
| MDNode::get(M->getContext(), |
| MDString::get(M->getContext(), F->getName()))); |
| GuestExit->setMetadata( |
| "arm64ec_ecmangled_name", |
| MDNode::get(M->getContext(), |
| MDString::get(M->getContext(), *MangledName))); |
| F->setMetadata("arm64ec_hasguestexit", MDNode::get(M->getContext(), {})); |
| BasicBlock *BB = BasicBlock::Create(M->getContext(), "", GuestExit); |
| IRBuilder<> B(BB); |
| |
| // Load the global symbol as a pointer to the check function. |
| Value *GuardFn; |
| if (cfguard_module_flag == 2 && !F->hasFnAttribute("guard_nocf")) |
| GuardFn = GuardFnCFGlobal; |
| else |
| GuardFn = GuardFnGlobal; |
| LoadInst *GuardCheckLoad = B.CreateLoad(GuardFnPtrType, GuardFn); |
| |
| // Create new call instruction. The CFGuard check should always be a call, |
| // even if the original CallBase is an Invoke or CallBr instruction. |
| Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes()); |
| CallInst *GuardCheck = B.CreateCall( |
| GuardFnType, GuardCheckLoad, |
| {B.CreateBitCast(F, B.getPtrTy()), B.CreateBitCast(Thunk, B.getPtrTy())}); |
| |
| // Ensure that the first argument is passed in the correct register. |
| GuardCheck->setCallingConv(CallingConv::CFGuard_Check); |
| |
| Value *GuardRetVal = B.CreateBitCast(GuardCheck, PtrTy); |
| SmallVector<Value *> Args; |
| for (Argument &Arg : GuestExit->args()) |
| Args.push_back(&Arg); |
| CallInst *Call = B.CreateCall(Arm64Ty, GuardRetVal, Args); |
| Call->setTailCallKind(llvm::CallInst::TCK_MustTail); |
| |
| if (Call->getType()->isVoidTy()) |
| B.CreateRetVoid(); |
| else |
| B.CreateRet(Call); |
| |
| auto SRetAttr = F->getAttributes().getParamAttr(0, Attribute::StructRet); |
| auto InRegAttr = F->getAttributes().getParamAttr(0, Attribute::InReg); |
| if (SRetAttr.isValid() && !InRegAttr.isValid()) { |
| GuestExit->addParamAttr(0, SRetAttr); |
| Call->addParamAttr(0, SRetAttr); |
| } |
| |
| return GuestExit; |
| } |
| |
| // Lower an indirect call with inline code. |
| void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) { |
| assert(Triple(CB->getModule()->getTargetTriple()).isOSWindows() && |
| "Only applicable for Windows targets"); |
| |
| IRBuilder<> B(CB); |
| Value *CalledOperand = CB->getCalledOperand(); |
| |
| // If the indirect call is called within catchpad or cleanuppad, |
| // we need to copy "funclet" bundle of the call. |
| SmallVector<llvm::OperandBundleDef, 1> Bundles; |
| if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet)) |
| Bundles.push_back(OperandBundleDef(*Bundle)); |
| |
| // Load the global symbol as a pointer to the check function. |
| Value *GuardFn; |
| if (cfguard_module_flag == 2 && !CB->hasFnAttr("guard_nocf")) |
| GuardFn = GuardFnCFGlobal; |
| else |
| GuardFn = GuardFnGlobal; |
| LoadInst *GuardCheckLoad = B.CreateLoad(GuardFnPtrType, GuardFn); |
| |
| // Create new call instruction. The CFGuard check should always be a call, |
| // even if the original CallBase is an Invoke or CallBr instruction. |
| Function *Thunk = buildExitThunk(CB->getFunctionType(), CB->getAttributes()); |
| CallInst *GuardCheck = |
| B.CreateCall(GuardFnType, GuardCheckLoad, |
| {B.CreateBitCast(CalledOperand, B.getPtrTy()), |
| B.CreateBitCast(Thunk, B.getPtrTy())}, |
| Bundles); |
| |
| // Ensure that the first argument is passed in the correct register. |
| GuardCheck->setCallingConv(CallingConv::CFGuard_Check); |
| |
| Value *GuardRetVal = B.CreateBitCast(GuardCheck, CalledOperand->getType()); |
| CB->setCalledOperand(GuardRetVal); |
| } |
| |
| bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { |
| if (!GenerateThunks) |
| return false; |
| |
| M = &Mod; |
| |
| // Check if this module has the cfguard flag and read its value. |
| if (auto *MD = |
| mdconst::extract_or_null<ConstantInt>(M->getModuleFlag("cfguard"))) |
| cfguard_module_flag = MD->getZExtValue(); |
| |
| PtrTy = PointerType::getUnqual(M->getContext()); |
| I64Ty = Type::getInt64Ty(M->getContext()); |
| VoidTy = Type::getVoidTy(M->getContext()); |
| |
| GuardFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy}, false); |
| GuardFnPtrType = PointerType::get(GuardFnType, 0); |
| GuardFnCFGlobal = |
| M->getOrInsertGlobal("__os_arm64x_check_icall_cfg", GuardFnPtrType); |
| GuardFnGlobal = |
| M->getOrInsertGlobal("__os_arm64x_check_icall", GuardFnPtrType); |
| |
| SetVector<Function *> DirectCalledFns; |
| for (Function &F : Mod) |
| if (!F.isDeclaration() && |
| F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native && |
| F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) |
| processFunction(F, DirectCalledFns); |
| |
| struct ThunkInfo { |
| Constant *Src; |
| Constant *Dst; |
| Arm64ECThunkType Kind; |
| }; |
| SmallVector<ThunkInfo> ThunkMapping; |
| for (Function &F : Mod) { |
| if (!F.isDeclaration() && (!F.hasLocalLinkage() || F.hasAddressTaken()) && |
| F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native && |
| F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) { |
| if (!F.hasComdat()) |
| F.setComdat(Mod.getOrInsertComdat(F.getName())); |
| ThunkMapping.push_back( |
| {&F, buildEntryThunk(&F), Arm64ECThunkType::Entry}); |
| } |
| } |
| for (Function *F : DirectCalledFns) { |
| ThunkMapping.push_back( |
| {F, buildExitThunk(F->getFunctionType(), F->getAttributes()), |
| Arm64ECThunkType::Exit}); |
| if (!F->hasDLLImportStorageClass()) |
| ThunkMapping.push_back( |
| {buildGuestExitThunk(F), F, Arm64ECThunkType::GuestExit}); |
| } |
| |
| if (!ThunkMapping.empty()) { |
| SmallVector<Constant *> ThunkMappingArrayElems; |
| for (ThunkInfo &Thunk : ThunkMapping) { |
| ThunkMappingArrayElems.push_back(ConstantStruct::getAnon( |
| {ConstantExpr::getBitCast(Thunk.Src, PtrTy), |
| ConstantExpr::getBitCast(Thunk.Dst, PtrTy), |
| ConstantInt::get(M->getContext(), APInt(32, uint8_t(Thunk.Kind)))})); |
| } |
| Constant *ThunkMappingArray = ConstantArray::get( |
| llvm::ArrayType::get(ThunkMappingArrayElems[0]->getType(), |
| ThunkMappingArrayElems.size()), |
| ThunkMappingArrayElems); |
| new GlobalVariable(Mod, ThunkMappingArray->getType(), /*isConstant*/ false, |
| GlobalValue::ExternalLinkage, ThunkMappingArray, |
| "llvm.arm64ec.symbolmap"); |
| } |
| |
| return true; |
| } |
| |
| bool AArch64Arm64ECCallLowering::processFunction( |
| Function &F, SetVector<Function *> &DirectCalledFns) { |
| SmallVector<CallBase *, 8> IndirectCalls; |
| |
| // For ARM64EC targets, a function definition's name is mangled differently |
| // from the normal symbol. We currently have no representation of this sort |
| // of symbol in IR, so we change the name to the mangled name, then store |
| // the unmangled name as metadata. Later passes that need the unmangled |
| // name (emitting the definition) can grab it from the metadata. |
| // |
| // FIXME: Handle functions with weak linkage? |
| if (!F.hasLocalLinkage() || F.hasAddressTaken()) { |
| if (std::optional<std::string> MangledName = |
| getArm64ECMangledFunctionName(F.getName().str())) { |
| F.setMetadata("arm64ec_unmangled_name", |
| MDNode::get(M->getContext(), |
| MDString::get(M->getContext(), F.getName()))); |
| if (F.hasComdat() && F.getComdat()->getName() == F.getName()) { |
| Comdat *MangledComdat = M->getOrInsertComdat(MangledName.value()); |
| SmallVector<GlobalObject *> ComdatUsers = |
| to_vector(F.getComdat()->getUsers()); |
| for (GlobalObject *User : ComdatUsers) |
| User->setComdat(MangledComdat); |
| } |
| F.setName(MangledName.value()); |
| } |
| } |
| |
| // Iterate over the instructions to find all indirect call/invoke/callbr |
| // instructions. Make a separate list of pointers to indirect |
| // call/invoke/callbr instructions because the original instructions will be |
| // deleted as the checks are added. |
| for (BasicBlock &BB : F) { |
| for (Instruction &I : BB) { |
| auto *CB = dyn_cast<CallBase>(&I); |
| if (!CB || CB->getCallingConv() == CallingConv::ARM64EC_Thunk_X64 || |
| CB->isInlineAsm()) |
| continue; |
| |
| // We need to instrument any call that isn't directly calling an |
| // ARM64 function. |
| // |
| // FIXME: getCalledFunction() fails if there's a bitcast (e.g. |
| // unprototyped functions in C) |
| if (Function *F = CB->getCalledFunction()) { |
| if (!LowerDirectToIndirect || F->hasLocalLinkage() || |
| F->isIntrinsic() || !F->isDeclaration()) |
| continue; |
| |
| DirectCalledFns.insert(F); |
| continue; |
| } |
| |
| IndirectCalls.push_back(CB); |
| ++Arm64ECCallsLowered; |
| } |
| } |
| |
| if (IndirectCalls.empty()) |
| return false; |
| |
| for (CallBase *CB : IndirectCalls) |
| lowerCall(CB); |
| |
| return true; |
| } |
| |
| char AArch64Arm64ECCallLowering::ID = 0; |
| INITIALIZE_PASS(AArch64Arm64ECCallLowering, "Arm64ECCallLowering", |
| "AArch64Arm64ECCallLowering", false, false) |
| |
| ModulePass *llvm::createAArch64Arm64ECCallLoweringPass() { |
| return new AArch64Arm64ECCallLowering; |
| } |