| //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that ARM uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ARMISelLowering.h" |
| #include "ARMBaseInstrInfo.h" |
| #include "ARMBaseRegisterInfo.h" |
| #include "ARMCallingConv.h" |
| #include "ARMConstantPoolValue.h" |
| #include "ARMMachineFunctionInfo.h" |
| #include "ARMPerfectShuffle.h" |
| #include "ARMRegisterInfo.h" |
| #include "ARMSelectionDAGInfo.h" |
| #include "ARMSubtarget.h" |
| #include "MCTargetDesc/ARMAddressingModes.h" |
| #include "MCTargetDesc/ARMBaseInfo.h" |
| #include "Utils/ARMBaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/BitVector.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/VectorUtils.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/IntrinsicLowering.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetOpcodes.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalAlias.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InlineAsm.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCInstrDesc.h" |
| #include "llvm/MC/MCInstrItineraries.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSchedule.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/BranchProbability.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <iterator> |
| #include <limits> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "arm-isel" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); |
| STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); |
| STATISTIC(NumConstpoolPromoted, |
| "Number of constants with their storage promoted into constant pools"); |
| |
| static cl::opt<bool> |
| ARMInterworking("arm-interworking", cl::Hidden, |
| cl::desc("Enable / disable ARM interworking (for debugging only)"), |
| cl::init(true)); |
| |
| static cl::opt<bool> EnableConstpoolPromotion( |
| "arm-promote-constant", cl::Hidden, |
| cl::desc("Enable / disable promotion of unnamed_addr constants into " |
| "constant pools"), |
| cl::init(false)); // FIXME: set to true by default once PR32780 is fixed |
| static cl::opt<unsigned> ConstpoolPromotionMaxSize( |
| "arm-promote-constant-max-size", cl::Hidden, |
| cl::desc("Maximum size of constant to promote into a constant pool"), |
| cl::init(64)); |
| static cl::opt<unsigned> ConstpoolPromotionMaxTotal( |
| "arm-promote-constant-max-total", cl::Hidden, |
| cl::desc("Maximum size of ALL constants to promote into a constant pool"), |
| cl::init(128)); |
| |
| // The APCS parameter registers. |
| static const MCPhysReg GPRArgRegs[] = { |
| ARM::R0, ARM::R1, ARM::R2, ARM::R3 |
| }; |
| |
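| // Configure how operations on the NEON vector type VT are legalized: loads, |
| // stores and bitwise ops may be promoted to the given types, lane |
| // insert/extract and shuffles get custom lowering, and operations NEON |
| // cannot perform directly (divide, remainder, etc.) are expanded. |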
| void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, |
| MVT PromotedBitwiseVT) { |
| if (VT != PromotedLdStVT) { |
| setOperationAction(ISD::LOAD, VT, Promote); |
| AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); |
| |
| setOperationAction(ISD::STORE, VT, Promote); |
| AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); |
| } |
| |
| MVT ElemTy = VT.getVectorElementType(); |
| if (ElemTy != MVT::f64) |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| if (ElemTy == MVT::i32) { |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| } else { |
| setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
| setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
| } |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| if (VT.isInteger()) { |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| } |
| |
| // Promote all bit-wise operations. |
| if (VT.isInteger() && VT != PromotedBitwiseVT) { |
| setOperationAction(ISD::AND, VT, Promote); |
| AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); |
| setOperationAction(ISD::OR, VT, Promote); |
| AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); |
| setOperationAction(ISD::XOR, VT, Promote); |
| AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); |
| } |
| |
| // Neon does not support vector divide/remainder operations. |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::FDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| |
| if (!VT.isFloatingPoint() && |
| VT != MVT::v2i64 && VT != MVT::v1i64) |
| for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
| setOperationAction(Opcode, VT, Legal); |
| } |
| |
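| // NEON D registers hold 64-bit vectors; Q registers (pairs of D registers) |
| // hold 128-bit vectors. |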
| void ARMTargetLowering::addDRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &ARM::DPRRegClass); |
| addTypeForNEON(VT, MVT::f64, MVT::v2i32); |
| } |
| |
| void ARMTargetLowering::addQRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &ARM::DPairRegClass); |
| addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); |
| } |
| |
| ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, |
| const ARMSubtarget &STI) |
| : TargetLowering(TM), Subtarget(&STI) { |
| RegInfo = Subtarget->getRegisterInfo(); |
| Itins = Subtarget->getInstrItineraryData(); |
| |
| setBooleanContents(ZeroOrOneBooleanContent); |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| |
| if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && |
| !Subtarget->isTargetWatchOS()) { |
| bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; |
| for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) |
| setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), |
| IsHFTarget ? CallingConv::ARM_AAPCS_VFP |
| : CallingConv::ARM_AAPCS); |
| } |
| |
| if (Subtarget->isTargetMachO()) { |
| // Uses VFP for Thumb libfuncs if available. |
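| // The *vfp variants of these routines take and return their floating-point |
| // arguments in VFP registers instead of core registers. |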
| if (Subtarget->isThumb() && Subtarget->hasVFP2() && |
| Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const ISD::CondCode Cond; |
| } LibraryCalls[] = { |
| // Single-precision floating-point arithmetic. |
| { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, |
| |
| // Double-precision floating-point arithmetic. |
| { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, |
| |
| // Single-precision comparisons. |
| { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, |
| { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, |
| { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, |
| { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, |
| { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, |
| { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, |
| { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, |
| { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ }, |
| |
| // Double-precision comparisons. |
| { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, |
| { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, |
| { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, |
| { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, |
| { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, |
| { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, |
| { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, |
| { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ }, |
| |
| // Floating-point to integer conversions. |
| // i64 conversions are done via library routines even when generating VFP |
| // instructions, so use the same ones. |
| { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, |
| |
| // Conversions between floating types. |
| { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, |
| |
| // Integer to floating-point conversions. |
| // i64 conversions are done via library routines even when generating VFP |
| // instructions, so use the same ones. |
| // FIXME: There appears to be some naming inconsistency in ARM libgcc: |
| // e.g., __floatunsidf vs. __floatunssidfvfp. |
| { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| } |
| } |
| |
| // These libcalls are not available on 32-bit targets. |
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| |
| // RTABI (Run-time ABI for the ARM architecture) library calls. |
| if (Subtarget->isAAPCS_ABI() && |
| (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || |
| Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| const ISD::CondCode Cond; |
| } LibraryCalls[] = { |
| // Double-precision floating-point arithmetic helper functions |
| // RTABI chapter 4.1.2, Table 2 |
| { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Double-precision floating-point comparison helper functions |
| // RTABI chapter 4.1.2, Table 3 |
| { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| |
| // Single-precision floating-point arithmetic helper functions |
| // RTABI chapter 4.1.2, Table 4 |
| { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Single-precision floating-point comparison helper functions |
| // RTABI chapter 4.1.2, Table 5 |
| { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| |
| // Floating-point to integer conversions. |
| // RTABI chapter 4.1.2, Table 6 |
| { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Conversions between floating types. |
| // RTABI chapter 4.1.2, Table 7 |
| { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Integer to floating-point conversions. |
| // RTABI chapter 4.1.2, Table 8 |
| { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Long long helper functions |
| // RTABI chapter 4.2, Table 9 |
| { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Integer division functions |
| // RTABI chapter 4.3.1 |
| { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| |
| // EABI dependent RTLIB |
| if (TM.Options.EABIVersion == EABI::EABI4 || |
| TM.Options.EABIVersion == EABI::EABI5) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char *const Name; |
| const CallingConv::ID CC; |
| const ISD::CondCode Cond; |
| } MemOpsLibraryCalls[] = { |
| // Memory operations |
| // RTABI chapter 4.3.4 |
| { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : MemOpsLibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| } |
| } |
| |
| if (Subtarget->isTargetWindows()) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
| // Use divmod compiler-rt calls for iOS 5.0 and later. |
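| // A single divmod call computes both the quotient and the remainder, so |
| // matching sdiv/srem (or udiv/urem) pairs can share one call. |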
| if (Subtarget->isTargetMachO() && |
| !(Subtarget->isTargetIOS() && |
| Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { |
| setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); |
| setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); |
| } |
| |
| // The half <-> float conversion functions are always soft-float on |
| // non-watchOS platforms, but are needed for some targets which use a |
| // hard-float calling convention by default. |
| if (!Subtarget->isTargetWatchABI()) { |
| if (Subtarget->isAAPCS_ABI()) { |
| setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); |
| setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); |
| setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); |
| } else { |
| setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); |
| setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); |
| setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); |
| } |
| } |
| |
| // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have |
| // a __gnu_ prefix (which is the default). |
| if (Subtarget->isTargetAEABI()) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, |
| { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, |
| { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
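| // Thumb-1 can directly access only the low registers r0-r7, so restrict |
| // i32 to the tGPR register class there. |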
| if (Subtarget->isThumb1Only()) |
| addRegisterClass(MVT::i32, &ARM::tGPRRegClass); |
| else |
| addRegisterClass(MVT::i32, &ARM::GPRRegClass); |
| |
| if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && |
| !Subtarget->isThumb1Only()) { |
| addRegisterClass(MVT::f32, &ARM::SPRRegClass); |
| addRegisterClass(MVT::f64, &ARM::DPRRegClass); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| addRegisterClass(MVT::f16, &ARM::HPRRegClass); |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| } |
| |
| for (MVT VT : MVT::vector_valuetypes()) { |
| for (MVT InnerVT : MVT::vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| } |
| |
| setOperationAction(ISD::ConstantFP, MVT::f32, Custom); |
| setOperationAction(ISD::ConstantFP, MVT::f64, Custom); |
| |
| setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); |
| setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); |
| |
| if (Subtarget->hasNEON()) { |
| addDRTypeForNEON(MVT::v2f32); |
| addDRTypeForNEON(MVT::v8i8); |
| addDRTypeForNEON(MVT::v4i16); |
| addDRTypeForNEON(MVT::v2i32); |
| addDRTypeForNEON(MVT::v1i64); |
| |
| addQRTypeForNEON(MVT::v4f32); |
| addQRTypeForNEON(MVT::v2f64); |
| addQRTypeForNEON(MVT::v16i8); |
| addQRTypeForNEON(MVT::v8i16); |
| addQRTypeForNEON(MVT::v4i32); |
| addQRTypeForNEON(MVT::v2i64); |
| |
| if (Subtarget->hasFullFP16()) { |
| addQRTypeForNEON(MVT::v8f16); |
| addDRTypeForNEON(MVT::v4f16); |
| } |
| |
| // v2f64 is legal so that QR subregs can be extracted as f64 elements, but |
| // neither NEON nor VFP supports any arithmetic operations on it. The same |
| // holds for v4f32, but keep in mind that vadd, vsub and vmul are natively |
| // supported for v4f32. |
| setOperationAction(ISD::FADD, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::v2f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::v2f64, Expand); |
| // FIXME: Code duplication: FDIV and FREM are always expanded; see the |
| // ARMTargetLowering::addTypeForNEON method for details. |
| setOperationAction(ISD::FDIV, MVT::v2f64, Expand); |
| setOperationAction(ISD::FREM, MVT::v2f64, Expand); |
| // FIXME: Create unittest. |
| // In other words, find a case where "copysign" appears in the DAG with |
| // vector operands. |
| setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); |
| // FIXME: Code duplication: SETCC has custom operation action, see |
| // ARMTargetLowering::addTypeForNEON method for details. |
| setOperationAction(ISD::SETCC, MVT::v2f64, Expand); |
| // FIXME: Create unittest for FNEG and for FABS. |
| setOperationAction(ISD::FNEG, MVT::v2f64, Expand); |
| setOperationAction(ISD::FABS, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::v2f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::v2f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); |
| setOperationAction(ISD::FEXP, MVT::v2f64, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); |
| // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. |
| setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); |
| setOperationAction(ISD::FMA, MVT::v2f64, Expand); |
| |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::v4f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::v4f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP, MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); |
| setOperationAction(ISD::FRINT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); |
| |
| // Mark v2f32 intrinsics. |
| setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::v2f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::v2f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); |
| setOperationAction(ISD::FEXP, MVT::v2f32, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); |
| setOperationAction(ISD::FRINT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); |
| |
| // Neon does not support some operations on v1i64 and v2i64 types. |
| setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
| // Custom handling for some quad-vector types to detect VMULL. |
| setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| // Custom handling for some vector types to avoid expensive expansions |
| setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
| // NEON does not have single-instruction SINT_TO_FP or UINT_TO_FP with a |
| // destination type that is wider than the source, nor does it have a |
| // FP_TO_[SU]INT instruction with a narrower destination than source. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
| |
| setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); |
| |
| // NEON does not have a single-instruction CTPOP for vectors with element |
| // types wider than 8 bits. However, custom lowering can leverage the |
| // v8i8/v16i8 vcnt instruction. |
| setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v1i64, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
| |
| // NEON does not have a single-instruction CTTZ for vectors. |
| setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
| |
| setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); |
| |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); |
| |
| // NEON only has FMA instructions as of VFP4. |
| if (!Subtarget->hasVFP4()) { |
| setOperationAction(ISD::FMA, MVT::v2f32, Expand); |
| setOperationAction(ISD::FMA, MVT::v4f32, Expand); |
| } |
| |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::BUILD_VECTOR); |
| setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
| setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::STORE); |
| setTargetDAGCombine(ISD::FP_TO_SINT); |
| setTargetDAGCombine(ISD::FP_TO_UINT); |
| setTargetDAGCombine(ISD::FDIV); |
| setTargetDAGCombine(ISD::LOAD); |
| |
| // It is legal to extload from v4i8 to v4i16 or v4i32. |
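| // Marking these combinations Legal lets the extending vector load survive |
| // legalization so the NEON load patterns can match it directly. |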
| for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, |
| MVT::v2i32}) { |
| for (MVT VT : MVT::integer_vector_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); |
| setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); |
| } |
| } |
| } |
| |
| if (Subtarget->isFPOnlySP()) { |
| // When targeting a floating-point unit with only single-precision |
| // operations, f64 is legal for the few double-precision instructions which |
| // are present. However, no double-precision operations other than moves, |
| // loads and stores are provided by the hardware. |
| setOperationAction(ISD::FADD, MVT::f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::f64, Expand); |
| setOperationAction(ISD::FMA, MVT::f64, Expand); |
| setOperationAction(ISD::FDIV, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FNEG, MVT::f64, Expand); |
| setOperationAction(ISD::FABS, MVT::f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG2, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG10, MVT::f64, Expand); |
| setOperationAction(ISD::FEXP, MVT::f64, Expand); |
| setOperationAction(ISD::FEXP2, MVT::f64, Expand); |
| setOperationAction(ISD::FCEIL, MVT::f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::f64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); |
| } |
| |
| computeRegisterProperties(Subtarget->getRegisterInfo()); |
| |
| // ARM does not have floating-point extending loads. |
| for (MVT VT : MVT::fp_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
| } |
| |
| // ... or truncating stores |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| |
| // ARM does not have i1 sign extending load. |
| for (MVT VT : MVT::integer_valuetypes()) |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| |
| // ARM supports all 4 flavors of integer indexed load / store. |
| if (!Subtarget->isThumb1Only()) { |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, MVT::i1, Legal); |
| setIndexedLoadAction(im, MVT::i8, Legal); |
| setIndexedLoadAction(im, MVT::i16, Legal); |
| setIndexedLoadAction(im, MVT::i32, Legal); |
| setIndexedStoreAction(im, MVT::i1, Legal); |
| setIndexedStoreAction(im, MVT::i8, Legal); |
| setIndexedStoreAction(im, MVT::i16, Legal); |
| setIndexedStoreAction(im, MVT::i32, Legal); |
| } |
| } else { |
| // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. |
| setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); |
| setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); |
| } |
| |
| setOperationAction(ISD::SADDO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| |
| setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); |
| setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); |
| |
| // i64 operation support. |
| setOperationAction(ISD::MUL, MVT::i64, Expand); |
| setOperationAction(ISD::MULHU, MVT::i32, Expand); |
| if (Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
| } |
| if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() |
| || (Subtarget->isThumb2() && !Subtarget->hasDSP())) |
| setOperationAction(ISD::MULHS, MVT::i32, Expand); |
| |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL, MVT::i64, Custom); |
| setOperationAction(ISD::SRA, MVT::i64, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
| |
| // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. |
| if (Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); |
| } |
| |
| if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| |
| // ARM does not have ROTL. |
| setOperationAction(ISD::ROTL, MVT::i32, Expand); |
| for (MVT VT : MVT::vector_valuetypes()) { |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| } |
| setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i32, Expand); |
| if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::CTLZ, MVT::i32, Expand); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); |
| } |
| |
| // @llvm.readcyclecounter requires the Performance Monitors extension. |
| // Default to the 0 expansion on unsupported platforms. |
| // FIXME: Technically there are older ARM CPUs that have |
| // implementation-specific ways of obtaining this information. |
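| // When the extension is present, the custom lowering reads the PMU cycle |
| // counter (PMCCNTR) via an MRC instruction. |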
| if (Subtarget->hasPerfMon()) |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); |
| |
| // BSWAP (the REV instruction) is only available from ARMv6 onwards. |
| if (!Subtarget->hasV6Ops()) |
| setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
| |
| bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() |
| : Subtarget->hasDivideInARMMode(); |
| if (!hasDivide) { |
| // These are expanded into libcalls if the CPU lacks a hardware divider. |
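| // (__aeabi_idiv / __aeabi_uidiv on AEABI targets, per the RTABI table above.) |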
| setOperationAction(ISD::SDIV, MVT::i32, LibCall); |
| setOperationAction(ISD::UDIV, MVT::i32, LibCall); |
| } |
| |
| if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { |
| setOperationAction(ISD::SDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::i32, Custom); |
| |
| setOperationAction(ISD::SDIV, MVT::i64, Custom); |
| setOperationAction(ISD::UDIV, MVT::i64, Custom); |
| } |
| |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| |
| // Register-based DivRem for AEABI (RTABI 4.2) |
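| // The __aeabi_*divmod helpers return the quotient and remainder together |
| // (in r0/r1 for the 32-bit variants), so a matching div/rem pair over the |
| // same operands folds into a single call. |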
| if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || |
| Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || |
| Subtarget->isTargetWindows()) { |
| setOperationAction(ISD::SREM, MVT::i64, Custom); |
| setOperationAction(ISD::UREM, MVT::i64, Custom); |
| HasStandaloneRem = false; |
| |
| if (Subtarget->isTargetWindows()) { |
| const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, |
| |
| { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } else { |
| const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, |
| |
| { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
| setOperationAction(ISD::SDIVREM, MVT::i32, Custom); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Custom); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Custom); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Custom); |
| } else { |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| } |
| |
| if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT()) |
| for (auto &VT : {MVT::f32, MVT::f64}) |
| setOperationAction(ISD::FPOWI, VT, Custom); |
| |
| setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
| |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| |
| // Use the default implementation. |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| if (Subtarget->isTargetWindows()) |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); |
| else |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); |
| |
| // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use |
| // the default expansion. |
| InsertFencesForAtomic = false; |
| if (Subtarget->hasAnyDataBarrier() && |
| (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { |
| // ATOMIC_FENCE needs custom lowering; the others should have been expanded |
| // to ldrex/strex loops already. |
| setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
| if (!Subtarget->isThumb() || !Subtarget->isMClass()) |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); |
| |
| // On v8, we have particularly efficient implementations of atomic fences |
| // if they can be combined with nearby atomic loads and stores. |
| if (!Subtarget->hasAcquireRelease() || |
| getTargetMachine().getOptLevel() == 0) { |
| // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. |
| InsertFencesForAtomic = true; |
| } |
| } else { |
| // If there's anything we can use as a barrier, go through custom lowering |
| // for ATOMIC_FENCE. |
| // If the target has DMB in Thumb mode, fences can be inserted. |
| if (Subtarget->hasDataBarrier()) |
| InsertFencesForAtomic = true; |
| |
| setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, |
| Subtarget->hasAnyDataBarrier() ? Custom : Expand); |
| |
| // Set them all for expansion, which will force libcalls. |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); |
| // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the |
| // Unordered/Monotonic case. |
| if (!InsertFencesForAtomic) { |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
| |
| // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. |
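| // With them, e.g. a sign_extend_inreg from i8 is a single sxtb; without |
| // them, the node is expanded into a shift-left/shift-right pair. |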
| if (!Subtarget->hasV6Ops()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
| } |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && |
| !Subtarget->isThumb1Only()) { |
| // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR, |
| // iff the target supports VFP2. |
| setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| } |
| |
| // We want to custom lower some of our intrinsics. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
| if (Subtarget->useSjLjEH()) |
| setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
| |
| setOperationAction(ISD::SETCC, MVT::i32, Expand); |
| setOperationAction(ISD::SETCC, MVT::f32, Expand); |
| setOperationAction(ISD::SETCC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SETCC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
| } |
| |
| setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); |
| |
| setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
| setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
| if (Subtarget->hasFullFP16()) |
| setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
| setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
| |
| // We don't support sin/cos/fmod/copysign/pow |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f32, Expand); |
| if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && |
| !Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| } |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f32, Expand); |
| |
| if (!Subtarget->hasVFP4()) { |
| setOperationAction(ISD::FMA, MVT::f64, Expand); |
| setOperationAction(ISD::FMA, MVT::f32, Expand); |
| } |
| |
| // Various VFP goodness |
| if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { |
| // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. |
| if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { |
| setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
| } |
| |
| // fp16 is a special v7 extension that adds f16 <-> f32 conversions. |
| if (!Subtarget->hasFP16()) { |
| setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
| } |
| } |
| |
| // Use __sincos_stret if available. |
| if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| } |
| |
| // FP-ARMv8 implements a lot of rounding-like FP operations. |
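| // (the VRINT* family, plus VMINNM/VMAXNM for the FMINNUM/FMAXNUM nodes). |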
| if (Subtarget->hasFPARMv8()) { |
| setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); |
| setOperationAction(ISD::FRINT, MVT::f32, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
| |
| if (!Subtarget->isFPOnlySP()) { |
| setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); |
| setOperationAction(ISD::FRINT, MVT::f64, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
| } |
| } |
| |
| if (Subtarget->hasNEON()) { |
| // vmin and vmax aren't available in a scalar form, so we use |
| // a NEON instruction with an undef lane instead. |
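| // For example, a scalar f32 fminimum is done with vmin.f32 on a D register, |
| // with the unused lane left undefined. |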
| setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); |
| |
| setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); |
| } |
| } |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| // ARMISD::VMOVRRD - No need to call setTargetDAGCombine |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::MUL); |
| setTargetDAGCombine(ISD::AND); |
| setTargetDAGCombine(ISD::OR); |
| setTargetDAGCombine(ISD::XOR); |
| |
| if (Subtarget->hasV6Ops()) |
| setTargetDAGCombine(ISD::SRL); |
| if (Subtarget->isThumb1Only()) |
| setTargetDAGCombine(ISD::SHL); |
| |
| setStackPointerRegisterToSaveRestore(ARM::SP); |
| |
| if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || |
| !Subtarget->hasVFP2()) |
| setSchedulingPreference(Sched::RegPressure); |
| else |
| setSchedulingPreference(Sched::Hybrid); |
| |
| // Temporary - rewrite interface to use type. |
| MaxStoresPerMemset = 8; |
| MaxStoresPerMemsetOptSize = 4; |
| MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores |
| MaxStoresPerMemcpyOptSize = 2; |
| MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores |
| MaxStoresPerMemmoveOptSize = 2; |
| |
| // On ARM, arguments smaller than 4 bytes are extended, so all arguments |
| // are at least 4-byte aligned. |
| setMinStackArgumentAlignment(4); |
| |
| // Prefer likely predicted branches to selects on out-of-order cores. |
| PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); |
| |
| setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); |
| |
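| // The argument here is a log2 value: 2-byte alignment for Thumb functions, |
| // 4-byte alignment for ARM functions. |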
| setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); |
| } |
| |
| bool ARMTargetLowering::useSoftFloat() const { |
| return Subtarget->useSoftFloat(); |
| } |
| |
| // FIXME: It might make sense to define the representative register class as the |
| // nearest super-register that has a non-null superset. For example, DPR_VFP2 is |
| // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, |
| // SPR's representative would be DPR_VFP2. This should work well if register |
| // pressure tracking were modified such that a register use would increment the |
| // pressure of the register class's representative and all of its super |
| // classes' representatives transitively. We have not implemented this because |
| // of the difficulty prior to coalescing of modeling operand register classes |
| // due to the common occurrence of cross class copies and subregister insertions |
| // and extractions. |
| std::pair<const TargetRegisterClass *, uint8_t> |
| ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
| MVT VT) const { |
| const TargetRegisterClass *RRC = nullptr; |
| uint8_t Cost = 1; |
| switch (VT.SimpleTy) { |
| default: |
| return TargetLowering::findRepresentativeClass(TRI, VT); |
| // Use DPR as the representative register class for all floating-point |
| // and vector types. Since there are 32 SPR registers and 32 DPR registers, |
| // the cost is 1 for both f32 and f64. |
| case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: |
| case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: |
| RRC = &ARM::DPRRegClass; |
| // When NEON is used for SP, only half of the register file is available |
| // because operations that define both SP and DP results will be constrained |
| // to the VFP2 class (D0-D15). We currently model this constraint prior to |
| // coalescing by double-counting the SP regs. See the FIXME above. |
| if (Subtarget->useNEONForSinglePrecisionFP()) |
| Cost = 2; |
| break; |
| case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
| case MVT::v4f32: case MVT::v2f64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 2; |
| break; |
| case MVT::v4i64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 4; |
| break; |
| case MVT::v8i64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 8; |
| break; |
| } |
| return std::make_pair(RRC, Cost); |
| } |
| |
| const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { |
| switch ((ARMISD::NodeType)Opcode) { |
| case ARMISD::FIRST_NUMBER: break; |
| case ARMISD::Wrapper: return "ARMISD::Wrapper"; |
| case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; |
| case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; |
| case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL"; |
| case ARMISD::CALL: return "ARMISD::CALL"; |
| case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; |
| case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; |
| case ARMISD::BRCOND: return "ARMISD::BRCOND"; |
| case ARMISD::BR_JT: return "ARMISD::BR_JT"; |
| case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; |
| case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; |
| case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; |
| case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; |
| case ARMISD::CMP: return "ARMISD::CMP"; |
| case ARMISD::CMN: return "ARMISD::CMN"; |
| case ARMISD::CMPZ: return "ARMISD::CMPZ"; |
| case ARMISD::CMPFP: return "ARMISD::CMPFP"; |
| case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; |
| case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; |
| case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; |
| |
| case ARMISD::CMOV: return "ARMISD::CMOV"; |
| case ARMISD::SUBS: return "ARMISD::SUBS"; |
| |
| case ARMISD::SSAT: return "ARMISD::SSAT"; |
| case ARMISD::USAT: return "ARMISD::USAT"; |
| |
| case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; |
| case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; |
| case ARMISD::RRX: return "ARMISD::RRX"; |
| |
| case ARMISD::ADDC: return "ARMISD::ADDC"; |
| case ARMISD::ADDE: return "ARMISD::ADDE"; |
| case ARMISD::SUBC: return "ARMISD::SUBC"; |
| case ARMISD::SUBE: return "ARMISD::SUBE"; |
| |
| case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; |
| case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; |
| case ARMISD::VMOVhr: return "ARMISD::VMOVhr"; |
| case ARMISD::VMOVrh: return "ARMISD::VMOVrh"; |
| case ARMISD::VMOVSR: return "ARMISD::VMOVSR"; |
| |
| case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; |
| case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; |
| case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; |
| |
| case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; |
| |
| case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; |
| |
| case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; |
| |
| case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; |
| |
| case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; |
| |
| case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; |
| case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; |
| |
| case ARMISD::VCEQ: return "ARMISD::VCEQ"; |
| case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; |
| case ARMISD::VCGE: return "ARMISD::VCGE"; |
| case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; |
| case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; |
| case ARMISD::VCGEU: return "ARMISD::VCGEU"; |
| case ARMISD::VCGT: return "ARMISD::VCGT"; |
| case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; |
| case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; |
| case ARMISD::VCGTU: return "ARMISD::VCGTU"; |
| case ARMISD::VTST: return "ARMISD::VTST"; |
| |
| case ARMISD::VSHL: return "ARMISD::VSHL"; |
| case ARMISD::VSHRs: return "ARMISD::VSHRs"; |
| case ARMISD::VSHRu: return "ARMISD::VSHRu"; |
| case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; |
| case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; |
| case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; |
| case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; |
| case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; |
| case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; |
| case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; |
| case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; |
| case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; |
| case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; |
| case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; |
| case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; |
| case ARMISD::VSLI: return "ARMISD::VSLI"; |
| case ARMISD::VSRI: return "ARMISD::VSRI"; |
| case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; |
| case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; |
| case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; |
| case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; |
| case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; |
| case ARMISD::VDUP: return "ARMISD::VDUP"; |
| case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; |
| case ARMISD::VEXT: return "ARMISD::VEXT"; |
| case ARMISD::VREV64: return "ARMISD::VREV64"; |
| case ARMISD::VREV32: return "ARMISD::VREV32"; |
| case ARMISD::VREV16: return "ARMISD::VREV16"; |
| case ARMISD::VZIP: return "ARMISD::VZIP"; |
| case ARMISD::VUZP: return "ARMISD::VUZP"; |
| case ARMISD::VTRN: return "ARMISD::VTRN"; |
| case ARMISD::VTBL1: return "ARMISD::VTBL1"; |
| case ARMISD::VTBL2: return "ARMISD::VTBL2"; |
| case ARMISD::VMULLs: return "ARMISD::VMULLs"; |
| case ARMISD::VMULLu: return "ARMISD::VMULLu"; |
| case ARMISD::UMAAL: return "ARMISD::UMAAL"; |
| case ARMISD::UMLAL: return "ARMISD::UMLAL"; |
| case ARMISD::SMLAL: return "ARMISD::SMLAL"; |
| case ARMISD::SMLALBB: return "ARMISD::SMLALBB"; |
| case ARMISD::SMLALBT: return "ARMISD::SMLALBT"; |
| case ARMISD::SMLALTB: return "ARMISD::SMLALTB"; |
| case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; |
| case ARMISD::SMULWB: return "ARMISD::SMULWB"; |
| case ARMISD::SMULWT: return "ARMISD::SMULWT"; |
| case ARMISD::SMLALD: return "ARMISD::SMLALD"; |
| case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; |
| case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; |
| case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; |
| case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; |
| case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; |
| case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; |
| case ARMISD::BFI: return "ARMISD::BFI"; |
| case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; |
| case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; |
| case ARMISD::VBSL: return "ARMISD::VBSL"; |
| case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; |
| case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; |
| case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; |
| case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; |
| case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; |
| case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; |
| case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; |
| case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; |
| case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; |
| case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; |
| case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; |
| case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; |
| case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; |
| case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; |
| case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; |
| case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; |
| case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; |
| case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; |
| case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; |
| case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; |
| case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; |
| case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; |
| case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; |
| } |
| return nullptr; |
| } |
| |
| EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return getPointerTy(DL); |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| /// getRegClassFor - Return the register class that should be used for the |
| /// specified value type. |
| const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { |
| // Map v4i64 to QQ registers but do not make the type legal. Similarly map |
| // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to |
| // load / store 4 to 8 consecutive D registers. |
| if (Subtarget->hasNEON()) { |
| if (VT == MVT::v4i64) |
| return &ARM::QQPRRegClass; |
| if (VT == MVT::v8i64) |
| return &ARM::QQQQPRRegClass; |
| } |
| return TargetLowering::getRegClassFor(VT); |
| } |
| |
| // memcpy, and other memory intrinsics, typically try to use LDM/STM if the |
| // source/dest is aligned and the copy size is large enough. We therefore want |
| // to align such objects passed to memory intrinsics. |
| bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, |
| unsigned &PrefAlign) const { |
| if (!isa<MemIntrinsic>(CI)) |
| return false; |
| MinSize = 8; |
| // On ARM11 onwards (excluding M class), 8-byte aligned LDM is typically 1 |
| // cycle faster than 4-byte aligned LDM. |
| PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); |
| return true; |
| } |
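| |
| // For example, with the values above a memcpy of 64 bytes on an ARMv6+ |
| // (non-M-class) core is a candidate for 8-byte alignment of its pointer |
| // arguments, while M-class and pre-v6 targets fall back to 4-byte alignment. |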
| |
| // Create a fast isel object. |
| FastISel * |
| ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
| const TargetLibraryInfo *libInfo) const { |
| return ARM::createFastISel(funcInfo, libInfo); |
| } |
| |
| Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { |
| unsigned NumVals = N->getNumValues(); |
| if (!NumVals) |
| return Sched::RegPressure; |
| |
| for (unsigned i = 0; i != NumVals; ++i) { |
| EVT VT = N->getValueType(i); |
| if (VT == MVT::Glue || VT == MVT::Other) |
| continue; |
| if (VT.isFloatingPoint() || VT.isVector()) |
| return Sched::ILP; |
| } |
| |
| if (!N->isMachineOpcode()) |
| return Sched::RegPressure; |
| |
| // Loads are scheduled for latency even if the instruction itinerary |
| // is not available. |
| const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
| const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); |
| |
| if (MCID.getNumDefs() == 0) |
| return Sched::RegPressure; |
| if (!Itins->isEmpty() && |
| Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) |
| return Sched::ILP; |
| |
| return Sched::RegPressure; |
| } |
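| |
| // In effect, nodes producing FP or vector values are scheduled for ILP, as |
| // are machine nodes whose first definition has an operand cycle greater than |
| // 2 in the itinerary; everything else is scheduled for register pressure. |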
| |
| //===----------------------------------------------------------------------===// |
| // Lowering Code |
| //===----------------------------------------------------------------------===// |
| |
| static bool isSRL16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SRL) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| static bool isSRA16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SRA) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| static bool isSHL16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SHL) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| // Check for a signed 16-bit value. We special-case SRA because it keeps |
| // things simpler when also looking for SRAs that aren't sign-extending a |
| // smaller value. Without the check, we'd need to take extra care with the |
| // checking order for some operations. |
| static bool isS16(const SDValue &Op, SelectionDAG &DAG) { |
| if (isSRA16(Op)) |
| return isSHL16(Op.getOperand(0)); |
| return DAG.ComputeNumSignBits(Op) == 17; |
| } |
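| |
| // For example, (sra (shl x, 16), 16) is recognized directly, as is any node |
| // for which the DAG can prove exactly 17 sign bits. |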
| |
| /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC |
| static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { |
| switch (CC) { |
| default: llvm_unreachable("Unknown condition code!"); |
| case ISD::SETNE: return ARMCC::NE; |
| case ISD::SETEQ: return ARMCC::EQ; |
| case ISD::SETGT: return ARMCC::GT; |
| case ISD::SETGE: return ARMCC::GE; |
| case ISD::SETLT: return ARMCC::LT; |
| case ISD::SETLE: return ARMCC::LE; |
| case ISD::SETUGT: return ARMCC::HI; |
| case ISD::SETUGE: return ARMCC::HS; |
| case ISD::SETULT: return ARMCC::LO; |
| case ISD::SETULE: return ARMCC::LS; |
| } |
| } |
| |
| /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. |
| static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, |
| ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { |
| CondCode2 = ARMCC::AL; |
| InvalidOnQNaN = true; |
| switch (CC) { |
| default: llvm_unreachable("Unknown FP condition!"); |
| case ISD::SETEQ: |
| case ISD::SETOEQ: |
| CondCode = ARMCC::EQ; |
| InvalidOnQNaN = false; |
| break; |
| case ISD::SETGT: |
| case ISD::SETOGT: CondCode = ARMCC::GT; break; |
| case ISD::SETGE: |
| case ISD::SETOGE: CondCode = ARMCC::GE; break; |
| case ISD::SETOLT: CondCode = ARMCC::MI; break; |
| case ISD::SETOLE: CondCode = ARMCC::LS; break; |
| case ISD::SETONE: |
| CondCode = ARMCC::MI; |
| CondCode2 = ARMCC::GT; |
| InvalidOnQNaN = false; |
| break; |
| case ISD::SETO: CondCode = ARMCC::VC; break; |
| case ISD::SETUO: CondCode = ARMCC::VS; break; |
| case ISD::SETUEQ: |
| CondCode = ARMCC::EQ; |
| CondCode2 = ARMCC::VS; |
| InvalidOnQNaN = false; |
| break; |
| case ISD::SETUGT: CondCode = ARMCC::HI; break; |
| case ISD::SETUGE: CondCode = ARMCC::PL; break; |
| case ISD::SETLT: |
| case ISD::SETULT: CondCode = ARMCC::LT; break; |
| case ISD::SETLE: |
| case ISD::SETULE: CondCode = ARMCC::LE; break; |
| case ISD::SETNE: |
| case ISD::SETUNE: |
| CondCode = ARMCC::NE; |
| InvalidOnQNaN = false; |
| break; |
| } |
| } |
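| |
| // Note that the conditions needing two checks above (e.g. SETONE -> MI, GT |
| // and SETUEQ -> EQ, VS) leave CondCode2 != ARMCC::AL, indicating to callers |
| // that a second predicated check is required. |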
| |
| //===----------------------------------------------------------------------===// |
| // Calling Convention Implementation |
| //===----------------------------------------------------------------------===// |
| |
| /// getEffectiveCallingConv - Get the effective calling convention, taking into |
| /// account the presence of floating-point hardware and calling-convention |
| /// limitations, such as support for variadic functions. |
| CallingConv::ID |
| ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, |
| bool isVarArg) const { |
| switch (CC) { |
| default: |
| report_fatal_error("Unsupported calling convention"); |
| case CallingConv::ARM_AAPCS: |
| case CallingConv::ARM_APCS: |
| case CallingConv::GHC: |
| return CC; |
| case CallingConv::PreserveMost: |
| return CallingConv::PreserveMost; |
| case CallingConv::ARM_AAPCS_VFP: |
| case CallingConv::Swift: |
| return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; |
| case CallingConv::C: |
| if (!Subtarget->isAAPCS_ABI()) |
| return CallingConv::ARM_APCS; |
| else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && |
| getTargetMachine().Options.FloatABIType == FloatABI::Hard && |
| !isVarArg) |
| return CallingConv::ARM_AAPCS_VFP; |
| else |
| return CallingConv::ARM_AAPCS; |
| case CallingConv::Fast: |
| case CallingConv::CXX_FAST_TLS: |
| if (!Subtarget->isAAPCS_ABI()) { |
| if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) |
| return CallingConv::Fast; |
| return CallingConv::ARM_APCS; |
| } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) |
| return CallingConv::ARM_AAPCS_VFP; |
| else |
| return CallingConv::ARM_AAPCS; |
| } |
| } |
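| |
| // For example, a non-variadic CallingConv::C call on an AAPCS target with |
| // VFP2, a hard-float ABI and ARM/Thumb2 code resolves to ARM_AAPCS_VFP above, |
| // whereas the same call on Thumb1 or with a variadic signature resolves to |
| // ARM_AAPCS. |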
| |
| CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, |
| bool isVarArg) const { |
| return CCAssignFnForNode(CC, false, isVarArg); |
| } |
| |
| CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, |
| bool isVarArg) const { |
| return CCAssignFnForNode(CC, true, isVarArg); |
| } |
| |
| /// CCAssignFnForNode - Selects the correct CCAssignFn for the given |
| /// CallingConvention. |
| CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, |
| bool Return, |
| bool isVarArg) const { |
| switch (getEffectiveCallingConv(CC, isVarArg)) { |
| default: |
| report_fatal_error("Unsupported calling convention"); |
| case CallingConv::ARM_APCS: |
| return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); |
| case CallingConv::ARM_AAPCS: |
| return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
| case CallingConv::ARM_AAPCS_VFP: |
| return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); |
| case CallingConv::Fast: |
| return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); |
| case CallingConv::GHC: |
| return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); |
| case CallingConv::PreserveMost: |
| return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
| } |
| } |
| |
| /// LowerCallResult - Lower the result values of a call into the |
| /// appropriate copies out of appropriate physical registers. |
| SDValue ARMTargetLowering::LowerCallResult( |
| SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
| const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
| SDValue ThisVal) const { |
| // Assign locations to each value returned by this call. |
| SmallVector<CCValAssign, 16> RVLocs; |
| CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
| *DAG.getContext()); |
| CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); |
| |
| // Copy all of the result registers out of their specified physreg. |
| for (unsigned i = 0; i != RVLocs.size(); ++i) { |
| CCValAssign VA = RVLocs[i]; |
| |
| // Pass 'this' value directly from the argument to return value, to avoid |
| // reg unit interference |
| if (i == 0 && isThisReturn) { |
| assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && |
| "unexpected return calling convention register assignment"); |
| InVals.push_back(ThisVal); |
| continue; |
| } |
| |
| SDValue Val; |
| if (VA.needsCustom()) { |
| // Handle f64 or half of a v2f64. |
| SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
| InFlag); |
| Chain = Lo.getValue(1); |
| InFlag = Lo.getValue(2); |
| VA = RVLocs[++i]; // skip ahead to next loc |
| SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
| InFlag); |
| Chain = Hi.getValue(1); |
| InFlag = Hi.getValue(2); |
| if (!Subtarget->isLittle()) |
| std::swap (Lo, Hi); |
| Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
| |
| if (VA.getLocVT() == MVT::v2f64) { |
| SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
| DAG.getConstant(0, dl, MVT::i32)); |
| |
| VA = RVLocs[++i]; // skip ahead to next loc |
| Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
| Chain = Lo.getValue(1); |
| InFlag = Lo.getValue(2); |
| VA = RVLocs[++i]; // skip ahead to next loc |
| Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
| Chain = Hi.getValue(1); |
| InFlag = Hi.getValue(2); |
| if (!Subtarget->isLittle()) |
| std::swap (Lo, Hi); |
| Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
| Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
| DAG.getConstant(1, dl, MVT::i32)); |
| } |
| } else { |
| Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), |
| InFlag); |
| Chain = Val.getValue(1); |
| InFlag = Val.getValue(2); |
| } |
| |
| switch (VA.getLocInfo()) { |
| default: llvm_unreachable("Unknown loc info!"); |
| case CCValAssign::Full: break; |
| case CCValAssign::BCvt: |
| Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); |
| break; |
| } |
| |
| InVals.push_back(Val); |
| } |
| |
| return Chain; |
| } |
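| |
| // For illustration: with a soft-float register assignment an f64 result comes |
| // back as two i32 halves in a pair of GPRs and is reassembled with |
| // ARMISD::VMOVDRR above; a v2f64 result is rebuilt from two such f64 pieces |
| // via INSERT_VECTOR_ELT. |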
| |
| /// LowerMemOpCallTo - Store the argument to the stack. |
| SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
| SDValue Arg, const SDLoc &dl, |
| SelectionDAG &DAG, |
| const CCValAssign &VA, |
| ISD::ArgFlagsTy Flags) const { |
| unsigned LocMemOffset = VA.getLocMemOffset(); |
| SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
| PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
| StackPtr, PtrOff); |
| return DAG.getStore( |
| Chain, dl, Arg, PtrOff, |
| MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); |
| } |
| |
| void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, |
| SDValue Chain, SDValue &Arg, |
| RegsToPassVector &RegsToPass, |
| CCValAssign &VA, CCValAssign &NextVA, |
| SDValue &StackPtr, |
| SmallVectorImpl<SDValue> &MemOpChains, |
| ISD::ArgFlagsTy Flags) const { |
| SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, |
| DAG.getVTList(MVT::i32, MVT::i32), Arg); |
| unsigned id = Subtarget->isLittle() ? 0 : 1; |
| RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); |
| |
| if (NextVA.isRegLoc()) |
| RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); |
| else { |
| assert(NextVA.isMemLoc()); |
| if (!StackPtr.getNode()) |
| StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, |
| getPointerTy(DAG.getDataLayout())); |
| |
| MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), |
| dl, DAG, NextVA, |
| Flags)); |
| } |
| } |
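| |
| // Illustrative data flow: an f64 argument is split with ARMISD::VMOVRRD into |
| // two i32 halves; one half (chosen by endianness) goes in VA's register, and |
| // the other goes in NextVA's register if it is a register location, otherwise |
| // into NextVA's stack slot. |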
| |
| /// LowerCall - Lower a call into a callseq_start <- |
| /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter |
| /// nodes. |
| SDValue |
| ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
| SmallVectorImpl<SDValue> &InVals) const { |
| SelectionDAG &DAG = CLI.DAG; |
| SDLoc &dl = CLI.DL; |
| SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
| SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
| SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
| SDValue Chain = CLI.Chain; |
| SDValue Callee = CLI.Callee; |
| bool &isTailCall = CLI.IsTailCall; |
| CallingConv::ID CallConv = CLI.CallConv; |
| bool doesNotRet = CLI.DoesNotReturn; |
| bool isVarArg = CLI.IsVarArg; |
| |
| MachineFunction &MF = DAG.getMachineFunction(); |
| bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); |
| bool isThisReturn = false; |
| bool isSibCall = false; |
| auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); |
| |
| // Disable tail calls if they're not supported. |
| if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") |
| isTailCall = false; |
| |
| if (isTailCall) { |
| // Check if it's really possible to do a tail call. |
| isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, |
| isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), |
| Outs, OutVals, Ins, DAG); |
| if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) |
| report_fatal_error("failed to perform tail call elimination on a call " |
| "site marked musttail"); |
| // We don't support GuaranteedTailCallOpt for ARM, only automatically |
| // detected sibcalls. |
| if (isTailCall) { |
| ++NumTailCalls; |
| isSibCall = true; |
| } |
| } |
| |
| // Analyze operands of the call, assigning locations to each operand. |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
| *DAG.getContext()); |
| CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); |
| |
| // Get a count of how many bytes are to be pushed on the stack. |
| unsigned NumBytes = CCInfo.getNextStackOffset(); |
| |
| // For tail calls, memory operands are available in our caller's stack. |
| if (isSibCall) |
| NumBytes = 0; |
| |
| // Adjust the stack pointer for the new arguments... |
| // These operations are automatically eliminated by the prolog/epilog pass |
| if (!isSibCall) |
| Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); |
| |
| SDValue StackPtr = |
| DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); |
| |
| RegsToPassVector RegsToPass; |
| SmallVector<SDValue, 8> MemOpChains; |
| |
| // Walk the register/memloc assignments, inserting copies/loads. In the case |
| // of tail call optimization, arguments are handled later. |
| for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); |
| i != e; |
| ++i, ++realArgIdx) { |
| CCValAssign &VA = ArgLocs[i]; |
| SDValue Arg = OutVals[realArgIdx]; |
| ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; |
| bool isByVal = Flags.isByVal(); |
| |
| // Promote the value if needed. |
| switch (VA.getLocInfo()) { |
| default: llvm_unreachable("Unknown loc info!"); |
| case CCValAssign::Full: break; |
| case CCValAssign::SExt: |
| Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::ZExt: |
| Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::AExt: |
| Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::BCvt: |
| Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); |
| break; |
| } |
| |
| // f64 and v2f64 might be passed in i32 pairs and must be split into pieces |
| if (VA.needsCustom()) { |
| if (VA.getLocVT() == MVT::v2f64) { |
| SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
| DAG.getConstant(0, dl, MVT::i32)); |
| SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
| DAG.getConstant(1, dl, MVT::i32)); |
| |
| PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, |
| VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); |
| |
| VA = ArgLocs[++i]; // skip ahead to next loc |
| if (VA.isRegLoc()) { |
| PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, |
| VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); |
| } else { |
| assert(VA.isMemLoc()); |
| |
| MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, |
| dl, DAG, VA, Flags)); |
| } |
| } else { |
| PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], |
| StackPtr, MemOpChains, Flags); |
| } |
| } else if (VA.isRegLoc()) { |
| if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && |
| Outs[0].VT == MVT::i32) { |
| assert(VA.getLocVT() == MVT::i32 && |
| "unexpected calling convention register assignment"); |
| assert(!Ins.empty() && Ins[0].VT == MVT::i32 && |
| "unexpected use of 'returned'"); |
| isThisReturn = true; |
| } |
| RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
| } else if (isByVal) { |
| assert(VA.isMemLoc()); |
| unsigned offset = 0; |
| |
| // True if this byval aggregate will be split between registers |
| // and memory. |
| unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); |
| unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); |
| |
| if (CurByValIdx < ByValArgsCount) { |
| |
| unsigned RegBegin, RegEnd; |
| CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); |
| |
| EVT PtrVT = |
| DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
| unsigned int i, j; |
| for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { |
| SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); |
| SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); |
| SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, |
| MachinePointerInfo(), |
| DAG.InferPtrAlignment(AddArg)); |
| MemOpChains.push_back(Load.getValue(1)); |
| RegsToPass.push_back(std::make_pair(j, Load)); |
| } |
| |
| // If the parameter size exceeds the register area, the "offset" value |
| // helps us calculate the stack slot for the remaining part properly. |
| offset = RegEnd - RegBegin; |
| |
| CCInfo.nextInRegsParam(); |
| } |
| |
| if (Flags.getByValSize() > 4*offset) { |
| auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| unsigned LocMemOffset = VA.getLocMemOffset(); |
| SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
| SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); |
| SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); |
| SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); |
| SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, |
| MVT::i32); |
| SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, |
| MVT::i32); |
| |
| SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
| SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; |
| MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, |
| Ops)); |
| } |
| } else if (!isSibCall) { |
| assert(VA.isMemLoc()); |
| |
| MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, |
| dl, DAG, VA, Flags)); |
| } |
| } |
| |
| if (!MemOpChains.empty()) |
| Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
| |
| // Build a sequence of copy-to-reg nodes chained together with token chain |
| // and flag operands which copy the outgoing args into the appropriate regs. |
| SDValue InFlag; |
| // Tail-call byval lowering might overwrite argument registers, so in the case |
| // of tail-call optimization the copies to registers are lowered later. |
| if (!isTailCall) |
| for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
| Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
| RegsToPass[i].second, InFlag); |
| InFlag = Chain.getValue(1); |
| } |
| |
| // For tail calls lower the arguments to the 'real' stack slot. |
| if (isTailCall) { |
| // Force all the incoming stack arguments to be loaded from the stack |
| // before any new outgoing arguments are stored to the stack, because the |
| // outgoing stack slots may alias the incoming argument stack slots, and |
| // the alias isn't otherwise explicit. This is slightly more conservative |
| // than necessary, because it means that each store effectively depends |
| // on every argument instead of just those arguments it would clobber. |
| |
| // Do not flag preceding copytoreg stuff together with the following stuff. |
| InFlag = SDValue(); |
| for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
| Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
| RegsToPass[i].second, InFlag); |
| InFlag = Chain.getValue(1); |
| } |
| InFlag = SDValue(); |
| } |
| |
| // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every |
| // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol |
| // node so that legalize doesn't hack it. |
| bool isDirect = false; |
| |
| const TargetMachine &TM = getTargetMachine(); |
| const Module *Mod = MF.getFunction().getParent(); |
| const GlobalValue *GV = nullptr; |
| if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) |
| GV = G->getGlobal(); |
| bool isStub = |
| !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO(); |
| |
| bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); |
| bool isLocalARMFunc = false; |
| ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
| auto PtrVt = getPointerTy(DAG.getDataLayout()); |
| |
| if (Subtarget->genLongCalls()) { |
| assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && |
| "long-calls codegen is not position independent!"); |
| // Handle a global address or an external symbol. If it's not one of |
| // those, the target's already in a register, so we don't need to do |
| // anything extra. |
| if (isa<GlobalAddressSDNode>(Callee)) { |
| // Create a constant pool entry for the callee address |
| unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
| ARMConstantPoolValue *CPV = |
| ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); |
| |
| // Get the address of the callee into a register |
| SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); |
| CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
| Callee = DAG.getLoad( |
| PtrVt, dl, DAG.getEntryNode(), CPAddr, |
| MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
| } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { |
| const char *Sym = S->getSymbol(); |
| |
| // Create a constant pool entry for the callee address |
| unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
| ARMConstantPoolValue *CPV = |
| ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, |
| ARMPCLabelIndex, 0); |
| // Get the address of the callee into a register |
| SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); |
| CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
| Callee = DAG.getLoad( |
| PtrVt, dl, DAG.getEntryNode(), CPAddr, |
| MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
| } |
| } else if (isa<GlobalAddressSDNode>(Callee)) { |
| // If we're optimizing for minimum size and the function is called three or |
| // more times in this block, we can improve codesize by calling indirectly |
| // as BLXr has a 16-bit encoding. |
| auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); |
| auto *BB = CLI.CS.getParent(); |
| bool PreferIndirect = |
| Subtarget->isThumb() && Subtarget->optForMinSize() && |
| count_if(GV->users(), [&BB](const User *U) { |
| return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; |
| }) > 2; |
| |
| if (!PreferIndirect) { |
| isDirect = true; |
| bool isDef = GV->isStrongDefinitionForLinker(); |
| |
| // ARM call to a local ARM function is predicable. |
| isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); |
| // tBX takes a register source operand. |
| if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { |
| assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); |
| Callee = DAG.getNode( |
| ARMISD::WrapperPIC, dl, PtrVt, |
| DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); |
| Callee = DAG.getLoad( |
| PtrVt, dl, DAG.getEntryNode(), Callee, |
| MachinePointerInfo::getGOT(DAG.getMachineFunction()), |
| /* Alignment = */ 0, MachineMemOperand::MODereferenceable | |
| MachineMemOperand::MOInvariant); |
| } else if (Subtarget->isTargetCOFF()) { |
| assert(Subtarget->isTargetWindows() && |
| "Windows is the only supported COFF target"); |
| unsigned TargetFlags = GV->hasDLLImportStorageClass() |
| ? ARMII::MO_DLLIMPORT |
| : ARMII::MO_NO_FLAG; |
| Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, |
| TargetFlags); |
| if (GV->hasDLLImportStorageClass()) |
| Callee = |
| DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), |
| DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), |
| MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
| } else { |
| Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); |
| } |
| } |
| } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
| isDirect = true; |
| // tBX takes a register source operand. |
| const char *Sym = S->getSymbol(); |
| if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { |
| unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
| ARMConstantPoolValue *CPV = |
| ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, |
| ARMPCLabelIndex, 4); |
| SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); |
| CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
| Callee = DAG.getLoad( |
| PtrVt, dl, DAG.getEntryNode(), CPAddr, |
| MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
| SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); |
| Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); |
| } else { |
| Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); |
| } |
| } |
| |
| // FIXME: handle tail calls differently. |
| unsigned CallOpc; |
| if (Subtarget->isThumb()) { |
| if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) |
| CallOpc = ARMISD::CALL_NOLINK; |
| else |
| CallOpc = ARMISD::CALL; |
| } else { |
| if (!isDirect && !Subtarget->hasV5TOps()) |
| CallOpc = ARMISD::CALL_NOLINK; |
| else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && |
| // Emit regular call when code size is the priority |
| !Subtarget->optForMinSize()) |
| // "mov lr, pc; b _foo" to avoid confusing the RSP |
| CallOpc = ARMISD::CALL_NOLINK; |
| else |
| CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; |
| } |
| |
| std::vector<SDValue> Ops; |
| Ops.push_back(Chain); |
| Ops.push_back(Callee); |
| |
| // Add argument registers to the end of the list so that they are known live |
| // into the call. |
| for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
| Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
| RegsToPass[i].second.getValueType())); |
| |
| // Add a register mask operand representing the call-preserved registers. |
| if (!isTailCall) { |
| const uint32_t *Mask; |
| const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); |
| if (isThisReturn) { |
| // For 'this' returns, use the R0-preserving mask if applicable |
| Mask = ARI->getThisReturnPreservedMask(MF, CallConv); |
| if (!Mask) { |
| // Set isThisReturn to false if the calling convention is not one that |
| // allows 'returned' to be modeled in this way, so LowerCallResult does |
| // not try to pass 'this' straight through |
| isThisReturn = false; |
| Mask = ARI->getCallPreservedMask(MF, CallConv); |
| } |
| } else |
| Mask = ARI->getCallPreservedMask(MF, CallConv); |
| |
| assert(Mask && "Missing call preserved mask for calling convention"); |
| Ops.push_back(DAG.getRegisterMask(Mask)); |
| } |
| |
| if (InFlag.getNode()) |
| Ops.push_back(InFlag); |
| |
| SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
| if (isTailCall) { |
| MF.getFrameInfo().setHasTailCall(); |
| return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); |
| } |
| |
| // Returns a chain and a flag for retval copy to use. |
| Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); |
| InFlag = Chain.getValue(1); |
| |
| Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), |
| DAG.getIntPtrConstant(0, dl, true), InFlag, dl); |
| if (!Ins.empty()) |
| InFlag = Chain.getValue(1); |
| |
| // Handle result values, copying them out of physregs into vregs that we |
| // return. |
| return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, |
| InVals, isThisReturn, |
| isThisReturn ? OutVals[0] : SDValue()); |
| } |
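| |
| // Roughly, a lowered non-tail call therefore looks like: |
| //   callseq_start -> stores / COPY_STRUCT_BYVAL for stack arguments |
| //                 -> CopyToReg for register arguments |
| //                 -> ARMISD::CALL / CALL_PRED / CALL_NOLINK |
| //                 -> callseq_end -> CopyFromReg for results, |
| // while a sibcall skips the callseq nodes and ends in ARMISD::TC_RETURN. |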
| |
| /// HandleByVal - Every parameter *after* a byval parameter is passed |
| /// on the stack. Remember the next parameter register to allocate, |
| /// and then confiscate the rest of the parameter registers to ensure |
| /// this. |
| void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, |
| unsigned Align) const { |
| // Byval (as with any stack) slots are always at least 4 byte aligned. |
| Align = std::max(Align, 4U); |
| |
| unsigned Reg = State->AllocateReg(GPRArgRegs); |
| if (!Reg) |
| return; |
| |
| unsigned AlignInRegs = Align / 4; |
| unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; |
| for (unsigned i = 0; i < Waste; ++i) |
| Reg = State->AllocateReg(GPRArgRegs); |
| |
| if (!Reg) |
| return; |
| |
| unsigned Excess = 4 * (ARM::R4 - Reg); |
| |
| // Special case when NSAA != SP and the parameter size is greater than the |
| // size of all remaining GPR regs. In that case we can't split the parameter; |
| // we must send it to the stack. We also must set NCRN to R4, so that all |
| // remaining registers are wasted. |
| const unsigned NSAAOffset = State->getNextStackOffset(); |
| if (NSAAOffset != 0 && Size > Excess) { |
| while (State->AllocateReg(GPRArgRegs)) |
| ; |
| return; |
| } |
| |
| // The first register for the byval parameter is the first register that |
| // wasn't allocated before this method call, i.e. "Reg". If the parameter is |
| // small enough to be saved in the range [Reg, R4), then the end (first past |
| // the last) register is Reg + param-size-in-regs; otherwise the parameter is |
| // split between registers and the stack, and the end register is R4. |
| unsigned ByValRegBegin = Reg; |
| unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); |
| State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); |
| // Note that the first register was already allocated at the beginning of this |
| // method; allocate the remaining registers we need. |
| for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) |
| State->AllocateReg(GPRArgRegs); |
| // A byval parameter that is split between registers and memory needs its |
| // size truncated here. |
| // In the case where the entire structure fits in registers, we set the |
| // size in memory to zero. |
| Size = std::max<int>(Size - Excess, 0); |
| } |
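| |
| // Worked example (assuming R1 is the next free GPR and nothing has been |
| // assigned to the stack yet): a 16-byte, 4-byte-aligned byval gets R1-R3 |
| // (Excess = 12 bytes) and Size is reduced to 4, the portion that still |
| // lives in memory. |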
| |
| /// MatchingStackOffset - Return true if the given stack call argument is |
| /// already available in the same relative position in the caller's |
| /// incoming argument stack. |
| static |
| bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
| MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
| const TargetInstrInfo *TII) { |
| unsigned Bytes = Arg.getValueSizeInBits() / 8; |
| int FI = std::numeric_limits<int>::max(); |
| if (Arg.getOpcode() == ISD::CopyFromReg) { |
| unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); |
| if (!TargetRegisterInfo::isVirtualRegister(VR)) |
| return false; |
| MachineInstr *Def = MRI->getVRegDef(VR); |
| if (!Def) |
| return false; |
| if (!Flags.isByVal()) { |
| if (!TII->isLoadFromStackSlot(*Def, FI)) |
| return false; |
| } else { |
| return false; |
| } |
| } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { |
| if (Flags.isByVal()) |
| // ByVal argument is passed in as a pointer but it's now being |
| // dereferenced. e.g. |
| // define @foo(%struct.X* %A) { |
| // tail call @bar(%struct.X* byval %A) |
| // } |
| return false; |
| SDValue Ptr = Ld->getBasePtr(); |
| FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); |
| if (!FINode) |
| return false; |
| FI = FINode->getIndex(); |
| } else |
| return false; |
| |
| assert(FI != std::numeric_limits<int>::max()); |
| if (!MFI.isFixedObjectIndex(FI)) |
| return false; |
| return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); |
| } |
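| |
| // In other words, an outgoing stack argument only "matches" when it is a |
| // non-byval reload from a fixed frame object with the same offset and size |
| // as the caller's own incoming argument slot, so the value is already where |
| // the callee expects it. |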
| |
| /// IsEligibleForTailCallOptimization - Check whether the call is eligible |
| /// for tail call optimization. Targets which want to do tail call |
| /// optimization should implement this function. |
| bool |
| ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, |
| CallingConv::ID CalleeCC, |
| bool isVarArg, |
| bool isCalleeStructRet, |
| bool isCallerStructRet, |
| const SmallVectorImpl<ISD::OutputArg> &Outs, |
| const SmallVectorImpl<SDValue> &OutVals, |
| const SmallVectorImpl<ISD::InputArg> &Ins, |
| SelectionDAG& DAG) const { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| const Function &CallerF = MF.getFunction(); |
| CallingConv::ID CallerCC = CallerF.getCallingConv(); |
| |
| assert(Subtarget->supportsTailCall()); |
| |
| // Tail calls to function pointers cannot be optimized for Thumb1 if the args |
| // to the call take up r0-r3. The reason is that there are no legal registers |
| // left to hold the pointer to the function to be called. |
| if (Subtarget->isThumb1Only() && Outs.size() >= 4 && |
| !isa<GlobalAddressSDNode>(Callee.getNode())) |
| return false; |
| |
| // Look for obvious safe cases to perform tail call optimization that do not |
| // require ABI changes. This is what gcc calls sibcall. |
| |
| // Exception-handling functions need a special set of instructions to indicate |
| // a return to the hardware. Tail-calling another function would probably |
| // break this. |
| if (CallerF.hasFnAttribute("interrupt")) |
| return false; |
| |
| // Also avoid sibcall optimization if either caller or callee uses struct |
| // return semantics. |
| if (isCalleeStructRet || isCallerStructRet) |
| return false; |
| |
| // Externally-defined functions with weak linkage should not be |
| // tail-called on ARM when the OS does not support dynamic |
| // pre-emption of symbols, as the AAELF spec requires normal calls |
| // to undefined weak functions to be replaced with a NOP or jump to the |
| // next instruction. The behaviour of branch instructions in this |
| // situation (as used for tail calls) is implementation-defined, so we |
| // cannot rely on the linker replacing the tail call with a return. |
| if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { |
| const GlobalValue *GV = G->getGlobal(); |
| const Triple &TT = getTargetMachine().getTargetTriple(); |
| if (GV->hasExternalWeakLinkage() && |
| (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) |
| return false; |
| } |
| |
| // Check that the call results are passed in the same way. |
| LLVMContext &C = *DAG.getContext(); |
| if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
| CCAssignFnForReturn(CalleeCC, isVarArg), |
| CCAssignFnForReturn(CallerCC, isVarArg))) |
| return false; |
| // The callee has to preserve all registers the caller needs to preserve. |
| const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
| const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
| if (CalleeCC != CallerCC) { |
| const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
| if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
| return false; |
| } |
| |
| // If the caller's vararg or byval argument has been split between registers |
| // and the stack, do not perform a tail call, since part of the argument is in |
| // the caller's local frame. |
| const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); |
| if (AFI_Caller->getArgRegsSaveSize()) |
| return false; |
| |
| // If the callee takes no arguments then go on to check the results of the |
| // call. |
| if (!Outs.empty()) { |
| // Check if stack adjustment is needed. For now, do not do this if any |
| // argument is passed on the stack. |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
| CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); |
| if (CCInfo.getNextStackOffset()) { |
| // Check if the arguments are already laid out in the right way as |
| // the caller's fixed stack objects. |
| MachineFrameInfo &MFI = MF.getFrameInfo(); |
| const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
| const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
| for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); |
| i != e; |
| ++i, ++realArgIdx) { |
| CCValAssign &VA = ArgLocs[i]; |
| EVT RegVT = VA.getLocVT(); |
| SDValue Arg = OutVals[realArgIdx]; |
| ISD::ArgFlagsTy Flags = Outs[realArgIdx |