| //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that ARM uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ARMISelLowering.h" |
| #include "ARMBaseInstrInfo.h" |
| #include "ARMBaseRegisterInfo.h" |
| #include "ARMCallingConv.h" |
| #include "ARMConstantPoolValue.h" |
| #include "ARMMachineFunctionInfo.h" |
| #include "ARMPerfectShuffle.h" |
| #include "ARMRegisterInfo.h" |
| #include "ARMSelectionDAGInfo.h" |
| #include "ARMSubtarget.h" |
| #include "ARMTargetTransformInfo.h" |
| #include "MCTargetDesc/ARMAddressingModes.h" |
| #include "MCTargetDesc/ARMBaseInfo.h" |
| #include "Utils/ARMBaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/BitVector.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/VectorUtils.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/IntrinsicLowering.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetOpcodes.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalAlias.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InlineAsm.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsARM.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCInstrDesc.h" |
| #include "llvm/MC/MCInstrItineraries.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSchedule.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/BranchProbability.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <iterator> |
| #include <limits> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| using namespace llvm::PatternMatch; |
| |
| #define DEBUG_TYPE "arm-isel" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); |
| STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); |
| STATISTIC(NumConstpoolPromoted, |
| "Number of constants with their storage promoted into constant pools"); |
| |
| static cl::opt<bool> |
| ARMInterworking("arm-interworking", cl::Hidden, |
| cl::desc("Enable / disable ARM interworking (for debugging only)"), |
| cl::init(true)); |
| |
| static cl::opt<bool> EnableConstpoolPromotion( |
| "arm-promote-constant", cl::Hidden, |
| cl::desc("Enable / disable promotion of unnamed_addr constants into " |
| "constant pools"), |
| cl::init(false)); // FIXME: set to true by default once PR32780 is fixed |
| static cl::opt<unsigned> ConstpoolPromotionMaxSize( |
| "arm-promote-constant-max-size", cl::Hidden, |
| cl::desc("Maximum size of constant to promote into a constant pool"), |
| cl::init(64)); |
| static cl::opt<unsigned> ConstpoolPromotionMaxTotal( |
| "arm-promote-constant-max-total", cl::Hidden, |
| cl::desc("Maximum size of ALL constants to promote into a constant pool"), |
| cl::init(128)); |
| |
| cl::opt<unsigned> |
| MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, |
| cl::desc("Maximum interleave factor for MVE VLDn to generate."), |
| cl::init(2)); |
| |
| // The APCS parameter registers. |
| static const MCPhysReg GPRArgRegs[] = { |
| ARM::R0, ARM::R1, ARM::R2, ARM::R3 |
| }; |
| |
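| // Set up the standard operation actions for a NEON vector type. Loads and |
| // stores are promoted to PromotedLdStVT (f64 for D-register types, v2f64 for |
| // Q-register types) so that they can be selected as ordinary 64/128-bit |
| // loads and stores; the remaining operations are marked Legal, Custom or |
| // Expand below. |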
| void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { |
| if (VT != PromotedLdStVT) { |
| setOperationAction(ISD::LOAD, VT, Promote); |
| AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); |
| |
| setOperationAction(ISD::STORE, VT, Promote); |
| AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); |
| } |
| |
| MVT ElemTy = VT.getVectorElementType(); |
| if (ElemTy != MVT::f64) |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| if (ElemTy == MVT::i32) { |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| } else { |
| setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
| setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
| } |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| if (VT.isInteger()) { |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| } |
| |
| // Neon does not support vector divide/remainder operations. |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::FDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| |
| if (!VT.isFloatingPoint() && |
| VT != MVT::v2i64 && VT != MVT::v1i64) |
| for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
| setOperationAction(Opcode, VT, Legal); |
| if (!VT.isFloatingPoint()) |
| for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) |
| setOperationAction(Opcode, VT, Legal); |
| } |
| |
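| // Register a 64-bit NEON vector type, living in a D register. |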
| void ARMTargetLowering::addDRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &ARM::DPRRegClass); |
| addTypeForNEON(VT, MVT::f64); |
| } |
| |
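| // Register a 128-bit NEON vector type, living in a pair of D registers (a Q |
| // register). |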
| void ARMTargetLowering::addQRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &ARM::DPairRegClass); |
| addTypeForNEON(VT, MVT::v2f64); |
| } |
| |
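| // Mark every operation on VT as Expand, then re-mark the handful of trivial |
| // operations (bitcast, load, store, undef) that can always be handled |
| // natively. Used for types that are only supported as "bags of bits". |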
| void ARMTargetLowering::setAllExpand(MVT VT) { |
| for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
| setOperationAction(Opc, VT, Expand); |
| |
| // We support these really simple operations even on types where all |
| // the actual arithmetic has to be broken down into simpler |
| // operations or turned into library calls. |
| setOperationAction(ISD::BITCAST, VT, Legal); |
| setOperationAction(ISD::LOAD, VT, Legal); |
| setOperationAction(ISD::STORE, VT, Legal); |
| setOperationAction(ISD::UNDEF, VT, Legal); |
| } |
| |
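| // Set the action for all three extending load flavours (any-, zero- and |
| // sign-extend) when loading a 'To'-typed value from memory and extending it |
| // to 'From'. Note that 'From' is the result type and 'To' the memory type. |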
| void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To, |
| LegalizeAction Action) { |
| setLoadExtAction(ISD::EXTLOAD, From, To, Action); |
| setLoadExtAction(ISD::ZEXTLOAD, From, To, Action); |
| setLoadExtAction(ISD::SEXTLOAD, From, To, Action); |
| } |
| |
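| // Set up the MVE vector types (128-bit integer, float and predicate |
| // vectors). HasMVEFP distinguishes integer-only MVE from MVE with floating |
| // point; the FP vector types are registered either way, but FP arithmetic on |
| // them is only made legal when HasMVEFP is true. |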
| void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { |
| const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 }; |
| |
| for (auto VT : IntTypes) { |
| addRegisterClass(VT, &ARM::MQPRRegClass); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| setOperationAction(ISD::ABS, VT, Legal); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Legal); |
| setOperationAction(ISD::CTLZ, VT, Legal); |
| setOperationAction(ISD::CTTZ, VT, Custom); |
| setOperationAction(ISD::BITREVERSE, VT, Legal); |
| setOperationAction(ISD::BSWAP, VT, Legal); |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| setOperationAction(ISD::ABDS, VT, Legal); |
| setOperationAction(ISD::ABDU, VT, Legal); |
| |
| // No native support for these. |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::CTPOP, VT, Expand); |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| // Vector reductions |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| if (!HasMVEFP) { |
| setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
| setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
| } else { |
| setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
| } |
| |
| // Pre and Post inc are supported on loads and stores |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, VT, Legal); |
| setIndexedStoreAction(im, VT, Legal); |
| setIndexedMaskedLoadAction(im, VT, Legal); |
| setIndexedMaskedStoreAction(im, VT, Legal); |
| } |
| } |
| |
| const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; |
| for (auto VT : FloatTypes) { |
| addRegisterClass(VT, &ARM::MQPRRegClass); |
| if (!HasMVEFP) |
| setAllExpand(VT); |
| |
| // These are legal or custom whether we have MVE.fp or not |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Legal); |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| // Pre and Post inc are supported on loads and stores |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, VT, Legal); |
| setIndexedStoreAction(im, VT, Legal); |
| setIndexedMaskedLoadAction(im, VT, Legal); |
| setIndexedMaskedStoreAction(im, VT, Legal); |
| } |
| |
| if (HasMVEFP) { |
| setOperationAction(ISD::FMINNUM, VT, Legal); |
| setOperationAction(ISD::FMAXNUM, VT, Legal); |
| setOperationAction(ISD::FROUND, VT, Legal); |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| |
| // No native support for these. |
| setOperationAction(ISD::FDIV, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::FSQRT, VT, Expand); |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| setOperationAction(ISD::FNEARBYINT, VT, Expand); |
| } |
| } |
| |
| // Custom-expand reductions over vector types smaller than the legal vector |
| // types, to prevent false zero elements from being added when the vectors |
| // are widened. |
| setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom); |
| |
| // We 'support' these types up to the bitcast/load/store level, regardless of |
| // whether we have MVE integer-only or floating-point support. Only FP data |
| // processing on the FP vector types is inhibited at the integer-only level. |
| const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 }; |
| for (auto VT : LongTypes) { |
| addRegisterClass(VT, &ARM::MQPRRegClass); |
| setAllExpand(VT); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| } |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
| |
| // We can do bitwise operations on v2i64 vectors |
| setOperationAction(ISD::AND, MVT::v2i64, Legal); |
| setOperationAction(ISD::OR, MVT::v2i64, Legal); |
| setOperationAction(ISD::XOR, MVT::v2i64, Legal); |
| |
| // It is legal to extend-load from v8i8 to v8i16, and from v4i8 or v4i16 to |
| // v4i32. |
| addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal); |
| addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal); |
| addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal); |
| |
| // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16. |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal); |
| |
| // Some truncating stores are legal too. |
| setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
| setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
| setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
| |
| // Pre and Post inc on these are legal, given the correct extends |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) { |
| setIndexedLoadAction(im, VT, Legal); |
| setIndexedStoreAction(im, VT, Legal); |
| setIndexedMaskedLoadAction(im, VT, Legal); |
| setIndexedMaskedStoreAction(im, VT, Legal); |
| } |
| } |
| |
| // Predicate types |
| const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1}; |
| for (auto VT : pTypes) { |
| addRegisterClass(VT, &ARM::VCCRRegClass); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| setOperationAction(ISD::SELECT, VT, Expand); |
| } |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); |
| } |
| |
| ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, |
| const ARMSubtarget &STI) |
| : TargetLowering(TM), Subtarget(&STI) { |
| RegInfo = Subtarget->getRegisterInfo(); |
| Itins = Subtarget->getInstrItineraryData(); |
| |
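| // Boolean results are 0/1 in scalar registers but all-ones (-1) per lane in |
| // vector registers, which is what the vector compare instructions produce. |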
| setBooleanContents(ZeroOrOneBooleanContent); |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| |
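| // On non-Darwin targets, make every runtime library call follow the AAPCS |
| // (or its VFP variant when the target uses a hard-float ABI). |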
| if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && |
| !Subtarget->isTargetWatchOS()) { |
| bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; |
| for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) |
| setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), |
| IsHFTarget ? CallingConv::ARM_AAPCS_VFP |
| : CallingConv::ARM_AAPCS); |
| } |
| |
| if (Subtarget->isTargetMachO()) { |
| // Uses VFP for Thumb libfuncs if available. |
| if (Subtarget->isThumb() && Subtarget->hasVFP2Base() && |
| Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { |
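| // Each entry maps an RTLIB libcall to the corresponding *vfp helper. For the |
| // comparison helpers, Cond records how the integer result is tested against |
| // zero: e.g. __eqsf2vfp returns non-zero when its operands compare equal, so |
| // the result is checked with SETNE. |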
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const ISD::CondCode Cond; |
| } LibraryCalls[] = { |
| // Single-precision floating-point arithmetic. |
| { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, |
| |
| // Double-precision floating-point arithmetic. |
| { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, |
| |
| // Single-precision comparisons. |
| { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, |
| { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, |
| { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, |
| { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, |
| { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, |
| { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, |
| { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, |
| |
| // Double-precision comparisons. |
| { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, |
| { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, |
| { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, |
| { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, |
| { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, |
| { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, |
| { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, |
| |
| // Floating-point to integer conversions. |
| // i64 conversions are done via library routines even when generating VFP |
| // instructions, so use the same ones. |
| { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, |
| |
| // Conversions between floating types. |
| { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, |
| { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, |
| |
| // Integer to floating-point conversions. |
| // i64 conversions are done via library routines even when generating VFP |
| // instructions, so use the same ones. |
| // FIXME: There appears to be some naming inconsistency in ARM libgcc: |
| // e.g., __floatunsidf vs. __floatunssidfvfp. |
| { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| } |
| } |
| |
| // These libcalls are not available on 32-bit targets. |
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| setLibcallName(RTLIB::MUL_I128, nullptr); |
| setLibcallName(RTLIB::MULO_I64, nullptr); |
| setLibcallName(RTLIB::MULO_I128, nullptr); |
| |
| // RTLIB |
| if (Subtarget->isAAPCS_ABI() && |
| (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || |
| Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { |
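| // The AEABI runtime helpers always use the base AAPCS calling convention, |
| // even on hard-float targets, so each entry records CallingConv::ARM_AAPCS |
| // explicitly. Note that both the OEQ and UNE comparisons map to the same |
| // __aeabi_*cmpeq helper and differ only in how its result is tested. |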
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| const ISD::CondCode Cond; |
| } LibraryCalls[] = { |
| // Double-precision floating-point arithmetic helper functions |
| // RTABI chapter 4.1.2, Table 2 |
| { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Double-precision floating-point comparison helper functions |
| // RTABI chapter 4.1.2, Table 3 |
| { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| |
| // Single-precision floating-point arithmetic helper functions |
| // RTABI chapter 4.1.2, Table 4 |
| { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Single-precision floating-point comparison helper functions |
| // RTABI chapter 4.1.2, Table 5 |
| { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
| { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
| |
| // Floating-point to integer conversions. |
| // RTABI chapter 4.1.2, Table 6 |
| { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Conversions between floating types. |
| // RTABI chapter 4.1.2, Table 7 |
| { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Integer to floating-point conversions. |
| // RTABI chapter 4.1.2, Table 8 |
| { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Long long helper functions |
| // RTABI chapter 4.2, Table 9 |
| { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| |
| // Integer division functions |
| // RTABI chapter 4.3.1 |
| { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| |
| // EABI dependent RTLIB |
| if (TM.Options.EABIVersion == EABI::EABI4 || |
| TM.Options.EABIVersion == EABI::EABI5) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char *const Name; |
| const CallingConv::ID CC; |
| const ISD::CondCode Cond; |
| } MemOpsLibraryCalls[] = { |
| // Memory operations |
| // RTABI chapter 4.3.4 |
| { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
| }; |
| |
| for (const auto &LC : MemOpsLibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| if (LC.Cond != ISD::SETCC_INVALID) |
| setCmpLibcallCC(LC.Op, LC.Cond); |
| } |
| } |
| } |
| |
| if (Subtarget->isTargetWindows()) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, |
| { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
| // Use divmod compiler-rt calls for iOS 5.0 and later. |
| if (Subtarget->isTargetMachO() && |
| !(Subtarget->isTargetIOS() && |
| Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { |
| setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); |
| setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); |
| } |
| |
| // The half <-> float conversion functions are always soft-float on |
| // non-watchOS platforms, but are needed for some targets which use a |
| // hard-float calling convention by default. |
| if (!Subtarget->isTargetWatchABI()) { |
| if (Subtarget->isAAPCS_ABI()) { |
| setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); |
| setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); |
| setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); |
| } else { |
| setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); |
| setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); |
| setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); |
| } |
| } |
| |
| // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have |
| // a __gnu_ prefix (which is the default). |
| if (Subtarget->isTargetAEABI()) { |
| static const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, |
| { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, |
| { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
| if (Subtarget->isThumb1Only()) |
| addRegisterClass(MVT::i32, &ARM::tGPRRegClass); |
| else |
| addRegisterClass(MVT::i32, &ARM::GPRRegClass); |
| |
| if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() && |
| Subtarget->hasFPRegs()) { |
| addRegisterClass(MVT::f32, &ARM::SPRRegClass); |
| addRegisterClass(MVT::f64, &ARM::DPRRegClass); |
| |
| setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
| |
| if (!Subtarget->hasVFP2Base()) |
| setAllExpand(MVT::f32); |
| if (!Subtarget->hasFP64()) |
| setAllExpand(MVT::f64); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| addRegisterClass(MVT::f16, &ARM::HPRRegClass); |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| } |
| |
| if (Subtarget->hasBF16()) { |
| addRegisterClass(MVT::bf16, &ARM::HPRRegClass); |
| setAllExpand(MVT::bf16); |
| if (!Subtarget->hasFullFP16()) |
| setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
| } |
| |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| addAllExtLoads(VT, InnerVT, Expand); |
| } |
| |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| } |
| |
| setOperationAction(ISD::ConstantFP, MVT::f32, Custom); |
| setOperationAction(ISD::ConstantFP, MVT::f64, Custom); |
| |
| setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); |
| setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); |
| |
| if (Subtarget->hasMVEIntegerOps()) |
| addMVEVectorTypes(Subtarget->hasMVEFloatOps()); |
| |
| // Combine low-overhead loop intrinsics so that we can lower i1 types. |
| if (Subtarget->hasLOB()) { |
| setTargetDAGCombine(ISD::BRCOND); |
| setTargetDAGCombine(ISD::BR_CC); |
| } |
| |
| if (Subtarget->hasNEON()) { |
| addDRTypeForNEON(MVT::v2f32); |
| addDRTypeForNEON(MVT::v8i8); |
| addDRTypeForNEON(MVT::v4i16); |
| addDRTypeForNEON(MVT::v2i32); |
| addDRTypeForNEON(MVT::v1i64); |
| |
| addQRTypeForNEON(MVT::v4f32); |
| addQRTypeForNEON(MVT::v2f64); |
| addQRTypeForNEON(MVT::v16i8); |
| addQRTypeForNEON(MVT::v8i16); |
| addQRTypeForNEON(MVT::v4i32); |
| addQRTypeForNEON(MVT::v2i64); |
| |
| if (Subtarget->hasFullFP16()) { |
| addQRTypeForNEON(MVT::v8f16); |
| addDRTypeForNEON(MVT::v4f16); |
| } |
| |
| if (Subtarget->hasBF16()) { |
| addQRTypeForNEON(MVT::v8bf16); |
| addDRTypeForNEON(MVT::v4bf16); |
| } |
| } |
| |
| if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) { |
| // v2f64 is legal so that QR subregs can be extracted as f64 elements, but |
| // none of Neon, MVE or VFP supports any arithmetic operations on it. |
| setOperationAction(ISD::FADD, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::v2f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::v2f64, Expand); |
| // FIXME: Code duplication: FDIV and FREM are expanded always, see |
| // ARMTargetLowering::addTypeForNEON method for details. |
| setOperationAction(ISD::FDIV, MVT::v2f64, Expand); |
| setOperationAction(ISD::FREM, MVT::v2f64, Expand); |
| // FIXME: Create unittest. |
| // In other words, find a case where "copysign" appears in the DAG with |
| // vector operands. |
| setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); |
| // FIXME: Code duplication: SETCC has custom operation action, see |
| // ARMTargetLowering::addTypeForNEON method for details. |
| setOperationAction(ISD::SETCC, MVT::v2f64, Expand); |
| // FIXME: Create unittest for FNEG and for FABS. |
| setOperationAction(ISD::FNEG, MVT::v2f64, Expand); |
| setOperationAction(ISD::FABS, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::v2f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::v2f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); |
| setOperationAction(ISD::FEXP, MVT::v2f64, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); |
| // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. |
| setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); |
| setOperationAction(ISD::FMA, MVT::v2f64, Expand); |
| } |
| |
| if (Subtarget->hasNEON()) { |
| // Do the same for v4f32, but keep in mind that vadd, vsub and vmul are |
| // natively supported for v4f32. |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::v4f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::v4f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP, MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); |
| setOperationAction(ISD::FRINT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); |
| |
| // Expand the same set of operations for v2f32. |
| setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::v2f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::v2f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); |
| setOperationAction(ISD::FEXP, MVT::v2f32, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); |
| setOperationAction(ISD::FRINT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); |
| |
| // Neon does not support some operations on v1i64 and v2i64 types. |
| setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
| // Custom handling for some quad-vector types to detect VMULL. |
| setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| // Custom handling for some vector types to avoid expensive expansions |
| setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
| // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with a |
| // destination type that is wider than the source, nor does it have an |
| // FP_TO_[SU]INT instruction with a destination narrower than the source. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
| |
| setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); |
| |
| // NEON does not have a single-instruction CTPOP for vectors with element |
| // types wider than 8 bits. However, custom lowering can leverage the |
| // v8i8/v16i8 vcnt instruction. |
| setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v1i64, Custom); |
| setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
| |
| // NEON does not have a single-instruction CTTZ for vectors. |
| setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
| |
| setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); |
| |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); |
| |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| } |
| |
| // NEON only has FMA instructions as of VFP4. |
| if (!Subtarget->hasVFP4Base()) { |
| setOperationAction(ISD::FMA, MVT::v2f32, Expand); |
| setOperationAction(ISD::FMA, MVT::v4f32, Expand); |
| } |
| |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::FP_TO_SINT); |
| setTargetDAGCombine(ISD::FP_TO_UINT); |
| setTargetDAGCombine(ISD::FDIV); |
| setTargetDAGCombine(ISD::LOAD); |
| |
| // It is legal to extend-load from v8i8, v4i8, v2i8, v4i16, v2i16 or v2i32 |
| // into any integer fixed-length vector type. |
| for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, |
| MVT::v2i32}) { |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); |
| setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); |
| } |
| } |
| } |
| |
| if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { |
| setTargetDAGCombine(ISD::BUILD_VECTOR); |
| setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
| setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
| setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
| setTargetDAGCombine(ISD::STORE); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| setTargetDAGCombine(ISD::VECREDUCE_ADD); |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::BITCAST); |
| } |
| if (Subtarget->hasMVEIntegerOps()) { |
| setTargetDAGCombine(ISD::SMIN); |
| setTargetDAGCombine(ISD::UMIN); |
| setTargetDAGCombine(ISD::SMAX); |
| setTargetDAGCombine(ISD::UMAX); |
| setTargetDAGCombine(ISD::FP_EXTEND); |
| setTargetDAGCombine(ISD::SELECT); |
| setTargetDAGCombine(ISD::SELECT_CC); |
| setTargetDAGCombine(ISD::SETCC); |
| } |
| if (Subtarget->hasMVEFloatOps()) { |
| setTargetDAGCombine(ISD::FADD); |
| } |
| |
| if (!Subtarget->hasFP64()) { |
| // When targeting a floating-point unit with only single-precision |
| // operations, f64 is legal for the few double-precision instructions which |
| // are present. However, no double-precision operations other than moves, |
| // loads and stores are provided by the hardware. |
| setOperationAction(ISD::FADD, MVT::f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::f64, Expand); |
| setOperationAction(ISD::FMA, MVT::f64, Expand); |
| setOperationAction(ISD::FDIV, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FNEG, MVT::f64, Expand); |
| setOperationAction(ISD::FABS, MVT::f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG2, MVT::f64, Expand); |
| setOperationAction(ISD::FLOG10, MVT::f64, Expand); |
| setOperationAction(ISD::FEXP, MVT::f64, Expand); |
| setOperationAction(ISD::FEXP2, MVT::f64, Expand); |
| setOperationAction(ISD::FCEIL, MVT::f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::f64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
| } |
| |
| if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { |
| setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
| } |
| } |
| |
| if (!Subtarget->hasFP16()) { |
| setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); |
| } |
| |
| computeRegisterProperties(Subtarget->getRegisterInfo()); |
| |
| // ARM does not have floating-point extending loads. |
| for (MVT VT : MVT::fp_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
| } |
| |
| // ... or truncating stores |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| |
| // ARM does not have an i1 sign-extending load. |
| for (MVT VT : MVT::integer_valuetypes()) |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| |
| // ARM supports all 4 flavors of integer indexed load / store. |
| if (!Subtarget->isThumb1Only()) { |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, MVT::i1, Legal); |
| setIndexedLoadAction(im, MVT::i8, Legal); |
| setIndexedLoadAction(im, MVT::i16, Legal); |
| setIndexedLoadAction(im, MVT::i32, Legal); |
| setIndexedStoreAction(im, MVT::i1, Legal); |
| setIndexedStoreAction(im, MVT::i8, Legal); |
| setIndexedStoreAction(im, MVT::i16, Legal); |
| setIndexedStoreAction(im, MVT::i32, Legal); |
| } |
| } else { |
| // Thumb-1 has limited post-inc load/store support, e.g. LDM r0!, {r1}. |
| setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); |
| setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); |
| } |
| |
| setOperationAction(ISD::SADDO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| |
| setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); |
| setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); |
| if (Subtarget->hasDSP()) { |
| setOperationAction(ISD::SADDSAT, MVT::i8, Custom); |
| setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); |
| setOperationAction(ISD::SADDSAT, MVT::i16, Custom); |
| setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); |
| setOperationAction(ISD::UADDSAT, MVT::i8, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::i8, Custom); |
| setOperationAction(ISD::UADDSAT, MVT::i16, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::i16, Custom); |
| } |
| if (Subtarget->hasBaseDSP()) { |
| setOperationAction(ISD::SADDSAT, MVT::i32, Legal); |
| setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); |
| } |
| |
| // i64 operation support. |
| setOperationAction(ISD::MUL, MVT::i64, Expand); |
| setOperationAction(ISD::MULHU, MVT::i32, Expand); |
| if (Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
| } |
| if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() |
| || (Subtarget->isThumb2() && !Subtarget->hasDSP())) |
| setOperationAction(ISD::MULHS, MVT::i32, Expand); |
| |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL, MVT::i64, Custom); |
| setOperationAction(ISD::SRA, MVT::i64, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
| setOperationAction(ISD::LOAD, MVT::i64, Custom); |
| setOperationAction(ISD::STORE, MVT::i64, Custom); |
| |
| // MVE lowers 64-bit shifts to lsll and lsrl, |
| // assuming that ISD::SRL and ISD::SRA of i64 are already marked Custom above. |
| if (Subtarget->hasMVEIntegerOps()) |
| setOperationAction(ISD::SHL, MVT::i64, Custom); |
| |
| // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. |
| if (Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); |
| } |
| |
| if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| |
| // ARM does not have ROTL. |
| setOperationAction(ISD::ROTL, MVT::i32, Expand); |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| } |
| setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i32, Expand); |
| if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::CTLZ, MVT::i32, Expand); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); |
| } |
| |
| // @llvm.readcyclecounter requires the Performance Monitors extension. |
| // Default to the 0 expansion on unsupported platforms. |
| // FIXME: Technically there are older ARM CPUs that have |
| // implementation-specific ways of obtaining this information. |
| if (Subtarget->hasPerfMon()) |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); |
| |
| // BSWAP (the REV instruction) is only available from ARMv6 onwards. |
| if (!Subtarget->hasV6Ops()) |
| setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
| |
| bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() |
| : Subtarget->hasDivideInARMMode(); |
| if (!hasDivide) { |
| // These are expanded into libcalls if the CPU doesn't have a hardware |
| // divider. |
| setOperationAction(ISD::SDIV, MVT::i32, LibCall); |
| setOperationAction(ISD::UDIV, MVT::i32, LibCall); |
| } |
| |
| if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { |
| setOperationAction(ISD::SDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::i32, Custom); |
| |
| setOperationAction(ISD::SDIV, MVT::i64, Custom); |
| setOperationAction(ISD::UDIV, MVT::i64, Custom); |
| } |
| |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| |
| // Register-based DivRem for AEABI (RTABI 4.2) |
| if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || |
| Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || |
| Subtarget->isTargetWindows()) { |
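| // The divmod helpers return both the quotient and the remainder in registers |
| // (quotient in r0, remainder in r1 for the 32-bit AEABI versions), so |
| // SDIVREM/UDIVREM can be custom-lowered to a single call. |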
| setOperationAction(ISD::SREM, MVT::i64, Custom); |
| setOperationAction(ISD::UREM, MVT::i64, Custom); |
| HasStandaloneRem = false; |
| |
| if (Subtarget->isTargetWindows()) { |
| const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, |
| |
| { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } else { |
| const struct { |
| const RTLIB::Libcall Op; |
| const char * const Name; |
| const CallingConv::ID CC; |
| } LibraryCalls[] = { |
| { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, |
| |
| { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
| { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, |
| }; |
| |
| for (const auto &LC : LibraryCalls) { |
| setLibcallName(LC.Op, LC.Name); |
| setLibcallCallingConv(LC.Op, LC.CC); |
| } |
| } |
| |
| setOperationAction(ISD::SDIVREM, MVT::i32, Custom); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Custom); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Custom); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Custom); |
| } else { |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| } |
| |
| if (Subtarget->getTargetTriple().isOSMSVCRT()) { |
| // MSVCRT doesn't have powi; fall back to pow |
| setLibcallName(RTLIB::POWI_F32, nullptr); |
| setLibcallName(RTLIB::POWI_F64, nullptr); |
| } |
| |
| setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
| |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| |
| // Use the default implementation. |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
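| // On Windows, large dynamic stack allocations must be probed, so |
| // DYNAMIC_STACKALLOC is custom lowered there (see ARMISD::WIN__CHKSTK). |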
| if (Subtarget->isTargetWindows()) |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); |
| else |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); |
| |
| // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use |
| // the default expansion. |
| InsertFencesForAtomic = false; |
| if (Subtarget->hasAnyDataBarrier() && |
| (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { |
| // ATOMIC_FENCE needs custom lowering; the others should have been expanded |
| // to ldrex/strex loops already. |
| setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
| if (!Subtarget->isThumb() || !Subtarget->isMClass()) |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); |
| |
| // On v8, we have particularly efficient implementations of atomic fences |
| // if they can be combined with nearby atomic loads and stores. |
| if (!Subtarget->hasAcquireRelease() || |
| getTargetMachine().getOptLevel() == 0) { |
| // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. |
| InsertFencesForAtomic = true; |
| } |
| } else { |
| // If there's anything we can use as a barrier, go through custom lowering |
| // for ATOMIC_FENCE. |
| // If the target has DMB available in Thumb mode, fences can be inserted. |
| if (Subtarget->hasDataBarrier()) |
| InsertFencesForAtomic = true; |
| |
| setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, |
| Subtarget->hasAnyDataBarrier() ? Custom : Expand); |
| |
| // Set them all for expansion, which will force libcalls. |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); |
| setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); |
| // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the |
| // Unordered/Monotonic case. |
| if (!InsertFencesForAtomic) { |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
| |
| // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. |
| if (!Subtarget->hasV6Ops()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
| } |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() && |
| !Subtarget->isThumb1Only()) { |
| // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR |
| // iff target supports vfp2. |
| setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
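| // The rounding mode lives in FPSCR, so FLT_ROUNDS_ and SET_ROUNDING need |
| // custom lowering to read and write it. |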
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
| } |
| |
| // We want to custom lower some of our intrinsics. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
| if (Subtarget->useSjLjEH()) |
| setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
| |
| setOperationAction(ISD::SETCC, MVT::i32, Expand); |
| setOperationAction(ISD::SETCC, MVT::f32, Expand); |
| setOperationAction(ISD::SETCC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SETCC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
| } |
| |
| setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); |
| |
| setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
| setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
| if (Subtarget->hasFullFP16()) |
| setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
| setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
| |
| // We don't support sin/cos/fmod/copysign/pow |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f32, Expand); |
| if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && |
| !Subtarget->isThumb1Only()) { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| } |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f32, Expand); |
| |
| if (!Subtarget->hasVFP4Base()) { |
| setOperationAction(ISD::FMA, MVT::f64, Expand); |
| setOperationAction(ISD::FMA, MVT::f32, Expand); |
| } |
| |
| // Various VFP goodness |
| if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { |
| // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. |
| if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { |
| setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
| } |
| |
| // fp16 is a special v7 extension that adds f16 <-> f32 conversions. |
| if (!Subtarget->hasFP16()) { |
| setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
| } |
| |
| // Strict floating-point comparisons need custom lowering. |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); |
| } |
| |
| // Use __sincos_stret if available. |
| if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| } |
| |
| // FP-ARMv8 implements a lot of rounding-like FP operations. |
| if (Subtarget->hasFPARMv8Base()) { |
| setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); |
| setOperationAction(ISD::FRINT, MVT::f32, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
| if (Subtarget->hasNEON()) { |
| setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
| } |
| |
| if (Subtarget->hasFP64()) { |
| setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); |
| setOperationAction(ISD::FRINT, MVT::f64, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
| } |
| } |
| |
| // f16 operations often need to be promoted to f32 so library calls can be used. |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FREM, MVT::f16, Promote); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); |
| setOperationAction(ISD::FSIN, MVT::f16, Promote); |
| setOperationAction(ISD::FCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
| setOperationAction(ISD::FPOW, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
| |
| setOperationAction(ISD::FROUND, MVT::f16, Legal); |
| } |
| |
| if (Subtarget->hasNEON()) { |
| // vmin and vmax aren't available in a scalar form, so we can use |
| // a NEON instruction with an undef lane instead. This has a performance |
| // penalty on some cores, so we don't do this unless we have been |
| // asked to by the core tuning model. |
| if (Subtarget->useNEONForSinglePrecisionFP()) { |
| setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
| } |
| setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); |
| |
| setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); |
| } |
| } |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| // ARMISD::VMOVRRD - No need to call setTargetDAGCombine |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::MUL); |
| setTargetDAGCombine(ISD::AND); |
| setTargetDAGCombine(ISD::OR); |
| setTargetDAGCombine(ISD::XOR); |
| |
| if (Subtarget->hasMVEIntegerOps()) |
| setTargetDAGCombine(ISD::VSELECT); |
| |
| if (Subtarget->hasV6Ops()) |
| setTargetDAGCombine(ISD::SRL); |
| if (Subtarget->isThumb1Only()) |
| setTargetDAGCombine(ISD::SHL); |
| |
| setStackPointerRegisterToSaveRestore(ARM::SP); |
| |
| if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || |
| !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) |
| setSchedulingPreference(Sched::RegPressure); |
| else |
| setSchedulingPreference(Sched::Hybrid); |
| |
| //// temporary - rewrite interface to use type |
| MaxStoresPerMemset = 8; |
| MaxStoresPerMemsetOptSize = 4; |
| MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores |
| MaxStoresPerMemcpyOptSize = 2; |
| MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores |
| MaxStoresPerMemmoveOptSize = 2; |
| |
| // On ARM arguments smaller than 4 bytes are extended, so all arguments |
| // are at least 4 bytes aligned. |
| setMinStackArgumentAlignment(Align(4)); |
| |
| // Prefer likely predicted branches to selects on out-of-order cores. |
| PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); |
| |
| setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); |
| |
| setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); |
| |
| if (Subtarget->isThumb() || Subtarget->isThumb2()) |
| setTargetDAGCombine(ISD::ABS); |
| } |
| |
| bool ARMTargetLowering::useSoftFloat() const { |
| return Subtarget->useSoftFloat(); |
| } |
| |
| // FIXME: It might make sense to define the representative register class as the |
| // nearest super-register that has a non-null superset. For example, DPR_VFP2 is |
| // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, |
| // SPR's representative would be DPR_VFP2. This should work well if register |
| // pressure tracking were modified such that a register use would increment the |
| // pressure of the register class's representative and all of its super |
| // classes' representatives transitively. We have not implemented this because |
| // of the difficulty prior to coalescing of modeling operand register classes |
| // due to the common occurrence of cross class copies and subregister insertions |
| // and extractions. |
| std::pair<const TargetRegisterClass *, uint8_t> |
| ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
| MVT VT) const { |
| const TargetRegisterClass *RRC = nullptr; |
| uint8_t Cost = 1; |
| switch (VT.SimpleTy) { |
| default: |
| return TargetLowering::findRepresentativeClass(TRI, VT); |
| // Use DPR as representative register class for all floating point |
| // and vector types. Since there are 32 SPR registers and 32 DPR registers, |
| // the cost is 1 for both f32 and f64. |
| case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: |
| case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: |
| RRC = &ARM::DPRRegClass; |
| // When NEON is used for SP, only half of the register file is available |
| // because operations that define both SP and DP results will be constrained |
| // to the VFP2 class (D0-D15). We currently model this constraint prior to |
| // coalescing by double-counting the SP regs. See the FIXME above. |
| if (Subtarget->useNEONForSinglePrecisionFP()) |
| Cost = 2; |
| break; |
| case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
| case MVT::v4f32: case MVT::v2f64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 2; |
| break; |
| case MVT::v4i64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 4; |
| break; |
| case MVT::v8i64: |
| RRC = &ARM::DPRRegClass; |
| Cost = 8; |
| break; |
| } |
| return std::make_pair(RRC, Cost); |
| } |
| |
| const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { |
| #define MAKE_CASE(V) \ |
| case V: \ |
| return #V; |
| switch ((ARMISD::NodeType)Opcode) { |
| case ARMISD::FIRST_NUMBER: |
| break; |
| MAKE_CASE(ARMISD::Wrapper) |
| MAKE_CASE(ARMISD::WrapperPIC) |
| MAKE_CASE(ARMISD::WrapperJT) |
| MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) |
| MAKE_CASE(ARMISD::CALL) |
| MAKE_CASE(ARMISD::CALL_PRED) |
| MAKE_CASE(ARMISD::CALL_NOLINK) |
| MAKE_CASE(ARMISD::tSECALL) |
| MAKE_CASE(ARMISD::BRCOND) |
| MAKE_CASE(ARMISD::BR_JT) |
| MAKE_CASE(ARMISD::BR2_JT) |
| MAKE_CASE(ARMISD::RET_FLAG) |
| MAKE_CASE(ARMISD::SERET_FLAG) |
| MAKE_CASE(ARMISD::INTRET_FLAG) |
| MAKE_CASE(ARMISD::PIC_ADD) |
| MAKE_CASE(ARMISD::CMP) |
| MAKE_CASE(ARMISD::CMN) |
| MAKE_CASE(ARMISD::CMPZ) |
| MAKE_CASE(ARMISD::CMPFP) |
| MAKE_CASE(ARMISD::CMPFPE) |
| MAKE_CASE(ARMISD::CMPFPw0) |
| MAKE_CASE(ARMISD::CMPFPEw0) |
| MAKE_CASE(ARMISD::BCC_i64) |
| MAKE_CASE(ARMISD::FMSTAT) |
| MAKE_CASE(ARMISD::CMOV) |
| MAKE_CASE(ARMISD::SUBS) |
| MAKE_CASE(ARMISD::SSAT) |
| MAKE_CASE(ARMISD::USAT) |
| MAKE_CASE(ARMISD::ASRL) |
| MAKE_CASE(ARMISD::LSRL) |
| MAKE_CASE(ARMISD::LSLL) |
| MAKE_CASE(ARMISD::SRL_FLAG) |
| MAKE_CASE(ARMISD::SRA_FLAG) |
| MAKE_CASE(ARMISD::RRX) |
| MAKE_CASE(ARMISD::ADDC) |
| MAKE_CASE(ARMISD::ADDE) |
| MAKE_CASE(ARMISD::SUBC) |
| MAKE_CASE(ARMISD::SUBE) |
| MAKE_CASE(ARMISD::LSLS) |
| MAKE_CASE(ARMISD::VMOVRRD) |
| MAKE_CASE(ARMISD::VMOVDRR) |
| MAKE_CASE(ARMISD::VMOVhr) |
| MAKE_CASE(ARMISD::VMOVrh) |
| MAKE_CASE(ARMISD::VMOVSR) |
| MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) |
| MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) |
| MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) |
| MAKE_CASE(ARMISD::TC_RETURN) |
| MAKE_CASE(ARMISD::THREAD_POINTER) |
| MAKE_CASE(ARMISD::DYN_ALLOC) |
| MAKE_CASE(ARMISD::MEMBARRIER_MCR) |
| MAKE_CASE(ARMISD::PRELOAD) |
| MAKE_CASE(ARMISD::LDRD) |
| MAKE_CASE(ARMISD::STRD) |
| MAKE_CASE(ARMISD::WIN__CHKSTK) |
| MAKE_CASE(ARMISD::WIN__DBZCHK) |
| MAKE_CASE(ARMISD::PREDICATE_CAST) |
| MAKE_CASE(ARMISD::VECTOR_REG_CAST) |
| MAKE_CASE(ARMISD::MVESEXT) |
| MAKE_CASE(ARMISD::MVEZEXT) |
| MAKE_CASE(ARMISD::MVETRUNC) |
| MAKE_CASE(ARMISD::VCMP) |
| MAKE_CASE(ARMISD::VCMPZ) |
| MAKE_CASE(ARMISD::VTST) |
| MAKE_CASE(ARMISD::VSHLs) |
| MAKE_CASE(ARMISD::VSHLu) |
| MAKE_CASE(ARMISD::VSHLIMM) |
| MAKE_CASE(ARMISD::VSHRsIMM) |
| MAKE_CASE(ARMISD::VSHRuIMM) |
| MAKE_CASE(ARMISD::VRSHRsIMM) |
| MAKE_CASE(ARMISD::VRSHRuIMM) |
| MAKE_CASE(ARMISD::VRSHRNIMM) |
| MAKE_CASE(ARMISD::VQSHLsIMM) |
| MAKE_CASE(ARMISD::VQSHLuIMM) |
| MAKE_CASE(ARMISD::VQSHLsuIMM) |
| MAKE_CASE(ARMISD::VQSHRNsIMM) |
| MAKE_CASE(ARMISD::VQSHRNuIMM) |
| MAKE_CASE(ARMISD::VQSHRNsuIMM) |
| MAKE_CASE(ARMISD::VQRSHRNsIMM) |
| MAKE_CASE(ARMISD::VQRSHRNuIMM) |
| MAKE_CASE(ARMISD::VQRSHRNsuIMM) |
| MAKE_CASE(ARMISD::VSLIIMM) |
| MAKE_CASE(ARMISD::VSRIIMM) |
| MAKE_CASE(ARMISD::VGETLANEu) |
| MAKE_CASE(ARMISD::VGETLANEs) |
| MAKE_CASE(ARMISD::VMOVIMM) |
| MAKE_CASE(ARMISD::VMVNIMM) |
| MAKE_CASE(ARMISD::VMOVFPIMM) |
| MAKE_CASE(ARMISD::VDUP) |
| MAKE_CASE(ARMISD::VDUPLANE) |
| MAKE_CASE(ARMISD::VEXT) |
| MAKE_CASE(ARMISD::VREV64) |
| MAKE_CASE(ARMISD::VREV32) |
| MAKE_CASE(ARMISD::VREV16) |
| MAKE_CASE(ARMISD::VZIP) |
| MAKE_CASE(ARMISD::VUZP) |
| MAKE_CASE(ARMISD::VTRN) |
| MAKE_CASE(ARMISD::VTBL1) |
| MAKE_CASE(ARMISD::VTBL2) |
| MAKE_CASE(ARMISD::VMOVN) |
| MAKE_CASE(ARMISD::VQMOVNs) |
| MAKE_CASE(ARMISD::VQMOVNu) |
| MAKE_CASE(ARMISD::VCVTN) |
| MAKE_CASE(ARMISD::VCVTL) |
| MAKE_CASE(ARMISD::VIDUP) |
| MAKE_CASE(ARMISD::VMULLs) |
| MAKE_CASE(ARMISD::VMULLu) |
| MAKE_CASE(ARMISD::VQDMULH) |
| MAKE_CASE(ARMISD::VADDVs) |
| MAKE_CASE(ARMISD::VADDVu) |
| MAKE_CASE(ARMISD::VADDVps) |
| MAKE_CASE(ARMISD::VADDVpu) |
| MAKE_CASE(ARMISD::VADDLVs) |
| MAKE_CASE(ARMISD::VADDLVu) |
| MAKE_CASE(ARMISD::VADDLVAs) |
| MAKE_CASE(ARMISD::VADDLVAu) |
| MAKE_CASE(ARMISD::VADDLVps) |
| MAKE_CASE(ARMISD::VADDLVpu) |
| MAKE_CASE(ARMISD::VADDLVAps) |
| MAKE_CASE(ARMISD::VADDLVApu) |
| MAKE_CASE(ARMISD::VMLAVs) |
| MAKE_CASE(ARMISD::VMLAVu) |
| MAKE_CASE(ARMISD::VMLAVps) |
| MAKE_CASE(ARMISD::VMLAVpu) |
| MAKE_CASE(ARMISD::VMLALVs) |
| MAKE_CASE(ARMISD::VMLALVu) |
| MAKE_CASE(ARMISD::VMLALVps) |
| MAKE_CASE(ARMISD::VMLALVpu) |
| MAKE_CASE(ARMISD::VMLALVAs) |
| MAKE_CASE(ARMISD::VMLALVAu) |
| MAKE_CASE(ARMISD::VMLALVAps) |
| MAKE_CASE(ARMISD::VMLALVApu) |
| MAKE_CASE(ARMISD::VMINVu) |
| MAKE_CASE(ARMISD::VMINVs) |
| MAKE_CASE(ARMISD::VMAXVu) |
| MAKE_CASE(ARMISD::VMAXVs) |
| MAKE_CASE(ARMISD::UMAAL) |
| MAKE_CASE(ARMISD::UMLAL) |
| MAKE_CASE(ARMISD::SMLAL) |
| MAKE_CASE(ARMISD::SMLALBB) |
| MAKE_CASE(ARMISD::SMLALBT) |
| MAKE_CASE(ARMISD::SMLALTB) |
| MAKE_CASE(ARMISD::SMLALTT) |
| MAKE_CASE(ARMISD::SMULWB) |
| MAKE_CASE(ARMISD::SMULWT) |
| MAKE_CASE(ARMISD::SMLALD) |
| MAKE_CASE(ARMISD::SMLALDX) |
| MAKE_CASE(ARMISD::SMLSLD) |
| MAKE_CASE(ARMISD::SMLSLDX) |
| MAKE_CASE(ARMISD::SMMLAR) |
| MAKE_CASE(ARMISD::SMMLSR) |
| MAKE_CASE(ARMISD::QADD16b) |
| MAKE_CASE(ARMISD::QSUB16b) |
| MAKE_CASE(ARMISD::QADD8b) |
| MAKE_CASE(ARMISD::QSUB8b) |
| MAKE_CASE(ARMISD::UQADD16b) |
| MAKE_CASE(ARMISD::UQSUB16b) |
| MAKE_CASE(ARMISD::UQADD8b) |
| MAKE_CASE(ARMISD::UQSUB8b) |
| MAKE_CASE(ARMISD::BUILD_VECTOR) |
| MAKE_CASE(ARMISD::BFI) |
| MAKE_CASE(ARMISD::VORRIMM) |
| MAKE_CASE(ARMISD::VBICIMM) |
| MAKE_CASE(ARMISD::VBSP) |
| MAKE_CASE(ARMISD::MEMCPY) |
| MAKE_CASE(ARMISD::VLD1DUP) |
| MAKE_CASE(ARMISD::VLD2DUP) |
| MAKE_CASE(ARMISD::VLD3DUP) |
| MAKE_CASE(ARMISD::VLD4DUP) |
| MAKE_CASE(ARMISD::VLD1_UPD) |
| MAKE_CASE(ARMISD::VLD2_UPD) |
| MAKE_CASE(ARMISD::VLD3_UPD) |
| MAKE_CASE(ARMISD::VLD4_UPD) |
| MAKE_CASE(ARMISD::VLD1x2_UPD) |
| MAKE_CASE(ARMISD::VLD1x3_UPD) |
| MAKE_CASE(ARMISD::VLD1x4_UPD) |
| MAKE_CASE(ARMISD::VLD2LN_UPD) |
| MAKE_CASE(ARMISD::VLD3LN_UPD) |
| MAKE_CASE(ARMISD::VLD4LN_UPD) |
| MAKE_CASE(ARMISD::VLD1DUP_UPD) |
| MAKE_CASE(ARMISD::VLD2DUP_UPD) |
| MAKE_CASE(ARMISD::VLD3DUP_UPD) |
| MAKE_CASE(ARMISD::VLD4DUP_UPD) |
| MAKE_CASE(ARMISD::VST1_UPD) |
| MAKE_CASE(ARMISD::VST2_UPD) |
| MAKE_CASE(ARMISD::VST3_UPD) |
| MAKE_CASE(ARMISD::VST4_UPD) |
| MAKE_CASE(ARMISD::VST1x2_UPD) |
| MAKE_CASE(ARMISD::VST1x3_UPD) |
| MAKE_CASE(ARMISD::VST1x4_UPD) |
| MAKE_CASE(ARMISD::VST2LN_UPD) |
| MAKE_CASE(ARMISD::VST3LN_UPD) |
| MAKE_CASE(ARMISD::VST4LN_UPD) |
| MAKE_CASE(ARMISD::WLS) |
| MAKE_CASE(ARMISD::WLSSETUP) |
| MAKE_CASE(ARMISD::LE) |
| MAKE_CASE(ARMISD::LOOP_DEC) |
| MAKE_CASE(ARMISD::CSINV) |
| MAKE_CASE(ARMISD::CSNEG) |
| MAKE_CASE(ARMISD::CSINC) |
| MAKE_CASE(ARMISD::MEMCPYLOOP) |
| MAKE_CASE(ARMISD::MEMSETLOOP) |
| #undef MAKE_CASE |
| } |
| return nullptr; |
| } |
| |
| EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return getPointerTy(DL); |
| |
| // MVE has a predicate register. |
| if ((Subtarget->hasMVEIntegerOps() && |
| (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) || |
| (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16))) |
| return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); |
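| // Otherwise use a vector of integers as wide as the compared elements, |
| // matching NEON's all-ones / all-zeros comparison results. |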
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| /// getRegClassFor - Return the register class that should be used for the |
| /// specified value type. |
| const TargetRegisterClass * |
| ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { |
| (void)isDivergent; |
| // Map v4i64 to QQ registers but do not make the type legal. Similarly map |
| // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to |
| // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive |
| // MVE Q registers. |
| if (Subtarget->hasNEON()) { |
| if (VT == MVT::v4i64) |
| return &ARM::QQPRRegClass; |
| if (VT == MVT::v8i64) |
| return &ARM::QQQQPRRegClass; |
| } |
| if (Subtarget->hasMVEIntegerOps()) { |
| if (VT == MVT::v4i64) |
| return &ARM::MQQPRRegClass; |
| if (VT == MVT::v8i64) |
| return &ARM::MQQQQPRRegClass; |
| } |
| return TargetLowering::getRegClassFor(VT); |
| } |
| |
| // memcpy, and other memory intrinsics, typically try to use LDM/STM if the |
| // source/dest is aligned and the copy size is large enough. We therefore want |
| // to align such objects passed to memory intrinsics. |
| bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, |
| unsigned &PrefAlign) const { |
| if (!isa<MemIntrinsic>(CI)) |
| return false; |
| MinSize = 8; |
| // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 |
| // cycle faster than 4-byte aligned LDM. |
| PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); |
| return true; |
| } |
| |
| // Create a fast isel object. |
| FastISel * |
| ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
| const TargetLibraryInfo *libInfo) const { |
| return ARM::createFastISel(funcInfo, libInfo); |
| } |
| |
| Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { |
| unsigned NumVals = N->getNumValues(); |
| if (!NumVals) |
| return Sched::RegPressure; |
| |
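| // Prefer ILP scheduling for nodes that produce floating-point or vector |
| // values. |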
| for (unsigned i = 0; i != NumVals; ++i) { |
| EVT VT = N->getValueType(i); |
| if (VT == MVT::Glue || VT == MVT::Other) |
| continue; |
| if (VT.isFloatingPoint() || VT.isVector()) |
| return Sched::ILP; |
| } |
| |
| if (!N->isMachineOpcode()) |
| return Sched::RegPressure; |
| |
| // Loads are scheduled for latency even if the instruction itinerary |
| // is not available. |
| const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
| const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); |
| |
| if (MCID.getNumDefs() == 0) |
| return Sched::RegPressure; |
| if (!Itins->isEmpty() && |
| Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) |
| return Sched::ILP; |
| |
| return Sched::RegPressure; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Lowering Code |
| //===----------------------------------------------------------------------===// |
| |
| static bool isSRL16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SRL) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| static bool isSRA16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SRA) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| static bool isSHL16(const SDValue &Op) { |
| if (Op.getOpcode() != ISD::SHL) |
| return false; |
| if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
| return Const->getZExtValue() == 16; |
| return false; |
| } |
| |
| // Check for a signed 16-bit value. We special-case SRA because it makes it |
| // simpler when also looking for SRAs that aren't sign-extending a |
| // smaller value. Without the check, we'd need to take extra care with |
| // checking order for some operations. |
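| // Note: 17 known sign bits in an i32 means bits [31:15] are all copies of |
| // the sign bit, i.e. the value is a sign-extended 16-bit quantity. |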
| static bool isS16(const SDValue &Op, SelectionDAG &DAG) { |
| if (isSRA16(Op)) |
| return isSHL16(Op.getOperand(0)); |
| return DAG.ComputeNumSignBits(Op) == 17; |
| } |
| |
| /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC |
| static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { |
| switch (CC) { |
| default: llvm_unreachable("Unknown condition code!"); |
| case ISD::SETNE: return ARMCC::NE; |
| case ISD::SETEQ: return ARMCC::EQ; |
| case ISD::SETGT: return ARMCC::GT; |
| case ISD::SETGE: return ARMCC::GE; |
| case ISD::SETLT: return ARMCC::LT; |
| case ISD::SETLE: return ARMCC::LE; |
| case ISD::SETUGT: return ARMCC::HI; |
| case ISD::SETUGE: return ARMCC::HS; |
| case ISD::SETULT: return ARMCC::LO; |
| case ISD::SETULE: return ARMCC::LS; |
| } |
| } |
| |
| /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. |
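| /// Some conditions need a second check (e.g. SETONE, SETUEQ); CondCode2 |
| /// carries it, and is left as AL when a single condition suffices. |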
| static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, |
| ARMCC::CondCodes &CondCode2) { |
| CondCode2 = ARMCC::AL; |
| switch (CC) { |
| default: llvm_unreachable("Unknown FP condition!"); |
| case ISD::SETEQ: |
| case ISD::SETOEQ: CondCode = ARMCC::EQ; break; |
| case ISD::SETGT: |
| case ISD::SETOGT: CondCode = ARMCC::GT; break; |
| case ISD::SETGE: |
| case ISD::SETOGE: CondCode = ARMCC::GE; break; |
| case ISD::SETOLT: CondCode = ARMCC::MI; break; |
| case ISD::SETOLE: CondCode = ARMCC::LS; break; |
| case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; |
| case ISD::SETO: CondCode = ARMCC::VC; break; |
| case ISD::SETUO: CondCode = ARMCC::VS; break; |
| case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; |
| case ISD::SETUGT: CondCode = ARMCC::HI; break; |
| case ISD::SETUGE: CondCode = ARMCC::PL; break; |
| case ISD::SETLT: |
| case ISD::SETULT: CondCode = ARMCC::LT; break; |
| case ISD::SETLE: |
| case ISD::SETULE: CondCode = ARMCC::LE; break; |
| case ISD::SETNE: |
| case ISD::SETUNE: CondCode = ARMCC::NE; break; |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Calling Convention Implementation |
| //===----------------------------------------------------------------------===// |
| |
| /// getEffectiveCallingConv - Get the effective calling convention, taking into |
| /// account presence of floating point hardware and calling convention |
| /// limitations, such as support for variadic functions. |
| CallingConv::ID |
| ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, |
| bool isVarArg) const { |
| switch (CC) { |
| default: |
| report_fatal_error("Unsupported calling convention"); |
| case CallingConv::ARM_AAPCS: |
| case CallingConv::ARM_APCS: |
| case CallingConv::GHC: |
| case CallingConv::CFGuard_Check: |
| return CC; |
| case CallingConv::PreserveMost: |
| return CallingConv::PreserveMost; |
| case CallingConv::ARM_AAPCS_VFP: |
| case CallingConv::Swift: |
| case CallingConv::SwiftTail: |
| return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; |
| case CallingConv::C: |
| case CallingConv::Tail: |
| if (!Subtarget->isAAPCS_ABI()) |
| return CallingConv::ARM_APCS; |
| else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && |
| getTargetMachine().Options.FloatABIType == FloatABI::Hard && |
| !isVarArg) |
| return CallingConv::ARM_AAPCS_VFP; |
| else |
| return CallingConv::ARM_AAPCS; |
| case CallingConv::Fast: |
| case CallingConv::CXX_FAST_TLS: |
| if (!Subtarget->isAAPCS_ABI()) { |
| if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) |
| return CallingConv::Fast; |
| return CallingConv::ARM_APCS; |
| } else if (Subtarget->hasVFP2Base() && |
| !Subtarget->isThumb1Only() && !isVarArg) |
| return CallingConv::ARM_AAPCS_VFP; |
| else |
| return CallingConv::ARM_AAPCS; |
| } |
| } |
| |
| CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, |
| bool isVarArg) const { |
| return CCAssignFnForNode(CC, false, isVarArg); |
| } |
| |
| CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, |
| bool isVarArg) const { |
| return CCAssignFnForNode(CC, true, isVarArg); |
| } |
| |
| /// CCAssignFnForNode - Selects the correct CCAssignFn for the given |
| /// CallingConvention. |
| CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, |
| bool Return, |
| bool isVarArg) const { |
| switch (getEffectiveCallingConv(CC, isVarArg)) { |
| default: |
| report_fatal_error("Unsupported calling convention"); |
| case CallingConv::ARM_APCS: |
| return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); |
| case CallingConv::ARM_AAPCS: |
| return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
| case CallingConv::ARM_AAPCS_VFP: |
| return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); |
| case CallingConv::Fast: |
| return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); |
| case CallingConv::GHC: |
| return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); |
| case CallingConv::PreserveMost: |
| return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
| case CallingConv::CFGuard_Check: |
| return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check); |
| } |
| } |
| |
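| // MoveToHPR - Reinterpret a value received in a wider location type (i32 or |
| // f32) as an f16/bf16 value: a single VMOVhr when +fullfp16 is available, |
| // otherwise a truncate of the raw bits followed by a bitcast. |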
| SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, |
| MVT LocVT, MVT ValVT, SDValue Val) const { |
| Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()), |
| Val); |
| if (Subtarget->hasFullFP16()) { |
| Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val); |
| } else { |
| Val = DAG.getNode(ISD::TRUNCATE, dl, |
| MVT::getIntegerVT(ValVT.getSizeInBits()), Val); |
| Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val); |
| } |
| return Val; |
| } |
| |
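| // MoveFromHPR - Widen an f16/bf16 value back to its 32-bit location type: |
| // VMOVrh when +fullfp16 is available, otherwise bitcast to i16, zero-extend |
| // and bitcast to the location type. |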
| SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, |
| MVT LocVT, MVT ValVT, |
| SDValue Val) const { |
| if (Subtarget->hasFullFP16()) { |
| Val = DAG.getNode(ARMISD::VMOVrh, dl, |
| MVT::getIntegerVT(LocVT.getSizeInBits()), Val); |
| } else { |
| Val = DAG.getNode(ISD::BITCAST, dl, |
| MVT::getIntegerVT(ValVT.getSizeInBits()), Val); |
| Val = DAG.getNode(ISD::ZERO_EXTEND, dl, |
| MVT::getIntegerVT(LocVT.getSizeInBits()), Val); |
| } |
| return DAG.getNode(ISD::BITCAST, dl, LocVT, Val); |
| } |
| |
| /// LowerCallResult - Lower the result values of a call into the |
| /// appropriate copies out of appropriate physical registers. |
| SDValue ARMTargetLowering::LowerCallResult( |
| SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
| const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
| SDValue ThisVal) const { |
| // Assign locations to each value returned by this call. |
| SmallVector<CCValAssign, 16> RVLocs; |
| CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
| *DAG.getContext()); |
| CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); |
| |
| // Copy all of the result registers out of their specified physreg. |
| for (unsigned i = 0; i != RVLocs.size(); ++i) { |
| CCValAssign VA = RVLocs[i]; |
| |
| // Pass the 'this' value directly from the argument to the return value, |
| // to avoid register unit interference. |
| if (i == 0 && isThisReturn) { |
| assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && |
| "unexpected return calling convention register assignment"); |
| InVals.push_back(ThisVal); |
| continue; |
| } |
| |
| SDValue Val; |
| if (VA.needsCustom() && |
| (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) { |
| // Handle f64 or half of a v2f64. |
| SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
| InFlag); |
| Chain = Lo.getValue(1); |
| InFlag = Lo.getValue(2); |
| VA = RVLocs[++i]; // skip ahead to next loc |
| SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
| InFlag); |
| Chain = Hi.getValue(1); |
| InFlag = Hi.getValue(2); |
| if (!Subtarget->isLittle()) |
| std::swap (Lo, Hi); |
| Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
| |
| if (VA.getLocVT() == MVT::v2f64) { |
| SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
| DAG.getConstant(0, dl, MVT::i32)); |
| |
| VA = RVLocs[++i]; // skip ahead to next loc |
| Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
| Chain = Lo.getValue(1); |
| InFlag = Lo.getValue(2); |
| VA = RVLocs[++i]; // skip ahead to next loc |
| Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
| Chain = Hi.getValue(1); |
| InFlag = Hi.getValue(2); |
| if (!Subtarget->isLittle()) |
| std::swap (Lo, Hi); |
| Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
| Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
| DAG.getConstant(1, dl, MVT::i32)); |
| } |
| } else { |
| Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), |
| InFlag); |
| Chain = Val.getValue(1); |
| InFlag = Val.getValue(2); |
| } |
| |
| switch (VA.getLocInfo()) { |
| default: llvm_unreachable("Unknown loc info!"); |
| case CCValAssign::Full: break; |
| case CCValAssign::BCvt: |
| Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); |
| break; |
| } |
| |
| // f16 arguments have their size extended to 4 bytes and are passed as if |
| // they had been copied to the LSBs of a 32-bit register. |
| // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI). |
| if (VA.needsCustom() && |
| (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) |
| Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val); |
| |
| InVals.push_back(Val); |
| } |
| |
| return Chain; |
| } |
| |
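| // computeAddrForCallArg - Compute the address and MachinePointerInfo for an |
| // outgoing call argument passed in memory: a fixed stack object relative to |
| // the adjusted SP for tail calls, or SP plus the location offset otherwise. |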
| std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg( |
| const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr, |
| bool IsTailCall, int SPDiff) const { |
| SDValue DstAddr; |
| MachinePointerInfo DstInfo; |
| int32_t Offset = VA.getLocMemOffset(); |
| MachineFunction &MF = DAG.getMachineFunction(); |
| |
| if (IsTailCall) { |
| Offset += SPDiff; |
| auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| int Size = VA.getLocVT().getFixedSizeInBits() / 8; |
| int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); |
| DstAddr = DAG.getFrameIndex(FI, PtrVT); |
| DstInfo = |
| MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); |
| } else { |
| SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); |
| DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
| StackPtr, PtrOff); |
| DstInfo = |
| MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset); |
| } |
| |
| return std::make_pair(DstAddr, DstInfo); |
| } |
| |
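| // PassF64ArgInRegs - Split an f64 argument into two i32 halves with VMOVRRD. |
| // The first half (order depends on endianness) goes into VA's register; the |
| // second goes into NextVA's register if available, otherwise it is stored to |
| // the outgoing argument area. |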
| void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, |
| SDValue Chain, SDValue &Arg, |
| RegsToPassVector &RegsToPass, |
| CCValAssign &VA, CCValAssign &NextVA, |
| SDValue &StackPtr, |
| SmallVectorImpl<SDValue> &MemOpChains, |
| bool IsTailCall, |
| int SPDiff) const { |
| SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, |
| DAG.getVTList(MVT::i32, MVT::i32), Arg); |
| unsigned id = Subtarget->isLittle() ? 0 : 1; |
| RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); |
| |
| if (NextVA.isRegLoc()) |
| RegsToPass.push_back( |
| std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1 - id))); |
| else { |
| assert(NextVA.isMemLoc()); |
| if (!StackPtr.getNode()) |
| StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, |
| getPointerTy(DAG.getDataLayout())); |
| |
| SDValue DstAddr; |
| MachinePointerInfo DstInfo; |
| std::tie(DstAddr, DstInfo) = |
| computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff); |
| MemOpChains.push_back( |
| DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo)); |
| } |
| } |
| |
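| // Returns true if the calling convention guarantees tail-call optimization: |
| // fastcc when GuaranteedTailCallOpt is set, tailcc, or swifttailcc. |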
| static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
| return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
| CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
| } |
| |
| /// LowerCall - Lower a call into a callseq_start <- |
| /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter |
| /// nodes. |
| SDValue |
| ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
| SmallVectorImpl<SDValue> &InVals) const { |
| SelectionDAG &DAG = CLI.DAG; |
| SDLoc &dl = CLI.DL; |
| SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
| SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
| SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
| SDValue Chain = CLI.Chain; |
| SDValue Callee = CLI.Callee; |
| bool &isTailCall = CLI.IsTailCall; |
| CallingConv::ID CallConv = CLI.CallConv; |
| bool doesNotRet = CLI.DoesNotReturn; |
| bool isVarArg = CLI.IsVarArg; |
| |
| MachineFunction &MF = DAG.getMachineFunction(); |
| ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
| MachineFunction::CallSiteInfo CSInfo; |
| bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); |
| bool isThisReturn = false; |
| bool isCmseNSCall = false; |
| bool isSibCall = false; |
| bool PreferIndirect = false; |
| |
| // Determine whether this is a non-secure function call. |
| if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call")) |
| isCmseNSCall = true; |
| |
| // Disable tail calls if they're not supported. |
| if (!Subtarget->supportsTailCall()) |
| isTailCall = false; |
| |
| // For both the non-secure calls and the returns from a CMSE entry function, |
| // the function needs to do some extra work after the call, or before the |
| // return, respectively, thus it cannot end with a tail call. |
| if (isCmseNSCall || AFI->isCmseNSEntryFunction()) |
| isTailCall = false; |
| |
| if (isa<GlobalAddressSDNode>(Callee)) { |
| // If we're optimizing for minimum size and the function is called three or |
| // more times in this block, we can improve codesize by calling indirectly |
| // as BLXr has a 16-bit encoding. |
| auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); |
| if (CLI.CB) { |
| auto *BB = CLI.CB->getParent(); |
| PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() && |
| count_if(GV->users(), [&BB](const User *U) { |
| return isa<Instruction>(U) && |
| cast<Instruction>(U)->getParent() == BB; |
| }) > 2; |
| } |
| } |
| if (isTailCall) { |
| // Check if it's really possible to do a tail call. |
| isTailCall = IsEligibleForTailCallOptimization( |
| Callee, CallConv, isVarArg, isStructRet, |
| MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG, |
| PreferIndirect); |
| |
| if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt && |
| CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail) |
| isSibCall = true; |
| |
| // We don't support GuaranteedTailCallOpt for ARM, only automatically |
| // detected sibcalls. |
| if (isTailCall) |
| ++NumTailCalls; |
| } |
| |
| if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall()) |
| report_fatal_error("failed to perform tail call elimination on a call " |
| "site marked musttail"); |
| // Analyze operands of the call, assigning locations to each operand. |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
| *DAG.getContext()); |
| CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); |
| |
| // Get a count of how many bytes are to be pushed on the stack. |
| unsigned NumBytes = CCInfo.getNextStackOffset(); |
| |
| // SPDiff is the byte offset of the call's argument area from the callee's. |
| // Stores to callee stack arguments will be placed in FixedStackSlots offset |
| // by this amount for a tail call. In a sibling call it must be 0 because the |
| // caller will deallocate the entire stack and the callee still expects its |
| // arguments to begin at SP+0. Completely unused for non-tail calls. |
| int SPDiff = 0; |
| |
| if (isTailCall && !isSibCall) { |
| auto FuncInfo = MF.getInfo<ARMFunctionInfo>(); |
| unsigned NumReusableBytes = FuncInfo->getArgumentStackSize(); |
| |
| // Since callee will pop argument stack as a tail call, we must keep the |
| // popped size 16-byte aligned. |
| Align StackAlign = DAG.getDataLayout().getStackAlignment(); |
| NumBytes = alignTo(NumBytes, StackAlign); |
| |
| // SPDiff will be negative if this tail call requires more space than we |
| // would automatically have in our incoming argument space. Positive if we |
| // can actually shrink the stack. |
| SPDiff = NumReusableBytes - NumBytes; |
| |
| // If this call requires more stack than we have available from |
| // LowerFormalArguments, tell FrameLowering to reserve space for it. |
| if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff) |
| AFI->setArgRegsSaveSize(-SPDiff); |
| } |
| |
| if (isSibCall) { |
| // For sibling tail calls, memory operands are available in our caller's stack. |
| NumBytes = 0; |
| } else { |
| // Adjust the stack pointer for the new arguments... |
| // These operations are automatically eliminated by the prolog/epilog pass |
| Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl); |
| } |
| |
| SDValue StackPtr = |
| DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); |
| |
| RegsToPassVector RegsToPass; |
| SmallVector<SDValue, 8> MemOpChains; |
| |
| // During a tail call, stores to the argument area must happen after all of |
| // the function's incoming arguments have been loaded because they may alias. |
| // This is done by folding in a TokenFactor from LowerFormalArguments, but |
| // there's no point in doing so repeatedly so this tracks whether that's |
| // happened yet. |
| bool AfterFormalArgLoads = false; |
| |
| // Walk the register/memloc assignments, inserting copies/loads. In the case |
| // of tail call optimization, arguments are handled later. |
| for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); |
| i != e; |
| ++i, ++realArgIdx) { |
| CCValAssign &VA = ArgLocs[i]; |
| SDValue Arg = OutVals[realArgIdx]; |
| ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; |
| bool isByVal = Flags.isByVal(); |
| |
| // Promote the value if needed. |
| switch (VA.getLocInfo()) { |
| default: llvm_unreachable("Unknown loc info!"); |
| case CCValAssign::Full: break; |
| case CCValAssign::SExt: |
| Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::ZExt: |
| Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::AExt: |
| Arg = DAG.getNode |