blob: 33d115945614fca4126003ecd5c51679721b5254 [file] [log] [blame]
//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetTransformInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "arm-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
"Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
"constant pools"),
cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
"arm-promote-constant-max-size", cl::Hidden,
cl::desc("Maximum size of constant to promote into a constant pool"),
cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
"arm-promote-constant-max-total", cl::Hidden,
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
cl::opt<unsigned>
MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
if (VT != PromotedLdStVT) {
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
}
// Neon does not support vector divide/remainder operations.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
if (!VT.isFloatingPoint())
for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64);
}
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPairRegClass);
addTypeForNEON(VT, MVT::v2f64);
}
void ARMTargetLowering::setAllExpand(MVT VT) {
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
setOperationAction(Opc, VT, Expand);
// We support these really simple operations even on types where all
// the actual arithmetic has to be broken down into simpler
// operations or turned into library calls.
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
}
void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
LegalizeAction Action) {
setLoadExtAction(ISD::EXTLOAD, From, To, Action);
setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
}
void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
for (auto VT : IntTypes) {
addRegisterClass(VT, &ARM::MQPRRegClass);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Legal);
setOperationAction(ISD::BSWAP, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
// No native support for these.
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
if (!HasMVEFP) {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
} else {
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
}
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
setIndexedMaskedLoadAction(im, VT, Legal);
setIndexedMaskedStoreAction(im, VT, Legal);
}
}
const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
for (auto VT : FloatTypes) {
addRegisterClass(VT, &ARM::MQPRRegClass);
if (!HasMVEFP)
setAllExpand(VT);
// These are legal or custom whether we have MVE.fp or not
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
setIndexedMaskedLoadAction(im, VT, Legal);
setIndexedMaskedStoreAction(im, VT, Legal);
}
if (HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
// No native support for these.
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
}
}
// Custom Expand smaller than legal vector reductions to prevent false zero
// items being added.
setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
// We 'support' these types up to bitcast/load/store level, regardless of
// MVE integer-only / float support. Only doing FP data processing on the FP
// vector types is inhibited at integer-only level.
const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
for (auto VT : LongTypes) {
addRegisterClass(VT, &ARM::MQPRRegClass);
setAllExpand(VT);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
// We can do bitwise operations on v2i64 vectors
setOperationAction(ISD::AND, MVT::v2i64, Legal);
setOperationAction(ISD::OR, MVT::v2i64, Legal);
setOperationAction(ISD::XOR, MVT::v2i64, Legal);
// It is legal to extload from v4i8 to v4i16 or v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
// It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
// Some truncating stores are legal too.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
// Pre and Post inc on these are legal, given the correct extends
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
setIndexedMaskedLoadAction(im, VT, Legal);
setIndexedMaskedStoreAction(im, VT, Legal);
}
}
// Predicate types
const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
for (auto VT : pTypes) {
addRegisterClass(VT, &ARM::VCCRRegClass);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
}
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
const ARMSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
RegInfo = Subtarget->getRegisterInfo();
Itins = Subtarget->getInstrItineraryData();
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
!Subtarget->isTargetWatchOS()) {
bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
IsHFTarget ? CallingConv::ARM_AAPCS_VFP
: CallingConv::ARM_AAPCS);
}
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Single-precision floating-point arithmetic.
{ RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
// Double-precision floating-point arithmetic.
{ RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
// Single-precision comparisons.
{ RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
{ RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
{ RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
{ RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
{ RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
{ RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
{ RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
// Double-precision comparisons.
{ RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
{ RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
{ RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
{ RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
{ RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
{ RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
{ RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
// Floating-point to integer conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
{ RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
// Conversions between floating types.
{ RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
// FIXME: There appears to be some naming inconsistency in ARM libgcc:
// e.g., __floatunsidf vs. __floatunssidfvfp.
{ RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
}
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::MULO_I128, nullptr);
// RTLIB
if (Subtarget->isAAPCS_ABI() &&
(Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
{ RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Double-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 3
{ RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
{ RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Single-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 5
{ RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
{ RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// RTABI chapter 4.1.2, Table 8
{ RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Long long helper functions
// RTABI chapter 4.2, Table 9
{ RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer division functions
// RTABI chapter 4.3.1
{ RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
// EABI dependent RTLIB
if (TM.Options.EABIVersion == EABI::EABI4 ||
TM.Options.EABIVersion == EABI::EABI5) {
static const struct {
const RTLIB::Libcall Op;
const char *const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} MemOpsLibraryCalls[] = {
// Memory operations
// RTABI chapter 4.3.4
{ RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : MemOpsLibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
}
if (Subtarget->isTargetWindows()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
if (Subtarget->isTargetMachO() &&
!(Subtarget->isTargetIOS() &&
Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
// The half <-> float conversion functions are always soft-float on
// non-watchos platforms, but are needed for some targets which use a
// hard-float calling convention by default.
if (!Subtarget->isTargetWatchABI()) {
if (Subtarget->isAAPCS_ABI()) {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
} else {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
}
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
// a __gnu_ prefix (which is the default).
if (Subtarget->isTargetAEABI()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
Subtarget->hasFPRegs()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
if (!Subtarget->hasVFP2Base())
setAllExpand(MVT::f32);
if (!Subtarget->hasFP64())
setAllExpand(MVT::f64);
}
if (Subtarget->hasFullFP16()) {
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
}
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
setAllExpand(MVT::bf16);
if (!Subtarget->hasFullFP16())
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
}
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
addAllExtLoads(VT, InnerVT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
if (Subtarget->hasMVEIntegerOps())
addMVEVectorTypes(Subtarget->hasMVEFloatOps());
// Combine low-overhead loop intrinsics so that we can lower i1 types.
if (Subtarget->hasLOB()) {
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BR_CC);
}
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
if (Subtarget->hasFullFP16()) {
addQRTypeForNEON(MVT::v8f16);
addDRTypeForNEON(MVT::v4f16);
}
if (Subtarget->hasBF16()) {
addQRTypeForNEON(MVT::v8bf16);
addDRTypeForNEON(MVT::v4bf16);
}
}
if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// none of Neon, MVE or VFP supports any arithmetic operations on it.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
// FIXME: Code duplication: FDIV and FREM are expanded always, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
// FIXME: Create unittest.
// In another words, find a way when "copysign" appears in DAG with vector
// operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
// FIXME: Code duplication: SETCC has custom operation action, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
// FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
setOperationAction(ISD::FMA, MVT::v2f64, Expand);
}
if (Subtarget->hasNEON()) {
// The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
// supported for v4f32.
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Mark v2f32 intrinsics.
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Custom handling for some vector types to avoid expensive expansions
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, and nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// NEON does not have single instruction CTTZ for vectors.
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
// NEON only has FMA instructions as of VFP4.
if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
MVT::v2i32}) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
}
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::BITCAST);
}
if (Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
}
if (Subtarget->hasMVEFloatOps()) {
setTargetDAGCombine(ISD::FADD);
}
if (!Subtarget->hasFP64()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FDIV, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
setOperationAction(ISD::FNEG, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
}
}
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
}
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i1, Legal);
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i1, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
} else {
// Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
if (Subtarget->hasDSP()) {
setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
setOperationAction(ISD::UADDSAT, MVT::i8, Custom);
setOperationAction(ISD::USUBSAT, MVT::i8, Custom);
setOperationAction(ISD::UADDSAT, MVT::i16, Custom);
setOperationAction(ISD::USUBSAT, MVT::i16, Custom);
}
if (Subtarget->hasBaseDSP()) {
setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
}
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
|| (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom
if (Subtarget->hasMVEIntegerOps())
setOperationAction(ISD::SHL, MVT::i64, Custom);
// Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
}
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
}
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
// FIXME: Technically there are older ARM CPUs that have
// implementation-specific ways of obtaining this information.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
setOperationAction(ISD::SDIV, MVT::i32, LibCall);
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::SDIV, MVT::i64, Custom);
setOperationAction(ISD::UDIV, MVT::i64, Custom);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
Subtarget->isTargetWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
if (Subtarget->isTargetWindows()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
} else {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
InsertFencesForAtomic = false;
if (Subtarget->hasAnyDataBarrier() &&
(!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
if (!Subtarget->isThumb() || !Subtarget->isMClass())
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasAcquireRelease() ||
getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
// If target has DMB in thumb, Fences can be inserted.
if (Subtarget->hasDataBarrier())
InsertFencesForAtomic = true;
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
if (!InsertFencesForAtomic) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
}
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
!Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SETCC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
}
setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
// We don't support sin/cos/fmod/copysign/pow
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
// Various VFP goodness
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
// Strict floating-point comparisons need custom lowering.
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
}
// Use __sincos_stret if available.
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
if (Subtarget->hasNEON()) {
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
}
if (Subtarget->hasFP64()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
// FP16 often need to be promoted to call lib functions
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we can use
// a NEON instruction with an undef lane instead. This has a performance
// penalty on some cores, so we don't do this unless we have been
// asked to by the core tuning model.
if (Subtarget->useNEONForSinglePrecisionFP()) {
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
}
}
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
if (Subtarget->hasMVEIntegerOps())
setTargetDAGCombine(ISD::VSELECT);
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::SHL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
!Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(Align(4));
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
if (Subtarget->isThumb() || Subtarget->isThumb2())
setTargetDAGCombine(ISD::ABS);
}
bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of it's super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as representative register class for all floating point
// and vector types. Since there are 32 SPR registers and 32 DPR registers so
// the cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
// coalescing by double-counting the SP regs. See the FIXME above.
if (Subtarget->useNEONForSinglePrecisionFP())
Cost = 2;
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
return std::make_pair(RRC, Cost);
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
switch ((ARMISD::NodeType)Opcode) {
case ARMISD::FIRST_NUMBER:
break;
MAKE_CASE(ARMISD::Wrapper)
MAKE_CASE(ARMISD::WrapperPIC)
MAKE_CASE(ARMISD::WrapperJT)
MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
MAKE_CASE(ARMISD::CALL)
MAKE_CASE(ARMISD::CALL_PRED)
MAKE_CASE(ARMISD::CALL_NOLINK)
MAKE_CASE(ARMISD::tSECALL)
MAKE_CASE(ARMISD::BRCOND)
MAKE_CASE(ARMISD::BR_JT)
MAKE_CASE(ARMISD::BR2_JT)
MAKE_CASE(ARMISD::RET_FLAG)
MAKE_CASE(ARMISD::SERET_FLAG)
MAKE_CASE(ARMISD::INTRET_FLAG)
MAKE_CASE(ARMISD::PIC_ADD)
MAKE_CASE(ARMISD::CMP)
MAKE_CASE(ARMISD::CMN)
MAKE_CASE(ARMISD::CMPZ)
MAKE_CASE(ARMISD::CMPFP)
MAKE_CASE(ARMISD::CMPFPE)
MAKE_CASE(ARMISD::CMPFPw0)
MAKE_CASE(ARMISD::CMPFPEw0)
MAKE_CASE(ARMISD::BCC_i64)
MAKE_CASE(ARMISD::FMSTAT)
MAKE_CASE(ARMISD::CMOV)
MAKE_CASE(ARMISD::SUBS)
MAKE_CASE(ARMISD::SSAT)
MAKE_CASE(ARMISD::USAT)
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
MAKE_CASE(ARMISD::SRL_FLAG)
MAKE_CASE(ARMISD::SRA_FLAG)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
MAKE_CASE(ARMISD::SUBC)
MAKE_CASE(ARMISD::SUBE)
MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::VMOVRRD)
MAKE_CASE(ARMISD::VMOVDRR)
MAKE_CASE(ARMISD::VMOVhr)
MAKE_CASE(ARMISD::VMOVrh)
MAKE_CASE(ARMISD::VMOVSR)
MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
MAKE_CASE(ARMISD::TC_RETURN)
MAKE_CASE(ARMISD::THREAD_POINTER)
MAKE_CASE(ARMISD::DYN_ALLOC)
MAKE_CASE(ARMISD::MEMBARRIER_MCR)
MAKE_CASE(ARMISD::PRELOAD)
MAKE_CASE(ARMISD::LDRD)
MAKE_CASE(ARMISD::STRD)
MAKE_CASE(ARMISD::WIN__CHKSTK)
MAKE_CASE(ARMISD::WIN__DBZCHK)
MAKE_CASE(ARMISD::PREDICATE_CAST)
MAKE_CASE(ARMISD::VECTOR_REG_CAST)
MAKE_CASE(ARMISD::MVESEXT)
MAKE_CASE(ARMISD::MVEZEXT)
MAKE_CASE(ARMISD::MVETRUNC)
MAKE_CASE(ARMISD::VCMP)
MAKE_CASE(ARMISD::VCMPZ)
MAKE_CASE(ARMISD::VTST)
MAKE_CASE(ARMISD::VSHLs)
MAKE_CASE(ARMISD::VSHLu)
MAKE_CASE(ARMISD::VSHLIMM)
MAKE_CASE(ARMISD::VSHRsIMM)
MAKE_CASE(ARMISD::VSHRuIMM)
MAKE_CASE(ARMISD::VRSHRsIMM)
MAKE_CASE(ARMISD::VRSHRuIMM)
MAKE_CASE(ARMISD::VRSHRNIMM)
MAKE_CASE(ARMISD::VQSHLsIMM)
MAKE_CASE(ARMISD::VQSHLuIMM)
MAKE_CASE(ARMISD::VQSHLsuIMM)
MAKE_CASE(ARMISD::VQSHRNsIMM)
MAKE_CASE(ARMISD::VQSHRNuIMM)
MAKE_CASE(ARMISD::VQSHRNsuIMM)
MAKE_CASE(ARMISD::VQRSHRNsIMM)
MAKE_CASE(ARMISD::VQRSHRNuIMM)
MAKE_CASE(ARMISD::VQRSHRNsuIMM)
MAKE_CASE(ARMISD::VSLIIMM)
MAKE_CASE(ARMISD::VSRIIMM)
MAKE_CASE(ARMISD::VGETLANEu)
MAKE_CASE(ARMISD::VGETLANEs)
MAKE_CASE(ARMISD::VMOVIMM)
MAKE_CASE(ARMISD::VMVNIMM)
MAKE_CASE(ARMISD::VMOVFPIMM)
MAKE_CASE(ARMISD::VDUP)
MAKE_CASE(ARMISD::VDUPLANE)
MAKE_CASE(ARMISD::VEXT)
MAKE_CASE(ARMISD::VREV64)
MAKE_CASE(ARMISD::VREV32)
MAKE_CASE(ARMISD::VREV16)
MAKE_CASE(ARMISD::VZIP)
MAKE_CASE(ARMISD::VUZP)
MAKE_CASE(ARMISD::VTRN)
MAKE_CASE(ARMISD::VTBL1)
MAKE_CASE(ARMISD::VTBL2)
MAKE_CASE(ARMISD::VMOVN)
MAKE_CASE(ARMISD::VQMOVNs)
MAKE_CASE(ARMISD::VQMOVNu)
MAKE_CASE(ARMISD::VCVTN)
MAKE_CASE(ARMISD::VCVTL)
MAKE_CASE(ARMISD::VIDUP)
MAKE_CASE(ARMISD::VMULLs)
MAKE_CASE(ARMISD::VMULLu)
MAKE_CASE(ARMISD::VQDMULH)
MAKE_CASE(ARMISD::VADDVs)
MAKE_CASE(ARMISD::VADDVu)
MAKE_CASE(ARMISD::VADDVps)
MAKE_CASE(ARMISD::VADDVpu)
MAKE_CASE(ARMISD::VADDLVs)
MAKE_CASE(ARMISD::VADDLVu)
MAKE_CASE(ARMISD::VADDLVAs)
MAKE_CASE(ARMISD::VADDLVAu)
MAKE_CASE(ARMISD::VADDLVps)
MAKE_CASE(ARMISD::VADDLVpu)
MAKE_CASE(ARMISD::VADDLVAps)
MAKE_CASE(ARMISD::VADDLVApu)
MAKE_CASE(ARMISD::VMLAVs)
MAKE_CASE(ARMISD::VMLAVu)
MAKE_CASE(ARMISD::VMLAVps)
MAKE_CASE(ARMISD::VMLAVpu)
MAKE_CASE(ARMISD::VMLALVs)
MAKE_CASE(ARMISD::VMLALVu)
MAKE_CASE(ARMISD::VMLALVps)
MAKE_CASE(ARMISD::VMLALVpu)
MAKE_CASE(ARMISD::VMLALVAs)
MAKE_CASE(ARMISD::VMLALVAu)
MAKE_CASE(ARMISD::VMLALVAps)
MAKE_CASE(ARMISD::VMLALVApu)
MAKE_CASE(ARMISD::VMINVu)
MAKE_CASE(ARMISD::VMINVs)
MAKE_CASE(ARMISD::VMAXVu)
MAKE_CASE(ARMISD::VMAXVs)
MAKE_CASE(ARMISD::UMAAL)
MAKE_CASE(ARMISD::UMLAL)
MAKE_CASE(ARMISD::SMLAL)
MAKE_CASE(ARMISD::SMLALBB)
MAKE_CASE(ARMISD::SMLALBT)
MAKE_CASE(ARMISD::SMLALTB)
MAKE_CASE(ARMISD::SMLALTT)
MAKE_CASE(ARMISD::SMULWB)
MAKE_CASE(ARMISD::SMULWT)
MAKE_CASE(ARMISD::SMLALD)
MAKE_CASE(ARMISD::SMLALDX)
MAKE_CASE(ARMISD::SMLSLD)
MAKE_CASE(ARMISD::SMLSLDX)
MAKE_CASE(ARMISD::SMMLAR)
MAKE_CASE(ARMISD::SMMLSR)
MAKE_CASE(ARMISD::QADD16b)
MAKE_CASE(ARMISD::QSUB16b)
MAKE_CASE(ARMISD::QADD8b)
MAKE_CASE(ARMISD::QSUB8b)
MAKE_CASE(ARMISD::UQADD16b)
MAKE_CASE(ARMISD::UQSUB16b)
MAKE_CASE(ARMISD::UQADD8b)
MAKE_CASE(ARMISD::UQSUB8b)
MAKE_CASE(ARMISD::BUILD_VECTOR)
MAKE_CASE(ARMISD::BFI)
MAKE_CASE(ARMISD::VORRIMM)
MAKE_CASE(ARMISD::VBICIMM)
MAKE_CASE(ARMISD::VBSP)
MAKE_CASE(ARMISD::MEMCPY)
MAKE_CASE(ARMISD::VLD1DUP)
MAKE_CASE(ARMISD::VLD2DUP)
MAKE_CASE(ARMISD::VLD3DUP)
MAKE_CASE(ARMISD::VLD4DUP)
MAKE_CASE(ARMISD::VLD1_UPD)
MAKE_CASE(ARMISD::VLD2_UPD)
MAKE_CASE(ARMISD::VLD3_UPD)
MAKE_CASE(ARMISD::VLD4_UPD)
MAKE_CASE(ARMISD::VLD1x2_UPD)
MAKE_CASE(ARMISD::VLD1x3_UPD)
MAKE_CASE(ARMISD::VLD1x4_UPD)
MAKE_CASE(ARMISD::VLD2LN_UPD)
MAKE_CASE(ARMISD::VLD3LN_UPD)
MAKE_CASE(ARMISD::VLD4LN_UPD)
MAKE_CASE(ARMISD::VLD1DUP_UPD)
MAKE_CASE(ARMISD::VLD2DUP_UPD)
MAKE_CASE(ARMISD::VLD3DUP_UPD)
MAKE_CASE(ARMISD::VLD4DUP_UPD)
MAKE_CASE(ARMISD::VST1_UPD)
MAKE_CASE(ARMISD::VST2_UPD)
MAKE_CASE(ARMISD::VST3_UPD)
MAKE_CASE(ARMISD::VST4_UPD)
MAKE_CASE(ARMISD::VST1x2_UPD)
MAKE_CASE(ARMISD::VST1x3_UPD)
MAKE_CASE(ARMISD::VST1x4_UPD)
MAKE_CASE(ARMISD::VST2LN_UPD)
MAKE_CASE(ARMISD::VST3LN_UPD)
MAKE_CASE(ARMISD::VST4LN_UPD)
MAKE_CASE(ARMISD::WLS)
MAKE_CASE(ARMISD::WLSSETUP)
MAKE_CASE(ARMISD::LE)
MAKE_CASE(ARMISD::LOOP_DEC)
MAKE_CASE(ARMISD::CSINV)
MAKE_CASE(ARMISD::CSNEG)
MAKE_CASE(ARMISD::CSINC)
MAKE_CASE(ARMISD::MEMCPYLOOP)
MAKE_CASE(ARMISD::MEMSETLOOP)
#undef MAKE_CASE
}
return nullptr;
}
EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
// MVE has a predicate register.
if ((Subtarget->hasMVEIntegerOps() &&
(VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) ||
(Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16)))
return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *
ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
(void)isDivergent;
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
// MVE Q registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::QQQQPRRegClass;
}
if (Subtarget->hasMVEIntegerOps()) {
if (VT == MVT::v4i64)
return &ARM::MQQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::MQQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
unsigned &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
// cycle faster than 4-byte aligned LDM.
PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
return true;
}
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return ARM::createFastISel(funcInfo, libInfo);
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
return Sched::RegPressure;
for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
return Sched::ILP;
}
if (!N->isMachineOpcode())
return Sched::RegPressure;
// Load are scheduled for latency even if there instruction itinerary
// is not available.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::ILP;
return Sched::RegPressure;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
static bool isSRL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSRA16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRA)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSHL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SHL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
// Check for a signed 16-bit value. We special case SRA because it makes it
// more simple when also looking for SRAs that aren't sign extending a
// smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
if (isSRA16(Op))
return isSHL16(Op.getOperand(0));
return DAG.ComputeNumSignBits(Op) == 17;
}
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return ARMCC::NE;
case ISD::SETEQ: return ARMCC::EQ;
case ISD::SETGT: return ARMCC::GT;
case ISD::SETGE: return ARMCC::GE;
case ISD::SETLT: return ARMCC::LT;
case ISD::SETLE: return ARMCC::LE;
case ISD::SETUGT: return ARMCC::HI;
case ISD::SETUGE: return ARMCC::HS;
case ISD::SETULT: return ARMCC::LO;
case ISD::SETULE: return ARMCC::LS;
}
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2) {
CondCode2 = ARMCC::AL;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = ARMCC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
bool isVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
case CallingConv::GHC:
case CallingConv::CFGuard_Check:
return CC;
case CallingConv::PreserveMost:
return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::Swift:
case CallingConv::SwiftTail:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
case CallingConv::Tail:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
} else if (Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
}
}
CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, false, isVarArg);
}
CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, true, isVarArg);
}
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool Return,
bool isVarArg) const {
switch (getEffectiveCallingConv(CC, isVarArg)) {
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_AAPCS_VFP:
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
case CallingConv::Fast:
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
case CallingConv::PreserveMost:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::CFGuard_Check:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
}
}
SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
MVT LocVT, MVT ValVT, SDValue Val) const {
Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
Val);
if (Subtarget->hasFullFP16()) {
Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
} else {
Val = DAG.getNode(ISD::TRUNCATE, dl,
MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
}
return Val;
}
SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
MVT LocVT, MVT ValVT,
SDValue Val) const {
if (Subtarget->hasFullFP16()) {
Val = DAG.getNode(ARMISD::VMOVrh, dl,
MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
} else {
Val = DAG.getNode(ISD::BITCAST, dl,
MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
}
return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
SDValue Val;
if (VA.needsCustom() &&
(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(0, dl, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, dl, MVT::i32));
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
}
// f16 arguments have their size extended to 4 bytes and passed as if they
// had been copied to the LSBs of a 32-bit register.
// For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
if (VA.needsCustom() &&
(VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
InVals.push_back(Val);
}
return Chain;
}
std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
bool IsTailCall, int SPDiff) const {
SDValue DstAddr;
MachinePointerInfo DstInfo;
int32_t Offset = VA.getLocMemOffset();
MachineFunction &MF = DAG.getMachineFunction();
if (IsTailCall) {
Offset += SPDiff;
auto PtrVT = getPointerTy(DAG.getDataLayout());
int Size = VA.getLocVT().getFixedSizeInBits() / 8;
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
DstInfo =
MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
}
return std::make_pair(DstAddr, DstInfo);
}
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVectorImpl<SDValue> &MemOpChains,
bool IsTailCall,
int SPDiff) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
unsigned id = Subtarget->isLittle() ? 0 : 1;
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
getPointerTy(DAG.getDataLayout()));
SDValue DstAddr;
MachinePointerInfo DstInfo;
std::tie(DstAddr, DstInfo) =
computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
}
}
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool doesNotRet = CLI.DoesNotReturn;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isCmseNSCall = false;
bool isSibCall = false;
bool PreferIndirect = false;
// Determine whether this is a non-secure function call.
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
isCmseNSCall = true;
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall())
isTailCall = false;
// For both the non-secure calls and the returns from a CMSE entry function,
// the function needs to do some extra work afte r the call, or before the
// return, respectively, thus it cannot end with atail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;
if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
if (CLI.CB) {
auto *BB = CLI.CB->getParent();
PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) &&
cast<Instruction>(U)->getParent() == BB;
}) > 2;
}
}
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(
Callee, CallConv, isVarArg, isStructRet,
MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
PreferIndirect);
if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
isSibCall = true;
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall)
++NumTailCalls;
}
if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
// SPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int SPDiff = 0;
if (isTailCall && !isSibCall) {
auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
Align StackAlign = DAG.getDataLayout().getStackAlignment();
NumBytes = alignTo(NumBytes, StackAlign);
// SPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
SPDiff = NumReusableBytes - NumBytes;
// If this call requires more stack than we have available from
// LowerFormalArguments, tell FrameLowering to reserve space for it.
if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
AFI->setArgRegsSaveSize(-SPDiff);
}
if (isSibCall) {
// For sibling tail calls, memory operands are available in our caller's stack.
NumBytes = 0;
} else {
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
}
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// During a tail call, stores to the argument area must happen after all of
// the function's incoming arguments have been loaded because they may alias.
// This is done by folding in a TokenFactor from LowerFormalArguments, but
// there's no point in doing so repeatedly so this tracks whether that's
// happened yet.
bool AfterFormalArgLoads = false;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode