blob: a103c94cede4f612a7a8ce2d5f3c69a7cda109f3 [file] [log] [blame]
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
cl::init(false));
cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
cl::init(false));
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
LLVMContext &C, ParmContext PC)
: CCState(CC, isVarArg, MF, TM, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
"ARMCCState users must specify whether their context is call"
"or prologue generation.");
CallOrPrologue = PC;
}
};
}
// The APCS parameter registers.
static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
PromotedLdStVT.getSimpleVT());
setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
PromotedLdStVT.getSimpleVT());
}
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
if (ElemTy == MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
} else {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
AddPromotedToType (ISD::AND, VT.getSimpleVT(),
PromotedBitwiseVT.getSimpleVT());
setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
AddPromotedToType (ISD::OR, VT.getSimpleVT(),
PromotedBitwiseVT.getSimpleVT());
setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
PromotedBitwiseVT.getSimpleVT());
}
// Neon does not support vector divide/remainder operations.
setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
addRegisterClass(VT, ARM::DPRRegisterClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
addRegisterClass(VT, ARM::QPRRegisterClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
return new TargetLoweringObjectFileMachO();
return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
Itins = TM.getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
// Double-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
// Single-precision comparisons.
setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
// Double-precision comparisons.
setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
// Floating-point to integer conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
// Conversions between floating types.
setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
// Integer to floating-point conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
// FIXME: There appears to be some naming inconsistency in ARM libgcc:
// e.g., __floatunsidf vs. __floatunssidfvfp.
setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
}
}
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
// Double-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 3
setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
// Single-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 5
setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
// Integer to floating-point conversions.
// RTABI chapter 4.1.2, Table 8
setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
// Long long helper functions
// RTABI chapter 4.2, Table 9
setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
// Integer division functions
// RTABI chapter 4.3.1
setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
// Memory operations
// RTABI chapter 4.3.4
setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction((MVT::SimpleValueType)VT,
(MVT::SimpleValueType)InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
// The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
// supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
// FIXME: Code duplication: FDIV and FREM are expanded always, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
// FIXME: Create unittest.
// In another words, find a way when "copysign" appears in DAG with vector
// operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
// FIXME: Code duplication: SETCC has custom operation action, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
// FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Custom handling for some vector types to avoid expensive expansions
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, and nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
// It is legal to extload from v4i8 to v4i16 or v4i32.
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16,
MVT::v2i32};
for (unsigned i = 0; i < 6; ++i) {
setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
}
}
computeRegisterProperties();
// ARM does not have f32 extending load.
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// ARM does not have i1 sign extending load.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i1, Legal);
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i1, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
}
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
|| (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
if (!Subtarget->isThumb1Only()) {
// FIXME: We should do this for Thumb1 as well.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
// These just redirect to CTTZ and CTLZ on ARM.
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
// These are expanded into libcalls.
if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
// v7M has a hardware divider
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (!Subtarget->isTargetDarwin()) {
// Non-Darwin platforms may return values in these registers via the
// personality function.
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setExceptionPointerRegister(ARM::R0);
setExceptionSelectorRegister(ARM::R1);
}
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
// FIXME: This should be checking for v6k, not just v6.
if (Subtarget->hasDataBarrier() ||
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
// Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
// We don't support sin/cos/fmod/copysign/pow
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
// Special handling for half-precision FP.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
}
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
!Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
maxStoresPerMemset = 16;
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
benefitFromCodePlacementOpt = true;
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of it's super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
switch (VT.getSimpleVT().SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
// Use DPR as representative register class for all floating point
// and vector types. Since there are 32 SPR registers and 32 DPR registers so
// the cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = ARM::DPRRegisterClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
// coalescing by double-counting the SP regs. See the FIXME above.
if (Subtarget->useNEONForSinglePrecisionFP())
Cost = 2;
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
RRC = ARM::DPRRegisterClass;
Cost = 2;
break;
case MVT::v4i64:
RRC = ARM::DPRRegisterClass;
Cost = 4;
break;
case MVT::v8i64:
RRC = ARM::DPRRegisterClass;
Cost = 8;
break;
}
return std::make_pair(RRC, Cost);
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
case ARMISD::tCALL: return "ARMISD::tCALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CAND: return "ARMISD::CAND";
case ARMISD::COR: return "ARMISD::COR";
case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
case ARMISD::FTOSI: return "ARMISD::FTOSI";
case ARMISD::FTOUI: return "ARMISD::FTOUI";
case ARMISD::SITOF: return "ARMISD::SITOF";
case ARMISD::UITOF: return "ARMISD::UITOF";
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
case ARMISD::ADDC: return "ARMISD::ADDC";
case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC";
case ARMISD::SUBE: return "ARMISD::SUBE";
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
case ARMISD::VSHL: return "ARMISD::VSHL";
case ARMISD::VSHRs: return "ARMISD::VSHRs";
case ARMISD::VSHRu: return "ARMISD::VSHRu";
case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
case ARMISD::VSHRN: return "ARMISD::VSHRN";
case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
case ARMISD::VREV64: return "ARMISD::VREV64";
case ARMISD::VREV32: return "ARMISD::VREV32";
case ARMISD::VREV16: return "ARMISD::VREV16";
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::VTBL1: return "ARMISD::VTBL1";
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
}
EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return getPointerTy();
return VT.changeVectorElementTypeToInteger();
}
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return ARM::QQPRRegisterClass;
else if (VT == MVT::v8i64)
return ARM::QQQQPRRegisterClass;
}
return TargetLowering::getRegClassFor(VT);
}
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
return ARM::createFastISel(funcInfo);
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
return (Subtarget->isThumb1Only() ? 127 : 4095);
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
return Sched::RegPressure;
for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
return Sched::ILP;
}
if (!N->isMachineOpcode())
return Sched::RegPressure;
// Load are scheduled for latency even if there instruction itinerary
// is not available.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::ILP;
return Sched::RegPressure;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return ARMCC::NE;
case ISD::SETEQ: return ARMCC::EQ;
case ISD::SETGT: return ARMCC::GT;
case ISD::SETGE: return ARMCC::GE;
case ISD::SETLT: return ARMCC::LT;
case ISD::SETLE: return ARMCC::LE;
case ISD::SETUGT: return ARMCC::HI;
case ISD::SETUGE: return ARMCC::HS;
case ISD::SETULT: return ARMCC::LO;
case ISD::SETULE: return ARMCC::LS;
}
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2) {
CondCode2 = ARMCC::AL;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = ARMCC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "ARMGenCallingConv.inc"
/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool Return,
bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
if (Subtarget->hasVFP2() && !isVarArg) {
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
// For AAPCS ABI targets, just use VFP variant of the calling convention.
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
}
// Fallthrough
case CallingConv::C: {
// Use target triple & subtarget features to do actual dispatch.
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
case CallingConv::ARM_AAPCS_VFP:
if (!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
// Fallthrough
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallResult(Ins,
CCAssignFnForNode(CallConv, /* Return*/ true,
isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(0, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVector<SDValue, 8> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
if (NextVA.isRegLoc())
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
else {
assert(NextVA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
dl, DAG, NextVA,
Flags));
}
}
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool IsSibCall = false;
// Disable tail calls if they're not supported.
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
IsSibCall = true;
}
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
if (IsSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
// f64 and v2f64 might be passed in i32 pairs and must be split into pieces
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, MVT::i32));
SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, MVT::i32));
PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
VA = ArgLocs[++i]; // skip ahead to next loc
if (VA.isRegLoc()) {
PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
dl, DAG, VA, Flags));
}
} else {
PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
unsigned offset = 0;
// True if this byval aggregate will be split between registers
// and memory.
if (CCInfo.isFirstByValRegValid()) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
offset = ARM::R4 - CCInfo.getFirstByValReg();
CCInfo.clearFirstByValReg();
}
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
StkPtrOff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
/*AlwaysInline=*/false,
MachinePointerInfo(0),
MachinePointerInfo(0)));
} else if (!IsSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
InFlag =SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
assert (getTargetMachine().getRelocationModel() == Reloc::Static
&& "long-calls with non-static relocation model!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
isDirect = true;
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else {
unsigned OpFlags = 0;
// On ELF targets for PIC code, direct calls should go through the PLT
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
}
}
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && !isARMFunc &&
Subtarget->hasRAS() && !Subtarget->isThumb1Only())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
if (!isDirect && !Subtarget->hasV5TOps()) {
CallOpc = ARMISD::CALL_NOLINK;
} else if (doesNotRet && isDirect && Subtarget->hasRAS())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall)
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
dl, DAG, InVals);
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to insure
/// this.
void
ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
State->setFirstByValReg(reg);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
unsigned excess = 4 * (ARM::R4 - reg);
assert(size >= excess && "expected larger existing stack allocation");
size -= excess;
}
}
// Confiscate any remaining parameter registers to preclude their
// assignment to subsequent parameters.
while (State->AllocateReg(GPRArgRegs, 4))
;
}
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(Def, FI))
return false;
} else {
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;
assert(FI != INT_MAX);
if (!MFI->isFixedObjectIndex(FI))
return false;
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
const Function *CallerF = DAG.getMachineFunction().getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Do not sibcall optimize vararg calls unless the call site is not passing
// any arguments.
if (isVarArg && !Outs.empty())
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
// fixed to fully support tail calls in Thumb1.
//
// Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
// LR. This means if we need to reload LR, it takes an extra instructions,
// which outweighs the value of the tail call; but here we don't know yet
// whether LR is going to be used. Probably the right approach is to
// generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
// registers are the 4 used for parameters. We don't currently do this
// case.
if (Subtarget->isThumb1Only())
return false;
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
return false;
for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
return false;
if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
return false;
if (RVLocs1[i].isRegLoc()) {
if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
return false;
} else {
if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
return false;
}
}
}
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (VA.needsCustom()) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
// out what to do about this.
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
}
} else if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
}
return true;
}
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext(), Call);
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
SDValue Flag;
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, MVT::i32));
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, MVT::i32));
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
}
SDValue result;
if (Flag.getNode())
result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
else // Return Void
result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
return result;
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
Copies.insert(*UI);
}
if (Copies.size() > 2)
return false;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
SDValue UseChain = UI->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
else
// First CopyToReg
TCChain = UseChain;
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
if (!Copy->hasOneUse())
return false;
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
Chain = Copy->getOperand(0);
} else {
return false;
}
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ARMISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
return false;
if (!CI->isTailCall())
return false;
return !Subtarget->isThumb1Only();
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
DebugLoc dl = Op.getDebugLoc();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
else
Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
CP->getAlignment());
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
unsigned ARMTargetLowering::getJumpTableEncoding() const {
return MachineJumpTableInfo::EK_Inline;
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
DebugLoc DL = Op.getDebugLoc();
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
DebugLoc dl = GA->getDebugLoc();
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
DebugLoc dl = GA->getDebugLoc();
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy();
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (GV->isDeclaration()) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS Model,
// otherwise use the "Local Exec" TLS Model
if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
return LowerToTLSGeneralDynamicModel(GA, DAG);
else
return LowerToTLSExecModels(GA, DAG);
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV,
UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
MachinePointerInfo::getGOT(),
false, false, false, 0);
return Result;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
if (Subtarget->useMovt()) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
}
}
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed. Also
// update ARMFastISel::ARMMaterializeGV.
if (Subtarget->useMovt() && RelocM != Reloc::Static) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
if (RelocM == Reloc::Static)
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
unsigned Wrapper = (RelocM == Reloc::PIC_)
? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(),
false, false, false, 0);
return Result;
}
unsigned ARMPCLabelIndex = 0;
SDValue CPAddr;
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
} else {
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
false, false, false, 0);
return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
ARMPCLabelIndex, PCAdj);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
? 0 : (Subtarget->isThumb() ? 4 : 8);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
}
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
}
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
SDValue Op5 = Op.getOperand(5);
bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
ARM_MB::MemBOpt DMBOpt;
if (isDeviceBarrier)
DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
else
DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(DMBOpt, MVT::i32));
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
DebugLoc dl = Op.getDebugLoc();
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.