blob: 04299f30080166cd2356e1ff1e77764c2ce4ed2f [file] [log] [blame]
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <bitset>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
/// want. It need not be aligned to a 128-bit bounday. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue Extract128BitVector(SDValue Vec,
SDValue Idx,
SelectionDAG &DAG,
DebugLoc dl) {
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
int Factor = VT.getSizeInBits()/128;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
// Extract from UNDEF is UNDEF.
if (Vec.getOpcode() == ISD::UNDEF)
return DAG.getNode(ISD::UNDEF, dl, ResultVT);
if (isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
// we can match to VEXTRACTF128.
unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
// This is the index of the first element of the 128-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
return Result;
return SDValue();
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128 instruction or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit bounday. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue Insert128BitVector(SDValue Result,
SDValue Vec,
SDValue Idx,
SelectionDAG &DAG,
DebugLoc dl) {
if (isa<ConstantSDNode>(Idx)) {
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResultVT = Result.getValueType();
// Insert the relevant 128 bits.
unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
// This is the index of the first element of the 128-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
return Result;
return SDValue();
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
if (Subtarget->isTargetEnvMacho()) {
if (is64Bit)
return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
if (Subtarget->isTargetELF())
return new TargetLoweringObjectFileELF();
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
TD = getTargetData();
// Set up the TargetLowering object.
static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
// For 32 bit Atom, use Hybrid (register pressure + latency) scheduling.
if (Subtarget->is64Bit())
else if (Subtarget->isAtom())
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
// The _ftol2 runtime function has an unusual calling conv, which
// is modeled by a special pseudo-instruction.
setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
} else if (Subtarget->isTargetMingw()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
} else {
// Set up the register classes.
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
if (isTargetFTOL()) {
// Use the _ftol2 runtime function, which has a pseudo-instruction
// to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
for (unsigned i = 0, e = 4; i != e; ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
// Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
setOperationAction(ISD::ADDC, VT, Custom);
setOperationAction(ISD::ADDE, VT, Custom);
setOperationAction(ISD::SUBC, VT, Custom);
setOperationAction(ISD::SUBE, VT, Custom);
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
if (Subtarget->hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
// 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
// are not buffered), so we can fold away the common pattern of
// fence-atomic-fence.
// Expand certain atomics
for (unsigned i = 0, e = 4; i != e; ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
if (Subtarget->is64Bit()) {
} else {
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::FR64RegisterClass);
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS , MVT::f64, Custom);
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f64, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// Lower this to FGETSIGNx86 plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
} else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::UNDEF, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); // FLD0
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
addLegalFPImmediate(TmpFlt2); // FLD1
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
(MVT::SimpleValueType)InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
// MMX-sized vectors (other than x86mmx) are expected to be expanded
// into smaller operations.
setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
setOperationAction(ISD::AND, MVT::v8i8, Expand);
setOperationAction(ISD::AND, MVT::v4i16, Expand);
setOperationAction(ISD::AND, MVT::v2i32, Expand);
setOperationAction(ISD::AND, MVT::v1i64, Expand);
setOperationAction(ISD::OR, MVT::v8i8, Expand);
setOperationAction(ISD::OR, MVT::v4i16, Expand);
setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::OR, MVT::v1i64, Expand);
setOperationAction(ISD::XOR, MVT::v8i8, Expand);
setOperationAction(ISD::XOR, MVT::v4i16, Expand);
setOperationAction(ISD::XOR, MVT::v2i32, Expand);
setOperationAction(ISD::XOR, MVT::v1i64, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::SUB, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
EVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
VT.getSimpleVT().SimpleTy, Custom);
VT.getSimpleVT().SimpleTy, Custom);
VT.getSimpleVT().SimpleTy, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
setOperationAction(ISD::XOR, SVT, Promote);
AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
setOperationAction(ISD::LOAD, SVT, Promote);
AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
setOperationAction(ISD::SELECT, SVT, Promote);
AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
if (Subtarget->hasSSE41()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
// information.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
// FIXME: these should be Legal but thats only for the case where
// the index is constant. For now custom expand to deal with that.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
if (Subtarget->hasSSE2()) {
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
setOperationAction(ISD::SHL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v4i32, Legal);
setOperationAction(ISD::SHL, MVT::v2i64, Legal);
setOperationAction(ISD::SHL, MVT::v4i32, Legal);
setOperationAction(ISD::SRA, MVT::v4i32, Legal);
} else {
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
if (Subtarget->hasSSE42())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
setOperationAction(ISD::SHL, MVT::v16i16, Custom);
setOperationAction(ISD::SHL, MVT::v32i8, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
setOperationAction(ISD::ADD, MVT::v32i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i64, Legal);
setOperationAction(ISD::SUB, MVT::v8i32, Legal);
setOperationAction(ISD::SUB, MVT::v16i16, Legal);
setOperationAction(ISD::SUB, MVT::v32i8, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
// Don't lower v32i8 because there is no 128-bit byte mul
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
setOperationAction(ISD::SRL, MVT::v4i64, Legal);
setOperationAction(ISD::SRL, MVT::v8i32, Legal);
setOperationAction(ISD::SHL, MVT::v4i64, Legal);
setOperationAction(ISD::SHL, MVT::v8i32, Legal);
setOperationAction(ISD::SRA, MVT::v8i32, Legal);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
setOperationAction(ISD::ADD, MVT::v32i8, Custom);
setOperationAction(ISD::SUB, MVT::v4i64, Custom);
setOperationAction(ISD::SUB, MVT::v8i32, Custom);
setOperationAction(ISD::SUB, MVT::v16i16, Custom);
setOperationAction(ISD::SUB, MVT::v32i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
setOperationAction(ISD::SRL, MVT::v4i64, Custom);
setOperationAction(ISD::SRL, MVT::v8i32, Custom);
setOperationAction(ISD::SHL, MVT::v4i64, Custom);
setOperationAction(ISD::SHL, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
setOperationAction(ISD::OR, SVT, Promote);
AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
setOperationAction(ISD::XOR, SVT, Promote);
AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
setOperationAction(ISD::LOAD, SVT, Promote);
AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
setOperationAction(ISD::SELECT, SVT, Promote);
AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
// Add/Sub/Mul with overflow operations are custom lowered.
MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
// There are no 8-bit 3-address imul/mul instructions
setOperationAction(ISD::SMULO, MVT::i8, Expand);
setOperationAction(ISD::UMULO, MVT::i8, Expand);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
// We have target-specific dag combine patterns for the following nodes:
if (Subtarget->is64Bit())
if (Subtarget->hasBMI())
// On Darwin, -Os means optimize for size without hurting performance,
// do not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return MVT::i8;
return VT.changeVectorElementTypeToInteger();
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
unsigned Align = 4;
if (Subtarget->hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
!F->hasFnAttr(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
if (Subtarget->getStackAlignment() >= 32) {
if (Subtarget->hasAVX2())
return MVT::v8i32;
if (Subtarget->hasAVX())
return MVT::v8f32;
if (Subtarget->hasSSE2())
return MVT::v4i32;
if (Subtarget->hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::Create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
return Table;
/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
/// MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget->isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
switch (VT.getSimpleVT().SimpleTy) {
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = (Subtarget->is64Bit()
? X86::GR64RegisterClass : X86::GR32RegisterClass);
case MVT::x86mmx:
RRC = X86::VR64RegisterClass;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
case MVT::v4f64:
RRC = X86::VR128RegisterClass;
return std::make_pair(RRC, Cost);
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
return false;
if (Subtarget->is64Bit()) {
// %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x28;
if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
AddressSpace = 256;
AddressSpace = 257;
} else {
// %gs:0x14 on i386
Offset = 0x14;
AddressSpace = 256;
return true;
// Return Value Calling Convention Implementation
#include ""
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
// Add the regs to the liveout set for the function.
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
// Don't emit a copytoreg.
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into %rax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
Flag = Chain.getValue(1);
// RAX now acts like a return value.
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
return DAG.getNode(X86ISD::RET_FLAG, dl,
MVT::Other, &RetOps[0], RetOps.size());
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
HasRet = true;
if (!HasRet)
return false;
Chain = TCChain;
return true;
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
ReturnMVT = MVT::i8;
ReturnMVT = MVT::i32;
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
SDValue Val;
// If this is a call to a function that returns an fp value on the floating
// point stack, we must guarantee the the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
// if the return value is not used. We use the FpPOP_RETVAL instruction
// instead.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
SDValue Ops[] = { Chain, InFlag };
Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
MVT::Other, MVT::Glue, Ops, 2), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
// xmm register.
if (CopyVT != VA.getValVT())
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
return Chain;
// C & StdCall & Fast Calling Convention implementation
// StdCall calling convention seems to be standard for many Windows' API
// routines and around. It differs from C calling convention just a little:
// callee should clean up the stack, not caller. Symbols should be also
// decorated in some fancy way :) It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
return false;
return Outs[0].Flags.isSRet();
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
static bool
ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
return false;
return Ins[0].Flags.isSRet();
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
MachinePointerInfo(), MachinePointerInfo());
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
return false;
return true;
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
X86TargetLowering::LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
// If value is passed by pointer we have address passed instead of the value
// itself.
if (VA.getLocInfo() == CCValAssign::Indirect)
ValVT = VA.getLocVT();
ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
false, false, false, 0);
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWindows = Subtarget->isTargetWindows();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
else if (RegVT == MVT::f32)
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT == MVT::x86mmx)
RC = X86::VR64RegisterClass;
llvm_unreachable("Unknown argument type!");
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
} else {
ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
MachinePointerInfo(), false, false, false, 0);
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (FuncIsMadeTailCallSafe(CallConv,
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
// The XMM registers which might contain var arg parameters are shadowed
// in their paired GPR. So we only need to save the GPR to their home
// slots.
TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
if (IsWin64) {
const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so
// they may be loaded by deferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
FuncInfo->getRegSaveFrameIndex(), Offset),
false, false, 0);
Offset += 8;
if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 11> SaveXMMOps;
unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
&SaveXMMOps[0], SaveXMMOps.size()));
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
// Some CCs need callee pop.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
return Chain;
X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(Chain, dl, Arg, PtrOff,
false, false, 0);
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
false, false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
bool Is64Bit, int FPDiff, DebugLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
false, false, 0);
return Chain;
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
bool IsWindows = Subtarget->isTargetWindows();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.
// But only set if delta is greater than previous delta.
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
case CCValAssign::AExt:
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
false, false, 0);
Arg = SpillSlot;
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
} else if (!IsSibcall && (!isTailCall || isByVal)) {
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
DebugLoc(), getPointerTy()),
InFlag = Chain.getValue(1);
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
InFlag = Chain.getValue(1);
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
DAG.getStore(ArgChain, dl, Arg, FIN,
false, false, 0));
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Copy arguments to their registers.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
InFlag =SDValue();
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
FPDiff, dl);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
bool ExtraLoad = false;
unsigned WrapperKind = ISD::DELETED_NODE;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols most go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
WrapperKind = X86ISD::WrapperRIP;
ExtraLoad = true;