blob: 4989c61f68f8c780901a00917f38d28637340992 [file] [log] [blame]
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cctype>
#include <numeric>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ExperimentalVectorWideningLegalization(
"x86-experimental-vector-widening-legalization", cl::init(false),
cl::desc("Enable an experimental vector type legalization through widening "
"rather than promotion."),
cl::Hidden);
static cl::opt<int> ExperimentalPrefLoopAlignment(
"x86-experimental-pref-loop-alignment", cl::init(4),
cl::desc("Sets the preferable loop alignment for experiments "
"(the last x86-experimental-pref-loop-alignment bits"
" of the loop header PC will be 0)."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
"SHIFT, LEA, etc."),
cl::Hidden);
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
const char *Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
X86ScalarSSEf64 = Subtarget.hasSSE2();
X86ScalarSSEf32 = Subtarget.hasSSE1();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget.isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget.is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides and use cheaper ones.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
addBypassSlowDiv(64, 32);
}
if (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
}
if (Subtarget.isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
} else if (Subtarget.isTargetWindowsGNU()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget.is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Integer absolute.
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
setOperationAction(ShiftOp , MVT::i16 , Custom);
setOperationAction(ShiftOp , MVT::i32 , Custom);
if (Subtarget.is64Bit())
setOperationAction(ShiftOp , MVT::i64 , Custom);
}
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
// f32/f64 are legal, f80 is custom.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!Subtarget.useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// SSE has no i16 to fp conversion, only i32.
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
}
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
} else {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
} else if (!Subtarget.is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
//
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
}
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
if (!Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
}
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::ConstantPool , VT, Custom);
setOperationAction(ISD::JumpTable , VT, Custom);
setOperationAction(ISD::GlobalAddress , VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
setOperationAction(ISD::SRA_PARTS, VT, Custom);
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
!Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG, VT, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
// These might be better off as horizontal vector ops.
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
// Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
} else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
if (UseX87)
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
if (UseX87)
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
if (UseX87)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (UseX87) {
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
// Expand FP32 immediates into loads from the stack, save special cases.
if (isTypeLegal(MVT::f32)) {
if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
}
// Expand FP64 immediates into loads from the stack, save special cases.
if (isTypeLegal(MVT::f64)) {
if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
}
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
// Always expand sin/cos functions even though x87 has an instruction.
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
// N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
// types, we have to deal with them whether we ask for Expansion or not.
// Setting Expand causes its own optimisation problems though, so leave
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
// split/scalarized right now.
if (VT.getVectorElementType() == MVT::f16)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
}
setOperationAction(ISD::MUL, MVT::v2i8, Custom);
setOperationAction(ISD::MUL, MVT::v2i16, Custom);
setOperationAction(ISD::MUL, MVT::v2i32, Custom);
setOperationAction(ISD::MUL, MVT::v4i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i16, Custom);
setOperationAction(ISD::MUL, MVT::v8i8, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
if (!ExperimentalVectorWideningLegalization) {
// Use widening instead of promotion.
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16 }) {
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
}
}
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Provide custom widening for v2f32 setcc. This is really for VLX when
// setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
// type legalization changing the result type to v4i1 during widening.
// It works fine for SSE2 and is probably faster so no need to qualify with
// VLX support.
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// We support custom legalizing of sext and anyext loads for specific
// memory vector types which we can load as a scalar (or sequence of
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
}
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
// By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
// promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
// split again based on the input type, this will cause an AssertSExt i16 to
// be emitted instead of an AssertZExt. This will allow packssdw followed by
// packuswb to be used to truncate to v8i8. This is necessary since packusdw
// isn't available until sse4.1.
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v8i8, Custom);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
if (ExperimentalVectorWideningLegalization) {
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
} else {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
}
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
// With AVX512, expanding (and promoting the shifts) is better.
if (!Subtarget.hasAVX512())
setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
if (!ExperimentalVectorWideningLegalization) {
// Avoid narrow result types when widening. The legal types are listed
// in the next loop.
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
if (!ExperimentalVectorWideningLegalization)
setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
// i8 vectors are custom because the source register and source
// source memory operand types are not the same width.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::ROTL, VT, Custom);
// XOP can efficiently perform BITREVERSE with VPPERM.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
bool HasInt256 = Subtarget.hasInt256();
addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
if (ExperimentalVectorWideningLegalization) {
// These types need custom splitting if their input is a 128-bit vector.
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
}
setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
// With BWI, expanding (and promoting the shifts) is the better.
if (!Subtarget.hasBWI())
setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
// TODO - remove this once 256-bit X86ISD::ANDNP correctly split.
setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::FMA, VT, Legal);
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
setOperationAction(ISD::ABS, MVT::v4i64, Custom);
setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
}
if (HasInt256) {
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}
// Custom lower several nodes for 256-bit types.
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
if (HasInt256) {
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MGATHER, VT, Custom);
}
}
// This block controls legalization of the mask vector sizes that are
// available with AVX512. 512-bit vectors are in a separate block controlled
// by useAVX512Regs.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
setOperationAction(ISD::STORE, MVT::v1i1, Custom);
setOperationAction(ISD::STORE, MVT::v2i1, Custom);
setOperationAction(ISD::STORE, MVT::v4i1, Custom);
setOperationAction(ISD::STORE, MVT::v8i1, Custom);
}
// Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
}
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
// This block controls legalization for 512-bit operations with 32/64 bit
// elements. 512-bits can be disabled based on prefer-vector-width and
// required-vector-width function attributes.
if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (!Subtarget.hasVLX()) {
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
// to 512-bit rather than use the AVX2 instructions so that we can use
// k-masks.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
if (ExperimentalVectorWideningLegalization) {
// Need to custom widen this if we don't have AVX512BW.
setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
}
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
}
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
for (auto VT : { MVT::v16i32, MVT::v8i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
// Need to custom split v32i16/v64i8 bitcasts.
if (!Subtarget.hasBWI()) {
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
}
if (Subtarget.hasVBMI2()) {
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
}
}// has AVX-512
// This block controls legalization for operations that don't have
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
// narrower widths.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
setOperationAction(ISD::MUL, VT, Legal);
}
}
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
// This block control legalization of v32i1/v64i1 which are available with
// AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
// useBWIRegs.
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (auto VT : { MVT::v16i1, MVT::v32i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
}
// This block controls legalization for v32i16 and v64i8. 512-bits can be
// disabled based on prefer-vector-width and required-vector-width function
// attributes.
if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
// Extends from v64i1 masks to 512-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
}
if (Subtarget.hasBITALG()) {
for (auto VT : { MVT::v64i8, MVT::v32i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
if (Subtarget.hasVBMI2()) {
setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
if (Subtarget.hasBITALG()) {
for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
}
if (Subtarget.hasBWI()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
if (Subtarget.hasVBMI2()) {
// TODO: Make these legal even without VLX?
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
}
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
}
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
}
// Combine sin / cos into _sincos_stret if it is available.
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
}
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FSIN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::MGATHER);
computeRegisterProperties(Subtarget.getRegisterInfo());
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
// TODO: These control memcmp expansion in CGP and could be raised higher, but
// that needs to benchmarked and balanced with the potential use of vector
// load/store types (PR33329, PR33914).
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
verifyIntrinsicTables();
}
// This has so far only been implemented for 64-bit MachO.
bool X86TargetLowering::useLoadStackGuardNode() const {
return Subtarget.isTargetMachO() && Subtarget.is64Bit();
}
bool X86TargetLowering::useStackGuardXorFP() const {
// Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
return Subtarget.getTargetTriple().isOSMSVCRT();
}
SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL) const {
EVT PtrTy = getPointerTy(DAG.getDataLayout());
unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
return SDValue(Node, 0);
}
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return MVT::v32i8;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
if (!VT.isVector())
return MVT::i8;
if (Subtarget.hasAVX512()) {
const unsigned NumElts = VT.getVectorNumElements();
// Figure out what this type will be legalized to.
EVT LegalVT = VT;
while (getTypeAction(Context, LegalVT) != TypeLegal)
LegalVT = getTypeToTransformTo(Context, LegalVT);
// If we got a 512-bit vector then we'll definitely have a vXi1 compare.
if (LegalVT.getSimpleVT().is512BitVector())
return EVT::getVectorVT(Context, MVT::i1, NumElts);
if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
// If we legalized to less than a 512-bit vector, then we will use a vXi1
// compare for vXi32/vXi64 for sure. If we have BWI we will also support
// vXi16/vXi8.
MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
return EVT::getVectorVT(Context, MVT::i1, NumElts);
}
}
return VT.changeVectorElementTypeToInteger();
}
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
}
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget.hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
/// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function &F = MF.getFunction();
if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Size >= 32 && Subtarget.hasAVX()) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
// choose an optimal type with a vector element larger than a byte,
// getMemsetStores() may create an intermediate splat (using an integer
// multiply) before we splat as a vector.
return MVT::v32i8;
}
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
// If we have SSE1 registers we should be able to use them.
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
// Also, do not use f64 to lower memset unless this is a memset of zeros.
// The gymnastics of splatting a byte value into an XMM register and then
// only using 8-byte stores (because this is a CPU with slow unaligned
// 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
if (Subtarget.is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
return X86ScalarSSEf32;
else if (VT == MVT::f64)
return X86ScalarSSEf64;
return true;
}
bool
X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
*Fast = true;
break;
case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();
break;
case 256:
*Fast = !Subtarget.isUnalignedMem32Slow();
break;
// TODO: What about AVX-512 (512-bit) accesses?
}
}
// Misaligned accesses of any size are always allowed.
return true;
}
/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (isPositionIndependent() && Subtarget.isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
ArgListTy &Args) const {
// Only relabel X86-32 for C / Stdcall CCs.
if (Subtarget.is64Bit())
return;
if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
return;
unsigned ParamRegs = 0;
if (auto *M = MF->getFunction().getParent())
ParamRegs = M->getNumberRegisterParameters();
// Mark the first N int arguments as having reg
for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
Type *T = Args[Idx].Ty;
if (T->isIntOrPtrTy())
if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
unsigned numRegs = 1;
if (MF->getDataLayout().getTypeAllocSize(T) > 4)
numRegs = 2;
if (ParamRegs < numRegs)
return;
ParamRegs -= numRegs;
Args[Idx].IsInReg = true;
}
}
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx)