| //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that X86 uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "X86ISelLowering.h" |
| #include "Utils/X86ShuffleDecode.h" |
| #include "X86CallingConv.h" |
| #include "X86FrameLowering.h" |
| #include "X86InstrBuilder.h" |
| #include "X86IntrinsicsInfo.h" |
| #include "X86MachineFunctionInfo.h" |
| #include "X86TargetMachine.h" |
| #include "X86TargetObjectFile.h" |
| #include "llvm/ADT/SmallBitVector.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/Analysis/EHPersonalities.h" |
| #include "llvm/CodeGen/IntrinsicLowering.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| #include "llvm/CodeGen/MachineModuleInfo.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/WinEHFuncInfo.h" |
| #include "llvm/IR/CallSite.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalAlias.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/MC/MCAsmInfo.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCExpr.h" |
| #include "llvm/MC/MCSymbol.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <bitset> |
| #include <cctype> |
| #include <numeric> |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "x86-isel" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| |
| static cl::opt<int> ExperimentalPrefLoopAlignment( |
| "x86-experimental-pref-loop-alignment", cl::init(4), |
| cl::desc( |
| "Sets the preferable loop alignment for experiments (as log2 bytes)" |
| "(the last x86-experimental-pref-loop-alignment bits" |
| " of the loop header PC will be 0)."), |
| cl::Hidden); |
| |
| // Added in 10.0. |
| static cl::opt<bool> EnableOldKNLABI( |
| "x86-enable-old-knl-abi", cl::init(false), |
| cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of " |
| "one ZMM register on AVX512F, but not AVX512BW targets."), |
| cl::Hidden); |
| |
| static cl::opt<bool> MulConstantOptimization( |
| "mul-constant-optimization", cl::init(true), |
| cl::desc("Replace 'mul x, Const' with more effective instructions like " |
| "SHIFT, LEA, etc."), |
| cl::Hidden); |
| |
| static cl::opt<bool> ExperimentalUnorderedISEL( |
| "x86-experimental-unordered-atomic-isel", cl::init(false), |
| cl::desc("Use LoadSDNode and StoreSDNode instead of " |
| "AtomicSDNode for unordered atomic loads and " |
| "stores respectively."), |
| cl::Hidden); |
| |
| /// Call this when the user attempts to do something unsupported, like |
| /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike |
| /// report_fatal_error, so calling code should attempt to recover without |
| /// crashing. |
| static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
| const char *Msg) { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| DAG.getContext()->diagnose( |
| DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); |
| } |
| |
| X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, |
| const X86Subtarget &STI) |
| : TargetLowering(TM), Subtarget(STI) { |
| bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
| X86ScalarSSEf64 = Subtarget.hasSSE2(); |
| X86ScalarSSEf32 = Subtarget.hasSSE1(); |
| MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
| |
| // Set up the TargetLowering object. |
| |
| // X86 is weird. It always uses i8 for shift amounts and setcc results. |
| setBooleanContents(ZeroOrOneBooleanContent); |
| // X86-SSE is even stranger. It uses -1 or 0 for vector masks. |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| |
| // For 64-bit, since we have so many registers, use the ILP scheduler. |
| // For 32-bit, use the register pressure specific scheduling. |
| // For Atom, always use ILP scheduling. |
| if (Subtarget.isAtom()) |
| setSchedulingPreference(Sched::ILP); |
| else if (Subtarget.is64Bit()) |
| setSchedulingPreference(Sched::ILP); |
| else |
| setSchedulingPreference(Sched::RegPressure); |
| const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); |
| |
| // Bypass expensive divides and use cheaper ones. |
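| // (For example, addBypassSlowDiv(32, 8) makes the BypassSlowDivision utility |
| // emit a run-time check and use an 8-bit divide when both operands of a |
| // 32-bit divide fit in 8 bits.) |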
| if (TM.getOptLevel() >= CodeGenOpt::Default) { |
| if (Subtarget.hasSlowDivide32()) |
| addBypassSlowDiv(32, 8); |
| if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) |
| addBypassSlowDiv(64, 32); |
| } |
| |
| if (Subtarget.isTargetWindowsMSVC() || |
| Subtarget.isTargetWindowsItanium()) { |
| // Setup Windows compiler runtime calls. |
| setLibcallName(RTLIB::SDIV_I64, "_alldiv"); |
| setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); |
| setLibcallName(RTLIB::SREM_I64, "_allrem"); |
| setLibcallName(RTLIB::UREM_I64, "_aullrem"); |
| setLibcallName(RTLIB::MUL_I64, "_allmul"); |
| setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); |
| setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); |
| setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); |
| setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); |
| setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); |
| } |
| |
| if (Subtarget.isTargetDarwin()) { |
| // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. |
| setUseUnderscoreSetJmp(false); |
| setUseUnderscoreLongJmp(false); |
| } else if (Subtarget.isTargetWindowsGNU()) { |
| // MS runtime is weird: it exports _setjmp, but longjmp has no leading underscore! |
| setUseUnderscoreSetJmp(true); |
| setUseUnderscoreLongJmp(false); |
| } else { |
| setUseUnderscoreSetJmp(true); |
| setUseUnderscoreLongJmp(true); |
| } |
| |
| // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size |
| // to 32 bits so AtomicExpandPass will expand it and we don't need cmpxchg8b. |
| // FIXME: Should we be limiting the atomic size on other configs? Default is |
| // 1024. |
| if (!Subtarget.hasCmpxchg8b()) |
| setMaxAtomicSizeInBitsSupported(32); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i8, &X86::GR8RegClass); |
| addRegisterClass(MVT::i16, &X86::GR16RegClass); |
| addRegisterClass(MVT::i32, &X86::GR32RegClass); |
| if (Subtarget.is64Bit()) |
| addRegisterClass(MVT::i64, &X86::GR64RegClass); |
| |
| for (MVT VT : MVT::integer_valuetypes()) |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| |
| // We don't accept any truncstore of integer registers. |
| setTruncStoreAction(MVT::i64, MVT::i32, Expand); |
| setTruncStoreAction(MVT::i64, MVT::i16, Expand); |
| setTruncStoreAction(MVT::i64, MVT::i8 , Expand); |
| setTruncStoreAction(MVT::i32, MVT::i16, Expand); |
| setTruncStoreAction(MVT::i32, MVT::i8 , Expand); |
| setTruncStoreAction(MVT::i16, MVT::i8, Expand); |
| |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| |
| // SETOEQ and SETUNE require checking two conditions. |
| setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); |
| setCondCodeAction(ISD::SETUNE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); |
| |
| // Integer absolute. |
| if (Subtarget.hasCMov()) { |
| setOperationAction(ISD::ABS , MVT::i16 , Custom); |
| setOperationAction(ISD::ABS , MVT::i32 , Custom); |
| } |
| setOperationAction(ISD::ABS , MVT::i64 , Custom); |
| |
| // Funnel shifts. |
| for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { |
| setOperationAction(ShiftOp , MVT::i16 , Custom); |
| setOperationAction(ShiftOp , MVT::i32 , Custom); |
| if (Subtarget.is64Bit()) |
| setOperationAction(ShiftOp , MVT::i64 , Custom); |
| } |
| |
| // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this |
| // operation. |
| setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); |
| setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); |
| setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); |
| |
| if (!Subtarget.useSoftFloat()) { |
| // We have an algorithm for SSE2->double, and we turn this into a |
| // 64-bit FILD followed by conditional FADD for other targets. |
| setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); |
| // We have an algorithm for SSE2, and we turn this into a 64-bit |
| // FILD or VCVTUSI2SS/SD for other targets. |
| setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); |
| } else { |
| setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); |
| } |
| |
| // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have |
| // this operation. |
| setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); |
| setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); |
| |
| if (!Subtarget.useSoftFloat()) { |
| // SSE has no i16 to fp conversion, only i32. |
| if (X86ScalarSSEf32) { |
| setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); |
| // f32 and f64 cases are Legal, f80 case is not |
| setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); |
| } else { |
| setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); |
| setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); |
| } |
| } else { |
| setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); |
| setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand); |
| } |
| |
| // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have |
| // this operation. |
| setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); |
| setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); |
| |
| if (!Subtarget.useSoftFloat()) { |
| // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 |
| // are Legal, f80 is custom lowered. |
| setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); |
| setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); |
| |
| setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); |
| setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); |
| } else { |
| setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); |
| setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); |
| setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); |
| } |
| |
| // Handle FP_TO_UINT by promoting the destination to a larger signed |
| // conversion. |
| setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); |
| setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); |
| setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); |
| |
| if (!Subtarget.useSoftFloat()) { |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| } |
| |
| // TODO: when we have SSE, these could be more efficient by using movd/movq. |
| if (!X86ScalarSSEf64) { |
| setOperationAction(ISD::BITCAST , MVT::f32 , Expand); |
| setOperationAction(ISD::BITCAST , MVT::i32 , Expand); |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::BITCAST , MVT::f64 , Expand); |
| // Without SSE, i64->f64 goes through memory. |
| setOperationAction(ISD::BITCAST , MVT::i64 , Expand); |
| } |
| } else if (!Subtarget.is64Bit()) |
| setOperationAction(ISD::BITCAST , MVT::i64 , Custom); |
| |
| // Scalar integer divide and remainder are lowered to use operations that |
| // produce two results, to match the available instructions. This exposes |
| // the two-result form to trivial CSE, which is able to combine x/y and x%y |
| // into a single instruction. |
| // |
| // Scalar integer multiply-high is also lowered to use two-result |
| // operations, to match the available instructions. However, plain multiply |
| // (low) operations are left as Legal, as there are single-result |
| // instructions for this in x86. Using the two-result multiply instructions |
| // when both high and low results are needed must be arranged by dagcombine. |
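| // For example, "x / y" and "x % y" each expand to an ISD::SDIVREM node; CSE |
| // merges the two nodes and a single IDIV then produces both results. |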
| for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| } |
| |
| setOperationAction(ISD::BR_JT , MVT::Other, Expand); |
| setOperationAction(ISD::BRCOND , MVT::Other, Custom); |
| for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, |
| MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| setOperationAction(ISD::BR_CC, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| } |
| if (Subtarget.is64Bit()) |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); |
| |
| setOperationAction(ISD::FREM , MVT::f32 , Expand); |
| setOperationAction(ISD::FREM , MVT::f64 , Expand); |
| setOperationAction(ISD::FREM , MVT::f80 , Expand); |
| setOperationAction(ISD::FREM , MVT::f128 , Expand); |
| setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); |
| |
| // Promote the i8 variants and force them up to i32, which has a shorter |
| // encoding. |
| setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); |
| setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
| if (!Subtarget.hasBMI()) { |
| setOperationAction(ISD::CTTZ , MVT::i16 , Custom); |
| setOperationAction(ISD::CTTZ , MVT::i32 , Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::CTTZ , MVT::i64 , Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); |
| } |
| } |
| |
| if (Subtarget.hasLZCNT()) { |
| // When promoting the i8 variants, force them to i32 for a shorter |
| // encoding. |
| setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); |
| setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
| } else { |
| setOperationAction(ISD::CTLZ , MVT::i8 , Custom); |
| setOperationAction(ISD::CTLZ , MVT::i16 , Custom); |
| setOperationAction(ISD::CTLZ , MVT::i32 , Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::CTLZ , MVT::i64 , Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); |
| } |
| } |
| |
| // Special handling for half-precision floating point conversions. |
| // If we don't have F16C support, then lower half float conversions |
| // into library calls. |
| if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) { |
| setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
| } |
| |
| // There's never any support for operations beyond MVT::f32. |
| setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
| setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand); |
| setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand); |
| |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f80, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
| |
| if (Subtarget.hasPOPCNT()) { |
| setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); |
| } else { |
| setOperationAction(ISD::CTPOP , MVT::i8 , Expand); |
| setOperationAction(ISD::CTPOP , MVT::i16 , Expand); |
| setOperationAction(ISD::CTPOP , MVT::i32 , Expand); |
| if (Subtarget.is64Bit()) |
| setOperationAction(ISD::CTPOP , MVT::i64 , Expand); |
| else |
| setOperationAction(ISD::CTPOP , MVT::i64 , Custom); |
| } |
| |
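| // READCYCLECOUNTER is custom lowered to an RDTSC-based sequence that |
| // combines the EDX:EAX halves into a single i64 result. |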
| setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); |
| |
| if (!Subtarget.hasMOVBE()) |
| setOperationAction(ISD::BSWAP , MVT::i16 , Expand); |
| |
| // These should be promoted to a larger select which is supported. |
| setOperationAction(ISD::SELECT , MVT::i1 , Promote); |
| // X86 wants to expand cmov itself. |
| for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| } |
| for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| continue; |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| } |
| |
| // Custom action for SELECT MMX and expand action for SELECT_CC MMX |
| setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); |
| |
| setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); |
| // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since |
| // LLVM/Clang supports zero-cost DWARF and SEH exception handling. |
| setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
| if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) |
| setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
| |
| // Darwin ABI issue. |
| for (auto VT : { MVT::i32, MVT::i64 }) { |
| if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| continue; |
| setOperationAction(ISD::ConstantPool , VT, Custom); |
| setOperationAction(ISD::JumpTable , VT, Custom); |
| setOperationAction(ISD::GlobalAddress , VT, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, VT, Custom); |
| setOperationAction(ISD::ExternalSymbol , VT, Custom); |
| setOperationAction(ISD::BlockAddress , VT, Custom); |
| } |
| |
| // 64-bit shl, sra, srl (iff 32-bit x86) |
| for (auto VT : { MVT::i32, MVT::i64 }) { |
| if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| continue; |
| setOperationAction(ISD::SHL_PARTS, VT, Custom); |
| setOperationAction(ISD::SRA_PARTS, VT, Custom); |
| setOperationAction(ISD::SRL_PARTS, VT, Custom); |
| } |
| |
| if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) |
| setOperationAction(ISD::PREFETCH , MVT::Other, Legal); |
| |
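| // ATOMIC_FENCE is custom lowered: only a seq_cst cross-thread fence needs a |
| // real instruction (MFENCE, or a locked RMW when MFENCE is unavailable); |
| // weaker fences become plain compiler barriers. |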
| setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); |
| |
| // Expand certain atomics |
| for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); |
| setOperationAction(ISD::ATOMIC_STORE, VT, Custom); |
| } |
| |
| if (!Subtarget.is64Bit()) |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); |
| |
| if (Subtarget.hasCmpxchg16b()) { |
| setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
| } |
| |
| // FIXME - use subtarget debug flags |
| if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && |
| !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() && |
| TM.Options.ExceptionModel != ExceptionHandling::SjLj) { |
| setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); |
| } |
| |
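| // FRAME_TO_ARGS_OFFSET is used when lowering @llvm.eh.dwarf.cfa. |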
| setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); |
| setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); |
| |
| setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
| setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
| |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| |
| // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
| setOperationAction(ISD::VASTART , MVT::Other, Custom); |
| setOperationAction(ISD::VAEND , MVT::Other, Expand); |
| bool Is64Bit = Subtarget.is64Bit(); |
| setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); |
| setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); |
| |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); |
| |
| // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. |
| setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); |
| setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); |
| |
| if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { |
| // f32 and f64 use SSE. |
| // Set up the FP register classes. |
| addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass |
| : &X86::FR32RegClass); |
| addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass |
| : &X86::FR64RegClass); |
| |
| // Disable f32->f64 extload as we can only generate this in one instruction |
| // under optsize. So it's easier to pattern match (fpext (load)) for that |
| // case instead of needing to emit 2 instructions for extload in the |
| // non-optsize case. |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
| |
| for (auto VT : { MVT::f32, MVT::f64 }) { |
| // Use ANDPD to simulate FABS. |
| setOperationAction(ISD::FABS, VT, Custom); |
| |
| // Use XORP to simulate FNEG. |
| setOperationAction(ISD::FNEG, VT, Custom); |
| |
| // Use ANDPD and ORPD to simulate FCOPYSIGN. |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| |
| // These might be better off as horizontal vector ops. |
| setOperationAction(ISD::FADD, VT, Custom); |
| setOperationAction(ISD::FSUB, VT, Custom); |
| |
| // We don't support sin/cos/fmod |
| setOperationAction(ISD::FSIN , VT, Expand); |
| setOperationAction(ISD::FCOS , VT, Expand); |
| setOperationAction(ISD::FSINCOS, VT, Expand); |
| } |
| |
| // Lower this to MOVMSK plus an AND. |
| setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); |
| setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); |
| |
| } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) { |
| // Use SSE for f32, x87 for f64. |
| // Set up the FP register classes. |
| addRegisterClass(MVT::f32, &X86::FR32RegClass); |
| if (UseX87) |
| addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
| |
| // Use ANDPS to simulate FABS. |
| setOperationAction(ISD::FABS , MVT::f32, Custom); |
| |
| // Use XORP to simulate FNEG. |
| setOperationAction(ISD::FNEG , MVT::f32, Custom); |
| |
| if (UseX87) |
| setOperationAction(ISD::UNDEF, MVT::f64, Expand); |
| |
| // Use ANDPS and ORPS to simulate FCOPYSIGN. |
| if (UseX87) |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| |
| // We don't support sin/cos/fmod |
| setOperationAction(ISD::FSIN , MVT::f32, Expand); |
| setOperationAction(ISD::FCOS , MVT::f32, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| |
| if (UseX87) { |
| // Always expand sin/cos functions even though x87 has an instruction. |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| } |
| } else if (UseX87) { |
| // f32 and f64 in x87. |
| // Set up the FP register classes. |
| addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
| addRegisterClass(MVT::f32, &X86::RFP32RegClass); |
| |
| for (auto VT : { MVT::f32, MVT::f64 }) { |
| setOperationAction(ISD::UNDEF, VT, Expand); |
| setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
| |
| // Always expand sin/cos functions even though x87 has an instruction. |
| setOperationAction(ISD::FSIN , VT, Expand); |
| setOperationAction(ISD::FCOS , VT, Expand); |
| setOperationAction(ISD::FSINCOS, VT, Expand); |
| } |
| } |
| |
| // Expand FP32 immediates into loads from the stack, save special cases. |
| if (isTypeLegal(MVT::f32)) { |
| if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { |
| addLegalFPImmediate(APFloat(+0.0f)); // FLD0 |
| addLegalFPImmediate(APFloat(+1.0f)); // FLD1 |
| addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS |
| addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS |
| } else // SSE immediates. |
| addLegalFPImmediate(APFloat(+0.0f)); // xorps |
| } |
| // Expand FP64 immediates into loads from the stack, save special cases. |
| if (isTypeLegal(MVT::f64)) { |
| if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { |
| addLegalFPImmediate(APFloat(+0.0)); // FLD0 |
| addLegalFPImmediate(APFloat(+1.0)); // FLD1 |
| addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS |
| addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS |
| } else // SSE immediates. |
| addLegalFPImmediate(APFloat(+0.0)); // xorpd |
| } |
| |
| // We don't support FMA. |
| setOperationAction(ISD::FMA, MVT::f64, Expand); |
| setOperationAction(ISD::FMA, MVT::f32, Expand); |
| |
| // f80 always uses X87. |
| if (UseX87) { |
| addRegisterClass(MVT::f80, &X86::RFP80RegClass); |
| setOperationAction(ISD::UNDEF, MVT::f80, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); |
| { |
| APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); |
| addLegalFPImmediate(TmpFlt); // FLD0 |
| TmpFlt.changeSign(); |
| addLegalFPImmediate(TmpFlt); // FLD0/FCHS |
| |
| bool ignored; |
| APFloat TmpFlt2(+1.0); |
| TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
| &ignored); |
| addLegalFPImmediate(TmpFlt2); // FLD1 |
| TmpFlt2.changeSign(); |
| addLegalFPImmediate(TmpFlt2); // FLD1/FCHS |
| } |
| |
| // Always expand sin/cos functions even though x87 has an instruction. |
| setOperationAction(ISD::FSIN , MVT::f80, Expand); |
| setOperationAction(ISD::FCOS , MVT::f80, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f80, Expand); |
| |
| setOperationAction(ISD::FFLOOR, MVT::f80, Expand); |
| setOperationAction(ISD::FCEIL, MVT::f80, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::f80, Expand); |
| setOperationAction(ISD::FRINT, MVT::f80, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); |
| setOperationAction(ISD::FMA, MVT::f80, Expand); |
| setOperationAction(ISD::LROUND, MVT::f80, Expand); |
| setOperationAction(ISD::LLROUND, MVT::f80, Expand); |
| setOperationAction(ISD::LRINT, MVT::f80, Expand); |
| setOperationAction(ISD::LLRINT, MVT::f80, Expand); |
| } |
| |
| // f128 uses xmm registers, but most operations require libcalls. |
| if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { |
| addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| |
| addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps |
| |
| setOperationAction(ISD::FADD, MVT::f128, Custom); |
| setOperationAction(ISD::FSUB, MVT::f128, Custom); |
| setOperationAction(ISD::FDIV, MVT::f128, Custom); |
| setOperationAction(ISD::FMUL, MVT::f128, Custom); |
| setOperationAction(ISD::FMA, MVT::f128, Expand); |
| |
| setOperationAction(ISD::FABS, MVT::f128, Custom); |
| setOperationAction(ISD::FNEG, MVT::f128, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); |
| |
| setOperationAction(ISD::FSIN, MVT::f128, Expand); |
| setOperationAction(ISD::FCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
| |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
| // We need to custom handle any FP_ROUND with an f128 input, but |
| // LegalizeDAG uses the result type to know when to run a custom handler. |
| // So we have to list all legal floating point result types here. |
| if (isTypeLegal(MVT::f32)) { |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
| } |
| if (isTypeLegal(MVT::f64)) { |
| setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
| } |
| if (isTypeLegal(MVT::f80)) { |
| setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); |
| } |
| |
| setOperationAction(ISD::SETCC, MVT::f128, Custom); |
| |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
| } |
| |
| // Always use a library call for pow. |
| setOperationAction(ISD::FPOW , MVT::f32 , Expand); |
| setOperationAction(ISD::FPOW , MVT::f64 , Expand); |
| setOperationAction(ISD::FPOW , MVT::f80 , Expand); |
| setOperationAction(ISD::FPOW , MVT::f128 , Expand); |
| |
| setOperationAction(ISD::FLOG, MVT::f80, Expand); |
| setOperationAction(ISD::FLOG2, MVT::f80, Expand); |
| setOperationAction(ISD::FLOG10, MVT::f80, Expand); |
| setOperationAction(ISD::FEXP, MVT::f80, Expand); |
| setOperationAction(ISD::FEXP2, MVT::f80, Expand); |
| setOperationAction(ISD::FMINNUM, MVT::f80, Expand); |
| setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); |
| |
| // Some FP actions are always expanded for vector types. |
| for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, |
| MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FSINCOS, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| } |
| |
| // First set operation action for all vector types to either promote |
| // (for widening) or expand (for scalarization). Then we will selectively |
| // turn on ones that can be effectively codegen'd. |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); |
| setOperationAction(ISD::FMA, VT, Expand); |
| setOperationAction(ISD::FFLOOR, VT, Expand); |
| setOperationAction(ISD::FCEIL, VT, Expand); |
| setOperationAction(ISD::FTRUNC, VT, Expand); |
| setOperationAction(ISD::FRINT, VT, Expand); |
| setOperationAction(ISD::FNEARBYINT, VT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| setOperationAction(ISD::CTPOP, VT, Expand); |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| setOperationAction(ISD::CTLZ, VT, Expand); |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| setOperationAction(ISD::SETCC, VT, Expand); |
| setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
| setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
| setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); |
| setOperationAction(ISD::TRUNCATE, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Expand); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Expand); |
| setOperationAction(ISD::ANY_EXTEND, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(InnerVT, VT, Expand); |
| |
| setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); |
| |
| // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like |
| // types; we have to deal with them whether we ask for Expansion or not. |
| // Setting Expand causes its own optimisation problems though, so leave |
| // them legal. |
| if (VT.getVectorElementType() == MVT::i1) |
| setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| |
| // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are |
| // split/scalarized right now. |
| if (VT.getVectorElementType() == MVT::f16) |
| setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| } |
| } |
| |
| // FIXME: In order to prevent SSE instructions being expanded to MMX ones |
| // with -msoft-float, disable use of MMX as well. |
| if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { |
| addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); |
| // No operations on x86mmx supported, everything uses intrinsics. |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { |
| addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| |
| setOperationAction(ISD::FNEG, MVT::v4f32, Custom); |
| setOperationAction(ISD::FABS, MVT::v4f32, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); |
| setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
| setOperationAction(ISD::SELECT, MVT::v4f32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
| |
| setOperationAction(ISD::LOAD, MVT::v2f32, Custom); |
| setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
| |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom); |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { |
| addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| |
| // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM |
| // registers cannot be used even for integer operations. |
| addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| : &X86::VR128RegClass); |
| |
| for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, |
| MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { |
| setOperationAction(ISD::SDIV, VT, Custom); |
| setOperationAction(ISD::SREM, VT, Custom); |
| setOperationAction(ISD::UDIV, VT, Custom); |
| setOperationAction(ISD::UREM, VT, Custom); |
| } |
| |
| setOperationAction(ISD::MUL, MVT::v2i8, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i8, Custom); |
| setOperationAction(ISD::MUL, MVT::v8i8, Custom); |
| |
| setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| setOperationAction(ISD::MULHU, MVT::v4i32, Custom); |
| setOperationAction(ISD::MULHS, MVT::v4i32, Custom); |
| setOperationAction(ISD::MULHU, MVT::v16i8, Custom); |
| setOperationAction(ISD::MULHS, MVT::v16i8, Custom); |
| setOperationAction(ISD::MULHU, MVT::v8i16, Legal); |
| setOperationAction(ISD::MULHS, MVT::v8i16, Legal); |
| setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
| setOperationAction(ISD::FNEG, MVT::v2f64, Custom); |
| setOperationAction(ISD::FABS, MVT::v2f64, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); |
| |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); |
| setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); |
| setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); |
| setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); |
| } |
| |
| setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); |
| setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); |
| setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); |
| setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); |
| setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); |
| setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); |
| setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); |
| setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); |
| setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); |
| setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
| |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::ABS, VT, Custom); |
| |
| // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
| // setcc all the way to isel and prefer SETGT in some isel patterns. |
| setCondCodeAction(ISD::SETLT, VT, Custom); |
| setCondCodeAction(ISD::SETLE, VT, Custom); |
| } |
| |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| } |
| |
| for (auto VT : { MVT::v2f64, MVT::v2i64 }) { |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| |
| if (VT == MVT::v2i64 && !Subtarget.is64Bit()) |
| continue; |
| |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| } |
| |
| // Custom lower v2i64 and v2f64 selects. |
| setOperationAction(ISD::SELECT, MVT::v2f64, Custom); |
| setOperationAction(ISD::SELECT, MVT::v2i64, Custom); |
| setOperationAction(ISD::SELECT, MVT::v4i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::v8i16, Custom); |
| setOperationAction(ISD::SELECT, MVT::v16i8, Custom); |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); |
| |
| // Custom legalize these to avoid over promotion or custom promotion. |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); |
| |
| // Marking FP_TO_SINT v8i16 as Custom tricks type legalization into |
| // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is |
| // split again based on the input type, this will cause an AssertSExt i16 to |
| // be emitted instead of an AssertZExt. This will allow packssdw followed by |
| // packuswb to be used to truncate to v8i8. This is necessary since packusdw |
| // isn't available until sse4.1. |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
| |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
| |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
| |
| // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion. |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); |
| |
| setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); |
| |
| // We want to legalize this to an f64 load rather than an i64 load on |
| // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for |
| // store. |
| setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
| setOperationAction(ISD::LOAD, MVT::v4i16, Custom); |
| setOperationAction(ISD::LOAD, MVT::v8i8, Custom); |
| setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
| setOperationAction(ISD::STORE, MVT::v4i16, Custom); |
| setOperationAction(ISD::STORE, MVT::v8i8, Custom); |
| |
| setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); |
| if (!Subtarget.hasAVX512()) |
| setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); |
| |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); |
| |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); |
| |
| setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
| |
| // In the customized shift lowering, the legal v4i32/v2i64 cases |
| // in AVX2 will be recognized. |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| } |
| |
| setOperationAction(ISD::ROTL, MVT::v4i32, Custom); |
| setOperationAction(ISD::ROTL, MVT::v8i16, Custom); |
| |
| // With AVX512, expanding (and promoting the shifts) is better. |
| if (!Subtarget.hasAVX512()) |
| setOperationAction(ISD::ROTL, MVT::v16i8, Custom); |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { |
| setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
| setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
| setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
| |
| // These might be better off as horizontal vector ops. |
| setOperationAction(ISD::ADD, MVT::i16, Custom); |
| setOperationAction(ISD::ADD, MVT::i32, Custom); |
| setOperationAction(ISD::SUB, MVT::i16, Custom); |
| setOperationAction(ISD::SUB, MVT::i32, Custom); |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { |
| for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { |
| setOperationAction(ISD::FFLOOR, RoundedTy, Legal); |
| setOperationAction(ISD::FCEIL, RoundedTy, Legal); |
| setOperationAction(ISD::FTRUNC, RoundedTy, Legal); |
| setOperationAction(ISD::FRINT, RoundedTy, Legal); |
| setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); |
| } |
| |
| setOperationAction(ISD::SMAX, MVT::v16i8, Legal); |
| setOperationAction(ISD::SMAX, MVT::v4i32, Legal); |
| setOperationAction(ISD::UMAX, MVT::v8i16, Legal); |
| setOperationAction(ISD::UMAX, MVT::v4i32, Legal); |
| setOperationAction(ISD::SMIN, MVT::v16i8, Legal); |
| setOperationAction(ISD::SMIN, MVT::v4i32, Legal); |
| setOperationAction(ISD::UMIN, MVT::v8i16, Legal); |
| setOperationAction(ISD::UMIN, MVT::v4i32, Legal); |
| |
| // FIXME: Do we need to handle scalar-to-vector here? |
| setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
| |
| // We directly match byte blends in the backend as they match the VSELECT |
| // condition form. |
| setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
| |
| // SSE41 brings specific instructions for doing vector sign extend even in |
| // cases where we don't have SRA. |
| for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); |
| setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); |
| } |
| |
| // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X |
| for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
| setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); |
| } |
| |
| // i8 vectors are custom because the source register and source |
| // memory operand types are not the same width. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
| setOperationAction(ISD::ROTL, VT, Custom); |
| |
| // XOP can efficiently perform BITREVERSE with VPPERM. |
| for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) |
| setOperationAction(ISD::BITREVERSE, VT, Custom); |
| |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
| setOperationAction(ISD::BITREVERSE, VT, Custom); |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { |
| bool HasInt256 = Subtarget.hasInt256(); |
| |
| addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| : &X86::VR256RegClass); |
| |
| for (auto VT : { MVT::v8f32, MVT::v4f64 }) { |
| setOperationAction(ISD::FFLOOR, VT, Legal); |
| setOperationAction(ISD::FCEIL, VT, Legal); |
| setOperationAction(ISD::FTRUNC, VT, Legal); |
| setOperationAction(ISD::FRINT, VT, Legal); |
| setOperationAction(ISD::FNEARBYINT, VT, Legal); |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| } |
| |
| // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted |
| // even though v8i16 is a legal type. |
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); |
| |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); |
| |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom); |
| |
| if (!Subtarget.hasAVX512()) |
| setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); |
| |
| // In the customized shift lowering, the legal v8i32/v4i64 cases |
| // in AVX2 will be recognized. |
| for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| } |
| |
| // These types need custom splitting if their input is a 128-bit vector. |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
| |
| setOperationAction(ISD::ROTL, MVT::v8i32, Custom); |
| setOperationAction(ISD::ROTL, MVT::v16i16, Custom); |
| |
| // With BWI, expanding (and promoting the shifts) is better. |
| if (!Subtarget.hasBWI()) |
| setOperationAction(ISD::ROTL, MVT::v32i8, Custom); |
| |
| setOperationAction(ISD::SELECT, MVT::v4f64, Custom); |
| setOperationAction(ISD::SELECT, MVT::v4i64, Custom); |
| setOperationAction(ISD::SELECT, MVT::v8i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::v16i16, Custom); |
| setOperationAction(ISD::SELECT, MVT::v32i8, Custom); |
| setOperationAction(ISD::SELECT, MVT::v8f32, Custom); |
| |
| for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| } |
| |
| setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); |
| |
| for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::CTLZ, VT, Custom); |
| |
| // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
| // setcc all the way to isel and prefer SETGT in some isel patterns. |
| setCondCodeAction(ISD::SETLT, VT, Custom); |
| setCondCodeAction(ISD::SETLE, VT, Custom); |
| } |
| |
| if (Subtarget.hasAnyFMA()) { |
| for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, |
| MVT::v2f64, MVT::v4f64 }) |
| setOperationAction(ISD::FMA, VT, Legal); |
| } |
| |
| for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); |
| } |
| |
| setOperationAction(ISD::MUL, MVT::v4i64, Custom); |
| setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::MUL, MVT::v32i8, Custom); |
| |
| setOperationAction(ISD::MULHU, MVT::v8i32, Custom); |
| setOperationAction(ISD::MULHS, MVT::v8i32, Custom); |
| setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::MULHU, MVT::v32i8, Custom); |
| setOperationAction(ISD::MULHS, MVT::v32i8, Custom); |
| |
| setOperationAction(ISD::ABS, MVT::v4i64, Custom); |
| setOperationAction(ISD::SMAX, MVT::v4i64, Custom); |
| setOperationAction(ISD::UMAX, MVT::v4i64, Custom); |
| setOperationAction(ISD::SMIN, MVT::v4i64, Custom); |
| setOperationAction(ISD::UMIN, MVT::v4i64, Custom); |
| |
| setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| |
| for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { |
| setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); |
| setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); |
| } |
| |
| for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
| } |
| |
| if (HasInt256) { |
| // The custom lowering for UINT_TO_FP for v8i32 becomes interesting |
| // when we have a 256bit-wide blend with immediate. |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); |
| |
| // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X |
| for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
| setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); |
| setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); |
| } |
| } |
| |
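| // Masked load/store map to VMASKMOV/VPMASKMOV. The AVX forms zero the |
| // disabled lanes, so without AVX512VL, MLOAD stays Custom to blend in a |
| // non-zero pass-through value after the load. |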
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { |
| setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
| setOperationAction(ISD::MSTORE, VT, Legal); |
| } |
| |
| // Extract subvector is special because the value type |
| // (result) is 128-bit but the source is 256-bit wide. |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| MVT::v4f32, MVT::v2f64 }) { |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| } |
| |
| // Custom lower several nodes for 256-bit types. |
| for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
| MVT::v8f32, MVT::v4f64 }) { |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| } |
| |
| if (HasInt256) { |
| setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); |
| |
| // Custom legalize 2x32 to get a little better code. |
| setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); |
| setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); |
| |
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| } |
| } |
| |
| // This block controls legalization of the mask vector sizes that are |
| // available with AVX512. 512-bit vectors are in a separate block controlled |
| // by useAVX512Regs. |
| if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
| addRegisterClass(MVT::v1i1, &X86::VK1RegClass); |
| addRegisterClass(MVT::v2i1, &X86::VK2RegClass); |
| addRegisterClass(MVT::v4i1, &X86::VK4RegClass); |
| addRegisterClass(MVT::v8i1, &X86::VK8RegClass); |
| addRegisterClass(MVT::v16i1, &X86::VK16RegClass); |
| |
| setOperationAction(ISD::SELECT, MVT::v1i1, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); |
| |
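| // FP-to-vXi1 conversions are promoted: the conversion is performed in the |
| // corresponding i32 vector type and the result is then truncated to the |
| // mask type. |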
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); |
| |
| // There is no byte-sized k-register load or store without AVX512DQ. |
| if (!Subtarget.hasDQI()) { |
| setOperationAction(ISD::LOAD, MVT::v1i1, Custom); |
| setOperationAction(ISD::LOAD, MVT::v2i1, Custom); |
| setOperationAction(ISD::LOAD, MVT::v4i1, Custom); |
| setOperationAction(ISD::LOAD, MVT::v8i1, Custom); |
| |
| setOperationAction(ISD::STORE, MVT::v1i1, Custom); |
| setOperationAction(ISD::STORE, MVT::v2i1, Custom); |
| setOperationAction(ISD::STORE, MVT::v4i1, Custom); |
| setOperationAction(ISD::STORE, MVT::v8i1, Custom); |
| } |
| |
| // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. |
| for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| } |
| |
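| // Arithmetic on vXi1 mask vectors has no dedicated instructions; since |
| // addition/subtraction mod 2 is XOR and multiplication is AND, these can |
| // be lowered onto the k-register logic operations. |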
| for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { |
| setOperationAction(ISD::ADD, VT, Custom); |
| setOperationAction(ISD::SUB, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::UADDSAT, VT, Custom); |
| setOperationAction(ISD::SADDSAT, VT, Custom); |
| setOperationAction(ISD::USUBSAT, VT, Custom); |
| setOperationAction(ISD::SSUBSAT, VT, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| } |
| |
| for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| } |
| |
| // This block controls legalization for 512-bit operations with 32/64 bit |
| // elements. 512-bits can be disabled based on prefer-vector-width and |
| // required-vector-width function attributes. |
| if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { |
| addRegisterClass(MVT::v16i32, &X86::VR512RegClass); |
| addRegisterClass(MVT::v16f32, &X86::VR512RegClass); |
| addRegisterClass(MVT::v8i64, &X86::VR512RegClass); |
| addRegisterClass(MVT::v8f64, &X86::VR512RegClass); |
| |
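| // AVX512 also has 512-bit sign/zero-extending vector loads, VPMOV[SZ]X |
| // with a memory operand. |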
| for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
| setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); |
| setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); |
| setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); |
| setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); |
| setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); |
| } |
| |
| for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FMA, VT, Legal); |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| } |
| |
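| // 512-bit FP <-> i32 conversions are legal (VCVTTPS2DQ/VCVTTPS2UDQ and |
| // VCVTDQ2PS/VCVTUDQ2PS); narrower integer results are promoted to v16i32 |
| // and then truncated. |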
| setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); |
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32); |
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32); |
| setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32); |
| setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); |
| |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom); |
| |
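| // AVX512F has truncating vector stores: VPMOVQB/VPMOVQW/VPMOVQD and |
| // VPMOVDB/VPMOVDW. |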
| setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); |
| setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); |
| setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); |
| setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); |
| setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); |
| |
| // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE |
| // to 512-bit rather than use the AVX2 instructions so that we can use |
| // k-masks. |
| if (!Subtarget.hasVLX()) { |
| for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
| setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
| |
| // Need to custom widen this if we don't have AVX512BW. |
| setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); |
| |
| for (auto VT : { MVT::v16f32, MVT::v8f64 }) { |
| setOperationAction(ISD::FFLOOR, VT, Legal); |
| setOperationAction(ISD::FCEIL, VT, Legal); |
| setOperationAction(ISD::FTRUNC, VT, Legal); |
| setOperationAction(ISD::FRINT, VT, Legal); |
| setOperationAction(ISD::FNEARBYINT, VT, Legal); |
| |
| setOperationAction(ISD::SELECT, VT, Custom); |
| } |
| |
| // Without BWI we need to use custom lowering to handle MVT::v64i8 input. |
| for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) { |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
| } |
| |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); |
| |
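| // v16i32 multiply maps to VPMULLD. There is no v8i64 multiply without |
| // AVX512DQ (VPMULLQ), so it stays Custom here and becomes Legal below |
| // when DQI is available. |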
| setOperationAction(ISD::MUL, MVT::v8i64, Custom); |
| setOperationAction(ISD::MUL, MVT::v16i32, Legal); |
| |
| setOperationAction(ISD::MULHU, MVT::v16i32, Custom); |
| setOperationAction(ISD::MULHS, MVT::v16i32, Custom); |
| |
| for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::ABS, VT, Legal); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::ROTL, VT, Custom); |
| setOperationAction(ISD::ROTR, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| |
| // These condition codes aren't legal in SSE/AVX, and under AVX512 we keep |
| // the setcc all the way to isel and prefer SETGT in some isel patterns. |
| setCondCodeAction(ISD::SETLT, VT, Custom); |
| setCondCodeAction(ISD::SETLE, VT, Custom); |
| } |
| |
| if (Subtarget.hasDQI()) { |
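| // AVX512DQ adds direct 64-bit integer <-> FP conversions (VCVTQQ2PD, |
| // VCVTUQQ2PD, VCVTTPD2QQ, VCVTTPD2UQQ, etc.) and the VPMULLQ multiply. |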
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); |
| |
| setOperationAction(ISD::MUL, MVT::v8i64, Legal); |
| } |
| |
| if (Subtarget.hasCDI()) { |
| // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version. |
| for (auto VT : { MVT::v16i32, MVT::v8i64} ) { |
| setOperationAction(ISD::CTLZ, VT, Legal); |
| } |
| } // Subtarget.hasCDI() |
| |
| if (Subtarget.hasVPOPCNTDQ()) { |
| for (auto VT : { MVT::v16i32, MVT::v8i64 }) |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| } |
| |
| // Extract subvector is special because the value type |
| // (result) is 256-bit but the source is 512-bit wide. |
| // 128-bit was made Legal under AVX1. |
| for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
| MVT::v8f32, MVT::v4f64 }) |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| |
| for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
| setOperationAction(ISD::MLOAD, VT, Legal); |
| setOperationAction(ISD::MSTORE, VT, Legal); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| } |
| if (!Subtarget.hasBWI()) { |
| // Need to custom split v32i16/v64i8 bitcasts. |
| setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); |
| |
| // Better to split these into two 256-bit ops. |
| setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom); |
| } |
| |
| if (Subtarget.hasVBMI2()) { |
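| // VBMI2 adds the double-shift instructions (VPSHLD/VPSHRD and their |
| // variable-count forms) used to implement funnel shifts. |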
| for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
| setOperationAction(ISD::FSHL, VT, Custom); |
| setOperationAction(ISD::FSHR, VT, Custom); |
| } |
| } |
| } // has AVX-512 |
| |
| // This block controls legalization for operations that don't have |
| // pre-AVX512 equivalents. Without VLX we use 512-bit operations for |
| // narrower widths. |
| if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
| // These operations are handled on non-VLX by artificially widening in |
| // isel patterns. |
| // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? |
| |
| setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
| |
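| // AVX512 (with VLX, or via the widening described above) provides 64-bit |
| // element min/max/abs (VPMAXSQ, VPMINUQ, VPABSQ, etc.), which have no |
| // SSE/AVX2 equivalents. |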
| for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::ABS, VT, Legal); |
| } |
| |
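| // AVX512 has 32/64-bit element rotates (VPROLD/VPRORD and the variable |
| // VPROLVD/VPRORVD forms), which the custom lowering targets. |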
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
| setOperationAction(ISD::ROTL, VT, Custom); |
| setOperationAction(ISD::ROTR, VT, Custom); |
| } |
| |
| // Custom legalize 2x32 to get a little better code. |
| setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); |
| setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); |
| |
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| if (Subtarget.hasDQI()) { |
| for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
| setOperationAction(ISD::SINT_TO_FP, VT, Legal); |
| setOperationAction(ISD::UINT_TO_FP, VT, Legal); |
| setOperationAction(ISD::FP_TO_SINT, VT, Legal); |
| setOperationAction(ISD::FP_TO_UINT, VT, Legal); |
| |
| setOperationAction(ISD::MUL, VT, Legal); |
| } |
| } |
| |
| if (Subtarget.hasCDI()) { |
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
| setOperationAction(ISD::CTLZ, VT, Legal); |
| } |
| } // Subtarget.hasCDI() |
| |
| if (Subtarget.hasVPOPCNTDQ()) { |
| for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| } |
| } |
| |
| // This block controls legalization of v32i1/v64i1, which are available with |
| // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with |
| // useBWIRegs. |
| if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
| addRegisterClass(MVT::v32i1, &X86::VK32RegClass); |
| addRegisterClass(MVT::v64i1, &X86::VK64RegClass); |
| |
| for (auto VT : { MVT::v32i1, MVT::v64i1 }) { |
| setOperationAction(ISD::ADD, VT, Custom); |
| setOperationAction(ISD::SUB, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| setOperationAction(ISD::UADDSAT, VT, Custom); |
| setOperationAction(ISD::SADDSAT, VT, Custom); |
| setOperationAction(ISD::USUBSAT, VT, Custom); |
| setOperationAction(ISD::SSUBSAT, VT, Custom); |
| |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| } |
| |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); |
| for (auto VT : { MVT::v16i1, MVT::v32i1 }) |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| // Extends from v32i1 masks to 256-bit vectors. |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); |
| setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); |
| } |
| |
| // This block controls legalization for v32i16 and v64i8. 512-bits can be |
| // disabled based on prefer-vector-width and required-vector-width function |
| // attributes. |
| if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { |
| addRegisterClass(MVT::v32i16, &X86::VR512RegClass); |
| addRegisterClass(MVT::v64i8, &X86::VR512RegClass); |
| |
| // Extends from v64i1 masks to 512-bit vectors. |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); |
| setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); |
| |
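| // 16-bit element multiplies map directly to VPMULLW/VPMULHW/VPMULHUW; |
| // there is no byte multiply, so the v64i8 cases are custom lowered, |
| // typically by widening to 16-bit elements. |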
| setOperationAction(ISD::MUL, MVT::v32i16, Legal); |
| setOperationAction(ISD::MUL, MVT::v64i8, Custom); |
| setOperationAction(ISD::MULHS, MVT::v32i16, Legal); |
| setOperationAction(ISD::MULHU, MVT::v32i16, Legal); |
| setOperationAction(ISD::MULHS, MVT::v64i8, Custom); |
| setOperationAction(ISD::MULHU, MVT::v64i8, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); |
| setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); |
| setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); |
| |
| setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); |
| setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); |
| |
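| // AVX512BW provides VPMOVWB for the v32i16 -> v32i8 truncating store. |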
| setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); |
| |
| for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::ABS, VT, Legal); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Legal); |
| setOperationAction(ISD::MSTORE, VT, Legal); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::CTLZ, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| |
| // These condition codes aren't legal in SSE/AVX, and under AVX512 we keep |
| // the setcc all the way to isel and prefer SETGT in some isel patterns. |
| setCondCodeAction(ISD::SETLT, VT, Custom); |
| setCondCodeAction(ISD::SETLE, VT, Custom); |
| } |
| |
| for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
| setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); |
| } |
| |
| if (Subtarget.hasBITALG()) { |
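| // BITALG adds VPOPCNTB/VPOPCNTW for byte/word population counts. |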
| for (auto VT : { MVT::v64i8, MVT::v32i16 }) |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| } |
| |
| if (Subtarget.hasVBMI2()) { |
| setOperationAction(ISD::FSHL, MVT::v32i16, Custom); |
| setOperationAction(ISD::FSHR, MVT::v32i16, Custom); |
| } |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
| for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { |
| setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
| setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); |
| } |
| |
| // These operations are handled on non-VLX by artificially widening in |
| // isel patterns. |
| // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? |
| |
| if (Subtarget.hasBITALG()) { |
| for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| } |
| } |
| |
| if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { |
| setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); |
| setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); |
| setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); |
| setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); |
| setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); |
| |
| setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); |
| setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); |
| setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); |
| setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
| setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
| |
| if (Subtarget.hasDQI()) { |
| // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. |
| // v2f32 UINT_TO_FP is already custom under SSE2. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); |
| assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && |
| "Unexpected operation action!"); |
| // v2i64 FP_TO_S/UINT(v2f32) custom conversion. |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); |
| } |
| |
| if (Subtarget.hasBWI()) { |
| setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); |
| setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
| } |
| |
| if (Subtarget.hasVBMI2()) { |
| // TODO: Make these legal even without VLX? |
| for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| setOperationAction(ISD::FSHL, VT, Custom); |
| setOperationAction(ISD::FSHR, VT, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
| } |
| |
| // We want to custom lower some of our intrinsics. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| if (!Subtarget.is64Bit()) { |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
| } |
| |
| // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't |
| // handle type legalization for these operations here. |
| // |
| // FIXME: We really should do custom legalization for addition and |
| // subtraction on x86-32 once PR3203 is fixed. We really can't do much better |
| // than generic legalization for 64-bit multiplication-with-overflow, though. |
| for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| continue; |
| // Add/Sub/Mul with overflow operations are custom lowered. |
| setOperationAction(ISD::SADDO, VT, Custom); |
| setOperationAction(ISD::UADDO, VT, Custom); |
| setOperationAction(ISD::SSUBO, VT, Custom); |
| setOperationAction(ISD::USUBO, VT, Custom); |
| setOperationAction(ISD::SMULO, VT, Custom); |
| setOperationAction(ISD::UMULO, VT, Custom); |
| |
| // Support carry in as value rather than glue. |
| setOperationAction(ISD::ADDCARRY, VT, Custom); |
| setOperationAction(ISD::SUBCARRY, VT, Custom); |
| setOperationAction(ISD::SETCCCARRY, VT, Custom); |
| } |
| |
| if (!Subtarget.is64Bit()) { |
| // These libcalls are not available in 32-bit. |
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| setLibcallName(RTLIB::MUL_I128, nullptr); |
| } |
| |
| // Combine sin / cos into _sincos_stret if it is available. |
| if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| } |
| |
| if (Subtarget.isTargetWin64()) { |
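| // The Win64 ABI passes 128-bit integer arguments by reference, so the |
| // i128 divide/remainder libcalls need custom call setup. |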
| setOperationAction(ISD::SDIV, MVT::i128, Custom); |
| setOperationAction(ISD::UDIV, MVT::i128, Custom); |
| setOperationAction(ISD::SREM, MVT::i128, Custom); |
| setOperationAction(ISD::UREM, MVT::i128, Custom); |
| setOperationAction(ISD::SDIVREM, MVT::i128, Custom); |
| setOperationAction(ISD::UDIVREM, MVT::i128, Custom); |
| } |
| |
| // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` |
| // is. We should promote the value to 64 bits to solve this. |
| // This is what the CRT headers do - `fmodf` is an inline header |
| // function casting to f64 and calling `fmod`. |
| if (Subtarget.is32Bit() && |
| (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) |
| for (ISD::NodeType Op : |
| {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG, |
| ISD::FLOG10, ISD::FPOW, ISD::FSIN}) |
| if (isOperationExpand(Op, MVT::f32)) |
| setOperationAction(Op, MVT::f32, Promote); |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
| setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); |
| setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::CONCAT_VECTORS); |
| setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
| setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); |
| setTargetDAGCombine(ISD::BITCAST); |
| setTargetDAGCombine(ISD::VSELECT); |
| setTargetDAGCombine(ISD::SELECT); |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::OR); |
| setTargetDAGCombine(ISD::AND); |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::FADD); |
| setTargetDAGCombine(ISD::FSUB); |
| setTargetDAGCombine(ISD::FNEG); |
| setTargetDAGCombine(ISD::FMA); |
| setTargetDAGCombine(ISD::FMINNUM); |
| setTargetDAGCombine(ISD::FMAXNUM); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::LOAD); |
| setTargetDAGCombine(ISD::MLOAD); |
| setTargetDAGCombine(ISD::STORE); |
| setTargetDAGCombine(ISD::MSTORE); |
| setTargetDAGCombine(ISD::TRUNCATE); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
| setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); |
| setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); |
| setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); |
| setTargetDAGCombine(ISD::SINT_TO_FP); |
| setTargetDAGCombine(ISD::UINT_TO_FP); |
| setTargetDAGCombine(ISD::SETCC); |
| setTargetDAGCombine(ISD::MUL); |
| setTargetDAGCombine(ISD::XOR); |
| setTargetDAGCombine(ISD::MSCATTER); |
| setTargetDAGCombine(ISD::MGATHER); |
| |
| computeRegisterProperties(Subtarget.getRegisterInfo()); |
| |
| MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores |
| MaxStoresPerMemsetOptSize = 8; |
| MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores |
| MaxStoresPerMemcpyOptSize = 4; |
| MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores |
| MaxStoresPerMemmoveOptSize = 4; |
| |
| // TODO: These control memcmp expansion in CGP and could be raised higher, but |
| // that needs to be benchmarked and balanced with the potential use of vector |
| // load/store types (PR33329, PR33914). |
| MaxLoadsPerMemcmp = 2; |
| MaxLoadsPerMemcmpOptSize = 2; |
| |
| // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). |
| setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); |
| |
| // An out-of-order CPU can speculatively execute past a predictable branch, |
| // but a conditional move could be stalled by an expensive earlier operation. |
| PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); |
| EnableExtLdPromotion = true; |
| setPrefFunctionAlignment(Align(16)); |
| |
| verifyIntrinsicTables(); |
| } |
| |
| // This has so far only been implemented for 64-bit MachO. |
| bool X86TargetLowering::useLoadStackGuardNode() const { |
| return Subtarget.isTargetMachO() && Subtarget.is64Bit(); |
| } |
| |
| bool X86TargetLowering::useStackGuardXorFP() const { |
| // Currently only MSVC CRTs XOR the frame pointer into the stack guard value. |
| return Subtarget.getTargetTriple().isOSMSVCRT(); |
| } |
| |
| SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
| const SDLoc &DL) const { |
| EVT PtrTy = getPointerTy(DAG.getDataLayout()); |
| unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; |
| MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); |
| return SDValue(Node, 0); |
| } |
| |
| TargetLoweringBase::LegalizeTypeAction |
| X86TargetLowering::getPreferredVectorAction(MVT VT) const { |
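| // Without BWI there is no v32i1 mask register class (VK32 requires |
| // AVX512BW), so split such vectors. |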
| if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
| return TypeSplitVector; |
| |
| if (VT.getVectorNumElements() != 1 && |
| VT.getVectorElementType() != MVT::i1) |
| return TypeWidenVector; |
| |
| return TargetLoweringBase::getPreferredVectorAction(VT); |
| } |
| |
| MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // v32i1 vectors should be promoted to v32i8 to match avx2. |
| if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
| return MVT::v32i8; |
| // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
| if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| Subtarget.hasAVX512() && |
| (!isPowerOf2_32(VT.getVectorNumElements()) || |
| (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
| (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) |
| return MVT::i8; |
| // FIXME: Should we just make these types legal and custom split operations? |
| if ((VT == MVT::v32i16 || VT == MVT::v64i8) && |
| Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) |
| return MVT::v16i32; |
| return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
| } |
| |
| unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // v32i1 vectors should be promoted to v32i8 to match avx2. |
| if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
| return 1; |
| // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
| if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| Subtarget.hasAVX512() && |
| (!isPowerOf2_32(VT.getVectorNumElements()) || |
| (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
| (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) |
| return VT.getVectorNumElements(); |
| // FIXME: Should we just make these types legal and custom split operations? |
| if ((VT == MVT::v32i16 || VT == MVT::v64i8) && |
| Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) |
| return 1; |
| return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
| } |
| |
| unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( |
| LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
| unsigned &NumIntermediates, MVT &RegisterVT) const { |
| // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
| if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| Subtarget.hasAVX512() && |
| (!isPowerOf2_32(VT.getVectorNumElements()) || |
| (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
| (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { |
| RegisterVT = MVT::i8; |
| IntermediateVT = MVT::i1; |
| NumIntermediates = VT.getVectorNumElements(); |
| return NumIntermediates; |
| } |
| |
| return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT, |
| NumIntermediates, RegisterVT); |
| } |
| |
| EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, |
| LLVMContext& Context, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return MVT::i8; |
| |
| if (Subtarget.hasAVX512()) { |
| const unsigned NumElts = VT.getVectorNumElements(); |
| |
| // Figure out what this type will be legalized to. |
| EVT LegalVT = VT; |
| while (getTypeAction(Context, LegalVT) != TypeLegal) |
| LegalVT = getTypeToTransformTo(Context, LegalVT); |
| |
| // If we got a 512-bit vector then we'll definitely have a vXi1 compare. |
| if (LegalVT.getSimpleVT().is512BitVector()) |
| return EVT::getVectorVT(Context, MVT::i1, NumElts); |
| |
| if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { |
| // If we legalized to less than a 512-bit vector, then we will use a vXi1 |
| // compare for vXi32/vXi64 for sure. If we have BWI we will also support |
| // vXi16/vXi8. |
| MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); |
| if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) |
| return EVT::getVectorVT(Context, MVT::i1, NumElts); |
| } |
| } |
| |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| /// Helper for getByValTypeAlignment to determine |
| /// the desired ByVal argument alignment. |
| static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { |
| if (MaxAlign == 16) |
| return; |
| if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
| if (VTy->getBitWidth() == 128) |
| MaxAlign = 16; |
| } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
| unsigned EltAlign = 0; |
| getMaxByValAlign(ATy->getElementType(), EltAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
| for (auto *EltTy : STy->elements()) { |
| unsigned EltAlign = 0; |
| getMaxByValAlign(EltTy, EltAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| if (MaxAlign == 16) |
| break; |
| } |
| } |
| } |
| |
| /// Return the desired alignment for ByVal aggregate |
| /// function arguments in the caller parameter area. For X86, aggregates |
| /// that contain SSE vectors are placed at 16-byte boundaries while the rest |
| /// are at 4-byte boundaries. |
| unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, |
| const DataLayout &DL) const { |
| if (Subtarget.is64Bit()) { |
| // Max of 8 and alignment of type. |
| unsigned TyAlign = DL.getABITypeAlignment(Ty); |
| if (TyAlign > 8) |
| return TyAlign; |
| return 8; |
| } |
| |
| unsigned Align = 4; |
| if (Subtarget.hasSSE1()) |
| getMaxByValAlign(Ty, Align); |
| return Align; |
| } |
| |
| /// Returns the target-specific optimal type for load |
| /// and store operations as a result of memset, memcpy, and memmove |
| /// lowering. If DstAlign is zero, the destination alignment can satisfy any |
| /// constraint. Similarly, if SrcAlign is zero there is no need to check it |
| /// against an alignment requirement, probably because the source does not |
| /// need to be loaded. If 'IsMemset' is |
| /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that |
| /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy |
| /// source is constant so it does not need to be loaded. |
| /// It returns EVT::Other if the type should be determined using generic |
| /// target-independent logic. |
| /// For vector ops we check that the overall size isn't larger than our |
| /// preferred vector width. |
| EVT X86TargetLowering::getOptimalMemOpType( |
| uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, |
| bool ZeroMemset, bool MemcpyStrSrc, |
| const AttributeList &FuncAttributes) const { |
| if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { |
| if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || |
| ((DstAlign == 0 || DstAlign >= 16) && |
| (SrcAlign == 0 || SrcAlign >= 16)))) { |
| // FIXME: Check if unaligned 64-byte accesses are slow. |
| if (Size >= 64 && Subtarget.hasAVX512() && |
| (Subtarget.getPreferVectorWidth() >= 512)) { |
| return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; |
| } |
| // FIXME: Check if unaligned 32-byte accesses are slow. |
| if (Size >= 32 && Subtarget.hasAVX() && |
| (Subtarget.getPreferVectorWidth() >= 256)) { |
| // Although this isn't a well-supported type for AVX1, we'll let |
| // legalization and shuffle lowering produce the optimal codegen. If we |
| // choose an optimal type with a vector element larger than a byte, |
| // getMemsetStores() may create an intermediate splat (using an integer |
| // multiply) before we splat as a vector. |
| return MVT::v32i8; |
| } |
| if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128)) |
| return MVT::v16i8; |
| // TODO: Can SSE1 handle a byte vector? |
| // If we have SSE1 registers we should be able to use them. |
| if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && |
| (Subtarget.getPreferVectorWidth() >= 128)) |
| return MVT::v4f32; |
| } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 && |
| !Subtarget.is64Bit() && Subtarget.hasSSE2()) { |
| // Do not use f64 to lower memcpy if source is string constant. It's |
| // better to use i32 to avoid the loads. |
| // Also, do not use f64 to lower memset unless this is a memset of zeros. |
| // The gymnastics of splatting a byte value into an XMM register and then |
| // only using 8-byte stores (because this is a CPU with slow unaligned |
| // 16-byte accesses) makes that a loser. |
| return MVT::f64; |
| } |
| } |
| // This is a compromise. If we reach here, unaligned accesses may be slow on |
| // this target. However, creating smaller, aligned accesses could be even |
| // slower and would certainly be a lot more code. |
| if (Subtarget.is64Bit() && Size >= 8) |
| return MVT::i64; |
| return MVT::i32; |
| } |
| |
| bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
| if (VT == MVT::f32) |
| return X86ScalarSSEf32; |
| else if (VT == MVT::f64) |
| return X86ScalarSSEf64; |
| return true; |
| } |
| |
| bool X86TargetLowering::allowsMisalignedMemoryAccesses( |
| EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags, |
| bool *Fast) const { |
| if (Fast) { |
| switch (VT.getSizeInBits()) { |
| default: |
| // 8-byte and under are always assumed to be fast. |
| *Fast = true; |
| break; |
| case 128: |
| *Fast = !Subtarget.isUnalignedMem16Slow(); |
| break; |
| case 256: |
| *Fast = !Subtarget.isUnalignedMem32Slow(); |
| break; |
| // TODO: What about AVX-512 (512-bit) accesses? |
| } |
| } |
| // NonTemporal vector memory ops must be aligned. |
| if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
| // NT loads can only be vector-aligned, so if it's less aligned than the |
| // minimum vector size (which we can split the vector down to), we might as |
| // well use a regular unaligned vector load. |
| // We don't have any NT loads pre-SSE41. |
| if (!!(Flags & MachineMemOperand::MOLoad)) |
| return (Align < 16 || !Subtarget.hasSSE41()); |
| return false; |
| } |
| // Misaligned accesses of any size are always allowed. |
| return true; |
| } |
| |
| /// Return the entry encoding for a jump table in the |
| /// current function. The returned value is a member of the |
| /// MachineJumpTableInfo::JTEntryKind enum. |
| unsigned X86TargetLowering::getJumpTableEncoding() const { |
| // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF |
| // symbol. |
| if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
| return MachineJumpTableInfo::EK_Custom32; |
| |
| // Otherwise, use the normal jump table encoding heuristics. |
| return TargetLowering::getJumpTableEncoding(); |
| } |
| |
| bool X86TargetLowering::useSoftFloat() const { |
| return Subtarget.useSoftFloat(); |
| } |
| |
| void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, |
| ArgListTy &Args) const { |
| |
| // Only relabel X86-32 for C / Stdcall CCs. |
| if (Subtarget.is64Bit()) |
| return; |
| if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) |
| return; |
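| // The number of integer arguments that may be passed in registers comes |
| // from the module's register-parameter setting (clang's -mregparm). |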
| unsigned ParamRegs = 0; |
| if (auto *M = MF->getFunction().getParent()) |
| ParamRegs = M->getNumberRegisterParameters(); |
| |
| // Mark the first N integer arguments as being passed in registers. |
| for (unsigned Idx = 0; Idx < Args.size(); Idx++) { |
| Type *T = Args[Idx].Ty; |
| if (T->isIntOrPtrTy()) |
| if (MF->getDataLayout().getTypeAllocSize(T) <= 8) { |
| unsigned numRegs = 1; |
| if (MF->getDataLayout().getTypeAllocSize(T) > 4) |
| numRegs = 2; |
| if (ParamRegs < numRegs) |
| return; |
| ParamRegs -= numRegs; |
| Args[Idx].IsInReg = true; |
| } |
| } |
| } |
| |
| const MCExpr * |
| X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
| const MachineBasicBlock *MBB, |
| unsigned uid, MCContext &Ctx) const { |
| assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); |
| // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF |
| // entries. |
| return MCSymbolRefExpr::create(MBB->getSymbol(), |
| MCSymbolRefExpr::VK_GOTOFF, Ctx); |
| } |
| |
| /// Returns relocation base for the given PIC jumptable. |
| SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
| SelectionDAG &DAG) const { |
| if (!Subtarget.is64Bit()) |
| // This doesn't have SDLoc associated with it, but is not really the |
| // same as a Register. |
| return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
| getPointerTy(DAG.getDataLayout())); |
| return Table; |
| } |
| |
| /// This returns the relocation base for the given PIC jumptable, |
| /// the same as getPICJumpTableRelocBase, but as an MCExpr. |
| const MCExpr *X86TargetLowering:: |
| getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
| MCContext &Ctx) const { |
| // X86-64 uses RIP relative addressing based on the jump table label. |
| if (Subtarget.isPICStyleRIPRel()) |
| return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
| |
| // Otherwise, the reference is relative to the PIC base. |
| return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
| } |
| |
| std::pair<const TargetRegisterClass *, uint8_t> |
| X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
| MVT VT) const { |
| const TargetRegisterClass *RRC = nullptr; |
| uint8_t Cost = 1; |
| switch (VT.SimpleTy) { |
| default: |
| return TargetLowering::findRepresentativeClass(TRI, VT); |
| case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: |
| RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; |
| break; |
| case MVT::x86mmx: |
| RRC = &X86::VR64RegClass; |
| break; |
| case MVT::f32: case MVT::f64: |
| case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
| case MVT::v4f32: case MVT::v2f64: |
| case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: |
| case MVT::v8f32: case MVT::v4f64: |
| case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: |
| case MVT::v16f32: case MVT::v8f64: |
| RRC = &X86::VR128XRegClass; |
| break; |
| } |
| return std::make_pair(RRC, Cost); |
| } |
| |
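| // Address space 256 selects the GS segment and 257 the FS segment. The |
| // TLS-based values below (stack guard, safe-stack pointer) live in GS on |
| // i386 and with the Kernel code model, and in FS on other 64-bit targets. |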
| unsigned X86TargetLowering::getAddressSpace() const { |
| if (Subtarget.is64Bit()) |
| return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
| return 256; |
| } |
| |
| static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
| return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
| (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); |
| } |
| |
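| // Build a constant pointer to the given offset within the given segment |
| // address space. |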
| static Constant* SegmentOffset(IRBuilder<> &IRB, |
| unsigned Offset, unsigned AddressSpace) { |
| return ConstantExpr::getIntToPtr( |
| ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), |
| Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); |
| } |
| |
| Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { |
| // glibc, bionic, and Fuchsia have a special slot for the stack guard in |
| // tcbhead_t; use it instead of the usual global variable (see |
| // sysdeps/{i386,x86_64}/nptl/tls.h) |
| if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) { |
| if (Subtarget.isTargetFuchsia()) { |
| // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
| return SegmentOffset(IRB, 0x10, getAddressSpace()); |
| } else { |
| // %fs:0x28, unless we're using a Kernel code model, in which case |
| // it's %gs:0x28. %gs:0x14 on i386. |
| unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; |
| return SegmentOffset(IRB, Offset, getAddressSpace()); |
| } |
| } |
| |
| return TargetLowering::getIRStackGuard(IRB); |
| } |
| |
| void X86TargetLowering::insertSSPDeclarations(Module &M) const { |
| // The MSVC CRT provides functionality for stack protection. |
| if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
| Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
| // MSVC CRT has a global variable holding security cookie. |
| M.getOrInsertGlobal("__security_cookie", |
| Type::getInt8PtrTy(M.getContext())); |
| |
| // MSVC CRT has a function to validate security cookie. |
| FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( |
| "__security_check_cookie", Type::getVoidTy(M.getContext()), |
| Type::getInt8PtrTy(M.getContext())); |
| if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) { |
| F->setCallingConv(CallingConv::X86_FastCall); |
| F->addAttribute(1, Attribute::AttrKind::InReg); |
| } |
| return; |
| } |
| // glibc, bionic, and Fuchsia have a special slot for the stack guard. |
| if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) |
| return; |
| TargetLowering::insertSSPDeclarations(M); |
| } |
| |
| Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
| // MSVC CRT has a global variable holding security cookie. |
| if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
| Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
| return M.getGlobalVariable("__security_cookie"); |
| } |
| return TargetLowering::getSDagStackGuard(M); |
| } |
| |
| Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
| // MSVC CRT has a function to validate security cookie. |
| if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
| Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
| return M.getFunction("__security_check_cookie"); |
| } |
| return TargetLowering::getSSPStackGuardCheck(M); |
| } |
| |
| Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { |
| if (Subtarget.getTargetTriple().isOSContiki()) |
| return getDefaultSafeStackPointerLocation(IRB, false); |
| |
| // Android provides a fixed TLS slot for the SafeStack pointer. See the |
| // definition of TLS_SLOT_SAFESTACK in |
| // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h |
| if (Subtarget.isTargetAndroid()) { |
| // %fs:0x48, unless we're using a Kernel code model, in which case it's |
| // %gs:0x48. %gs:0x24 on i386. |
| unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24; |
| return SegmentOffset(IRB, Offset, getAddressSpace()); |
| } |
| |
| // Fuchsia is similar. |
| if (Subtarget.isTargetFuchsia()) { |
| // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. |
| return SegmentOffset(IRB, 0x18, getAddressSpace()); |
| } |
| |
| return TargetLowering::getSafeStackPointerLocation(IRB); |
| } |
| |
| bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, |
| unsigned DestAS) const { |
| assert(SrcAS != DestAS && "Expected different address spaces!"); |
| |
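| // Casts within the flat address spaces are no-ops; 256 and above are the |
| // segment-relative GS/FS/SS address spaces, where a cast changes meaning. |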
| return SrcAS < 256 && DestAS < 256; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Return Value Calling Convention Implementation |
| //===----------------------------------------------------------------------===// |
|