| //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the PPCISelLowering class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "PPCISelLowering.h" |
| #include "MCTargetDesc/PPCPredicates.h" |
| #include "PPC.h" |
| #include "PPCCCState.h" |
| #include "PPCCallingConv.h" |
| #include "PPCFrameLowering.h" |
| #include "PPCInstrInfo.h" |
| #include "PPCMachineFunctionInfo.h" |
| #include "PPCPerfectShuffle.h" |
| #include "PPCRegisterInfo.h" |
| #include "PPCSubtarget.h" |
| #include "PPCTargetMachine.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| #include "llvm/CodeGen/MachineLoopInfo.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineModuleInfo.h" |
| #include "llvm/CodeGen/MachineOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsPowerPC.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCExpr.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSectionXCOFF.h" |
| #include "llvm/MC/MCSymbolXCOFF.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/BranchProbability.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/Format.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <iterator> |
| #include <list> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "ppc-lowering" |
| |
| static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", |
| cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", |
| cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", |
| cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisableSCO("disable-ppc-sco", |
| cl::desc("disable sibling call optimization on ppc"), cl::Hidden); |
| |
| static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", |
| cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); |
| |
| static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables", |
| cl::desc("use absolute jump tables on ppc"), cl::Hidden); |
| |
| static cl::opt<bool> EnableQuadwordAtomics( |
| "ppc-quadword-atomics", |
| cl::desc("enable quadword lock-free atomic operations"), cl::init(false), |
| cl::Hidden); |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumSiblingCalls, "Number of sibling calls"); |
| STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); |
| STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed"); |
| |
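// Returns true if the given shuffle mask is shuffling N-byte-sized elements
// (defined later in this file).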
| static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); |
| |
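// Widens a vector operand to a wider vector type, e.g. when custom lowering
// v2f32 FP_EXTEND (defined later in this file).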
| static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); |
| |
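// Name of the global variable that holds the stack-protector canary word
// when compiling for AIX.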
| static const char AIXSSPCanaryWordName[] = "__ssp_canary_word"; |
| |
| // FIXME: Remove this once the bug has been fixed! |
| extern cl::opt<bool> ANDIGlueBug; |
| |
| PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, |
| const PPCSubtarget &STI) |
| : TargetLowering(TM), Subtarget(STI) { |
// Initialize the map that relates the PPC addressing modes to the computed
// flags of a load/store instruction. The map is used to determine the optimal
// addressing mode when selecting loads and stores.
| initializeAddrModeMap(); |
| // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all |
| // arguments are at least 4/8 bytes aligned. |
| bool isPPC64 = Subtarget.isPPC64(); |
| setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4)); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i32, &PPC::GPRCRegClass); |
| if (!useSoftFloat()) { |
| if (hasSPE()) { |
| addRegisterClass(MVT::f32, &PPC::GPRCRegClass); |
| // EFPU2 APU only supports f32 |
| if (!Subtarget.hasEFPU2()) |
| addRegisterClass(MVT::f64, &PPC::SPERCRegClass); |
| } else { |
| addRegisterClass(MVT::f32, &PPC::F4RCRegClass); |
| addRegisterClass(MVT::f64, &PPC::F8RCRegClass); |
| } |
| } |
| |
| // Match BITREVERSE to customized fast code sequence in the td file. |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
| |
// Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); |
| |
| // Custom lower inline assembly to check for special registers. |
| setOperationAction(ISD::INLINEASM, MVT::Other, Custom); |
| setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); |
| |
| // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. |
| for (MVT VT : MVT::integer_valuetypes()) { |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); |
| } |
| |
| if (Subtarget.isISA3_0()) { |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); |
| setTruncStoreAction(MVT::f64, MVT::f16, Legal); |
| setTruncStoreAction(MVT::f32, MVT::f16, Legal); |
| } else { |
// No extending loads from f16 and no HW conversions back and forth.
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
| setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
| setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
| setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| } |
| |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| |
// PowerPC has pre-inc loads and stores.
| setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); |
| if (!Subtarget.hasSPE()) { |
| setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); |
| } |
| |
| // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. |
| const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; |
| for (MVT VT : ScalarIntVTs) { |
| setOperationAction(ISD::ADDC, VT, Legal); |
| setOperationAction(ISD::ADDE, VT, Legal); |
| setOperationAction(ISD::SUBC, VT, Legal); |
| setOperationAction(ISD::SUBE, VT, Legal); |
| } |
| |
| if (Subtarget.useCRBits()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| if (isPPC64 || Subtarget.hasFPCVT()) { |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| |
| setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote); |
| AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote); |
| AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); |
| AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); |
| AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| } else { |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); |
| } |
| |
| // PowerPC does not support direct load/store of condition registers. |
| setOperationAction(ISD::LOAD, MVT::i1, Custom); |
| setOperationAction(ISD::STORE, MVT::i1, Custom); |
| |
| // FIXME: Remove this once the ANDI glue bug is fixed: |
| if (ANDIGlueBug) |
| setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); |
| |
| for (MVT VT : MVT::integer_valuetypes()) { |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
| setTruncStoreAction(VT, MVT::i1, Expand); |
| } |
| |
| addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); |
| } |
| |
| // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on |
| // PPC (the libcall is not available). |
| setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); |
| |
| // We do not currently implement these libm ops for PowerPC. |
| setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FREM, MVT::ppcf128, Expand); |
| |
// PowerPC has no SREM/UREM instructions unless we are on P9.
| // On P9 we may use a hardware instruction to compute the remainder. |
| // When the result of both the remainder and the division is required it is |
| // more efficient to compute the remainder from the result of the division |
| // rather than use the remainder instruction. The instructions are legalized |
| // directly because the DivRemPairsPass performs the transformation at the IR |
| // level. |
| if (Subtarget.isISA3_0()) { |
| setOperationAction(ISD::SREM, MVT::i32, Legal); |
| setOperationAction(ISD::UREM, MVT::i32, Legal); |
| setOperationAction(ISD::SREM, MVT::i64, Legal); |
| setOperationAction(ISD::UREM, MVT::i64, Legal); |
| } else { |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| setOperationAction(ISD::SREM, MVT::i64, Expand); |
| setOperationAction(ISD::UREM, MVT::i64, Expand); |
| } |
| |
| // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. |
| setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
| |
// Handle constrained floating-point operations on scalars.
// TODO: Handle SPE-specific operations.
| setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
| |
| setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); |
| |
| if (!Subtarget.hasSPE()) { |
| setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); |
| } |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); |
| } |
| |
| if (Subtarget.hasFSQRT()) { |
| setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); |
| } |
| |
| if (Subtarget.hasFPRND()) { |
| setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal); |
| |
| setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal); |
| } |
| |
| // We don't support sin/cos/sqrt/fmod/pow |
| setOperationAction(ISD::FSIN , MVT::f64, Expand); |
| setOperationAction(ISD::FCOS , MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FREM , MVT::f64, Expand); |
| setOperationAction(ISD::FPOW , MVT::f64, Expand); |
| setOperationAction(ISD::FSIN , MVT::f32, Expand); |
| setOperationAction(ISD::FCOS , MVT::f32, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FREM , MVT::f32, Expand); |
| setOperationAction(ISD::FPOW , MVT::f32, Expand); |
| if (Subtarget.hasSPE()) { |
| setOperationAction(ISD::FMA , MVT::f64, Expand); |
| setOperationAction(ISD::FMA , MVT::f32, Expand); |
| } else { |
| setOperationAction(ISD::FMA , MVT::f64, Legal); |
| setOperationAction(ISD::FMA , MVT::f32, Legal); |
| } |
| |
| if (Subtarget.hasSPE()) |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
| |
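// FLT_ROUNDS_ is custom lowered: the current rounding mode has to be read
// out of the FPSCR.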
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| |
| // If we're enabling GP optimizations, use hardware square root |
| if (!Subtarget.hasFSQRT() && |
| !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && |
| Subtarget.hasFRE())) |
| setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
| |
| if (!Subtarget.hasFSQRT() && |
| !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && |
| Subtarget.hasFRES())) |
| setOperationAction(ISD::FSQRT, MVT::f32, Expand); |
| |
| if (Subtarget.hasFCPSGN()) { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); |
| } else { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
| } |
| |
| if (Subtarget.hasFPRND()) { |
| setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::f64, Legal); |
| |
| setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::f32, Legal); |
| } |
| |
// PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
// xxbrd to speed up scalar BSWAP64.
// CTPOP and CTTZ were introduced in P8 and P9, respectively.
| setOperationAction(ISD::BSWAP, MVT::i32 , Expand); |
| if (Subtarget.hasP9Vector() && Subtarget.isPPC64()) |
| setOperationAction(ISD::BSWAP, MVT::i64 , Custom); |
| else |
| setOperationAction(ISD::BSWAP, MVT::i64 , Expand); |
| if (Subtarget.isISA3_0()) { |
| setOperationAction(ISD::CTTZ , MVT::i32 , Legal); |
| setOperationAction(ISD::CTTZ , MVT::i64 , Legal); |
| } else { |
| setOperationAction(ISD::CTTZ , MVT::i32 , Expand); |
| setOperationAction(ISD::CTTZ , MVT::i64 , Expand); |
| } |
| |
| if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { |
| setOperationAction(ISD::CTPOP, MVT::i32 , Legal); |
| setOperationAction(ISD::CTPOP, MVT::i64 , Legal); |
| } else { |
| setOperationAction(ISD::CTPOP, MVT::i32 , Expand); |
| setOperationAction(ISD::CTPOP, MVT::i64 , Expand); |
| } |
| |
| // PowerPC does not have ROTR |
| setOperationAction(ISD::ROTR, MVT::i32 , Expand); |
| setOperationAction(ISD::ROTR, MVT::i64 , Expand); |
| |
| if (!Subtarget.useCRBits()) { |
| // PowerPC does not have Select |
| setOperationAction(ISD::SELECT, MVT::i32, Expand); |
| setOperationAction(ISD::SELECT, MVT::i64, Expand); |
| setOperationAction(ISD::SELECT, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT, MVT::f64, Expand); |
| } |
| |
| // PowerPC wants to turn select_cc of FP into fsel when possible. |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| |
| // PowerPC wants to optimize integer setcc a bit |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::SETCC, MVT::i32, Custom); |
| |
| if (Subtarget.hasFPU()) { |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal); |
| |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal); |
| } |
| |
// PowerPC does not have BRCOND, which requires SetCC.
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
| |
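// There is no jump-table branch instruction; BR_JT is expanded into address
// arithmetic plus an indirect branch.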
| setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
| |
| if (Subtarget.hasSPE()) { |
| // SPE has built-in conversions |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); |
| |
| // SPE supports signaling compare of f32/f64. |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); |
| } else { |
| // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| |
| // PowerPC does not have [U|S]INT_TO_FP |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); |
| } |
| |
| if (Subtarget.hasDirectMove() && isPPC64) { |
| setOperationAction(ISD::BITCAST, MVT::f32, Legal); |
| setOperationAction(ISD::BITCAST, MVT::i32, Legal); |
| setOperationAction(ISD::BITCAST, MVT::i64, Legal); |
| setOperationAction(ISD::BITCAST, MVT::f64, Legal); |
| if (TM.Options.UnsafeFPMath) { |
| setOperationAction(ISD::LRINT, MVT::f64, Legal); |
| setOperationAction(ISD::LRINT, MVT::f32, Legal); |
| setOperationAction(ISD::LLRINT, MVT::f64, Legal); |
| setOperationAction(ISD::LLRINT, MVT::f32, Legal); |
| setOperationAction(ISD::LROUND, MVT::f64, Legal); |
| setOperationAction(ISD::LROUND, MVT::f32, Legal); |
| setOperationAction(ISD::LLROUND, MVT::f64, Legal); |
| setOperationAction(ISD::LLROUND, MVT::f32, Legal); |
| } |
| } else { |
| setOperationAction(ISD::BITCAST, MVT::f32, Expand); |
| setOperationAction(ISD::BITCAST, MVT::i32, Expand); |
| setOperationAction(ISD::BITCAST, MVT::i64, Expand); |
| setOperationAction(ISD::BITCAST, MVT::f64, Expand); |
| } |
| |
| // We cannot sextinreg(i1). Expand to shifts. |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
// NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended for SjLj
// exception handling but is a light-weight setjmp/longjmp replacement to
// support continuations, user-level threading, etc. As a result, no other
// SjLj exception interfaces are implemented; please don't build your own
// exception handling based on them.
| // LLVM/Clang supports zero-cost DWARF exception handling. |
| setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| |
| // We want to legalize GlobalAddress and ConstantPool nodes into the |
| // appropriate instructions to materialize the address. |
| setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
| |
| // TRAP is legal. |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| |
| // TRAMPOLINE is custom lowered. |
| setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
| setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
| |
| // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
| setOperationAction(ISD::VASTART , MVT::Other, Custom); |
| |
| if (Subtarget.is64BitELFABI()) { |
| // VAARG always uses double-word chunks, so promote anything smaller. |
| setOperationAction(ISD::VAARG, MVT::i1, Promote); |
| AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i8, Promote); |
| AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i16, Promote); |
| AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i32, Promote); |
| AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| } else if (Subtarget.is32BitELFABI()) { |
| // VAARG is custom lowered with the 32-bit SVR4 ABI. |
| setOperationAction(ISD::VAARG, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::i64, Custom); |
| } else |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| |
| // VACOPY is custom lowered with the 32-bit SVR4 ABI. |
| if (Subtarget.is32BitELFABI()) |
| setOperationAction(ISD::VACOPY , MVT::Other, Custom); |
| else |
| setOperationAction(ISD::VACOPY , MVT::Other, Expand); |
| |
| // Use the default implementation. |
| setOperationAction(ISD::VAEND , MVT::Other, Expand); |
| setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); |
| setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); |
| setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); |
| setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); |
| setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); |
| |
| // We want to custom lower some of our intrinsics. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); |
| |
| // To handle counter-based loop conditions. |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); |
| |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| |
| // Comparisons that require checking two conditions. |
| if (Subtarget.hasSPE()) { |
| setCondCodeAction(ISD::SETO, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETO, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUO, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUO, MVT::f64, Expand); |
| } |
| setCondCodeAction(ISD::SETULT, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETULT, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f64, Expand); |
| |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); |
| |
| if (Subtarget.has64BitSupport()) { |
// 64-bit-capable subtargets also have instructions for converting between
// i64 and fp.
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); |
| // This is just the low 32 bits of a (signed) fp->i64 conversion. |
| // We cannot do this with Promote because i64 is not a legal type. |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| |
| if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) { |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
| } |
| } else { |
| // PowerPC does not have FP_TO_UINT on 32-bit implementations. |
| if (Subtarget.hasSPE()) { |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); |
| } else { |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); |
| } |
| } |
| |
| // With the instructions enabled under FPCVT, we can do everything. |
| if (Subtarget.hasFPCVT()) { |
| if (Subtarget.has64BitSupport()) { |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| } |
| |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| } |
| |
| if (Subtarget.use64BitRegs()) { |
| // 64-bit PowerPC implementations can support i64 types directly |
| addRegisterClass(MVT::i64, &PPC::G8RCRegClass); |
| // BUILD_PAIR can't be handled natively, and should be expanded to shl/or |
| setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
| // 64-bit PowerPC wants to expand i128 shifts itself. |
| setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
| } else { |
| // 32-bit PowerPC wants to expand i64 shifts itself. |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
| } |
| |
| // PowerPC has better expansions for funnel shifts than the generic |
| // TargetLowering::expandFunnelShift. |
| if (Subtarget.has64BitSupport()) { |
| setOperationAction(ISD::FSHL, MVT::i64, Custom); |
| setOperationAction(ISD::FSHR, MVT::i64, Custom); |
| } |
| setOperationAction(ISD::FSHL, MVT::i32, Custom); |
| setOperationAction(ISD::FSHR, MVT::i32, Custom); |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); |
| setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); |
| setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); |
| setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); |
| } |
| |
| if (Subtarget.hasAltivec()) { |
| for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| } |
| // First set operation action for all vector types to expand. Then we |
| // will selectively turn on ones that can be effectively codegen'd. |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| // add/sub are legal for all supported vector VT's. |
| setOperationAction(ISD::ADD, VT, Legal); |
| setOperationAction(ISD::SUB, VT, Legal); |
| |
| // For v2i64, these are only valid with P8Vector. This is corrected after |
| // the loop. |
| if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) { |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| } |
| else { |
| setOperationAction(ISD::SMAX, VT, Expand); |
| setOperationAction(ISD::SMIN, VT, Expand); |
| setOperationAction(ISD::UMAX, VT, Expand); |
| setOperationAction(ISD::UMIN, VT, Expand); |
| } |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::FMAXNUM, VT, Legal); |
| setOperationAction(ISD::FMINNUM, VT, Legal); |
| } |
| |
| // Vector instructions introduced in P8 |
| if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| setOperationAction(ISD::CTLZ, VT, Legal); |
| } |
| else { |
| setOperationAction(ISD::CTPOP, VT, Expand); |
| setOperationAction(ISD::CTLZ, VT, Expand); |
| } |
| |
| // Vector instructions introduced in P9 |
| if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) |
| setOperationAction(ISD::CTTZ, VT, Legal); |
| else |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| |
| // We promote all shuffles to v16i8. |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); |
| AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); |
| |
| // We promote all non-typed operations to v4i32. |
| setOperationAction(ISD::AND , VT, Promote); |
| AddPromotedToType (ISD::AND , VT, MVT::v4i32); |
| setOperationAction(ISD::OR , VT, Promote); |
| AddPromotedToType (ISD::OR , VT, MVT::v4i32); |
| setOperationAction(ISD::XOR , VT, Promote); |
| AddPromotedToType (ISD::XOR , VT, MVT::v4i32); |
| setOperationAction(ISD::LOAD , VT, Promote); |
| AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); |
| setOperationAction(ISD::SELECT, VT, Promote); |
| AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); |
| setOperationAction(ISD::VSELECT, VT, Legal); |
| setOperationAction(ISD::SELECT_CC, VT, Promote); |
| AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); |
| setOperationAction(ISD::STORE, VT, Promote); |
| AddPromotedToType (ISD::STORE, VT, MVT::v4i32); |
| |
| // No other operations are legal. |
| setOperationAction(ISD::MUL , VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::FDIV, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::FNEG, VT, Expand); |
| setOperationAction(ISD::FSQRT, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FABS, VT, Expand); |
| setOperationAction(ISD::FFLOOR, VT, Expand); |
| setOperationAction(ISD::FCEIL, VT, Expand); |
| setOperationAction(ISD::FTRUNC, VT, Expand); |
| setOperationAction(ISD::FRINT, VT, Expand); |
| setOperationAction(ISD::FNEARBYINT, VT, Expand); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| |
| for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| } |
| setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); |
| if (!Subtarget.hasP8Vector()) { |
| setOperationAction(ISD::SMAX, MVT::v2i64, Expand); |
| setOperationAction(ISD::SMIN, MVT::v2i64, Expand); |
| setOperationAction(ISD::UMAX, MVT::v2i64, Expand); |
| setOperationAction(ISD::UMIN, MVT::v2i64, Expand); |
| } |
| |
// We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
// with merges, splats, etc.
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); |
| |
// Vector truncates to sub-word integers that fit in an Altivec/VSX register
| // are cheap, so handle them before they get expanded to scalar. |
| setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
| setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
| |
| setOperationAction(ISD::AND , MVT::v4i32, Legal); |
| setOperationAction(ISD::OR , MVT::v4i32, Legal); |
| setOperationAction(ISD::XOR , MVT::v4i32, Legal); |
| setOperationAction(ISD::LOAD , MVT::v4i32, Legal); |
| setOperationAction(ISD::SELECT, MVT::v4i32, |
| Subtarget.useCRBits() ? Legal : Expand); |
| setOperationAction(ISD::STORE , MVT::v4i32, Legal); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); |
| |
// Custom lower ROTL of v1i128 to a v16i8 VECTOR_SHUFFLE.
| setOperationAction(ISD::ROTL, MVT::v1i128, Custom); |
| // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w). |
| if (Subtarget.hasAltivec()) |
| for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8}) |
| setOperationAction(ISD::ROTL, VT, Legal); |
| // With hasP8Altivec set, we can lower ISD::ROTL to vrld. |
| if (Subtarget.hasP8Altivec()) |
| setOperationAction(ISD::ROTL, MVT::v2i64, Legal); |
| |
| addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); |
| |
| setOperationAction(ISD::MUL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMA, MVT::v4f32, Legal); |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); |
| } |
| |
| if (Subtarget.hasP8Altivec()) |
| setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
| else |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| |
| if (Subtarget.isISA3_1()) { |
| setOperationAction(ISD::MUL, MVT::v2i64, Legal); |
| setOperationAction(ISD::MULHS, MVT::v2i64, Legal); |
| setOperationAction(ISD::MULHU, MVT::v2i64, Legal); |
| setOperationAction(ISD::MULHS, MVT::v4i32, Legal); |
| setOperationAction(ISD::MULHU, MVT::v4i32, Legal); |
| setOperationAction(ISD::UDIV, MVT::v2i64, Legal); |
| setOperationAction(ISD::SDIV, MVT::v2i64, Legal); |
| setOperationAction(ISD::UDIV, MVT::v4i32, Legal); |
| setOperationAction(ISD::SDIV, MVT::v4i32, Legal); |
| setOperationAction(ISD::UREM, MVT::v2i64, Legal); |
| setOperationAction(ISD::SREM, MVT::v2i64, Legal); |
| setOperationAction(ISD::UREM, MVT::v4i32, Legal); |
| setOperationAction(ISD::SREM, MVT::v4i32, Legal); |
| setOperationAction(ISD::UREM, MVT::v1i128, Legal); |
| setOperationAction(ISD::SREM, MVT::v1i128, Legal); |
| setOperationAction(ISD::UDIV, MVT::v1i128, Legal); |
| setOperationAction(ISD::SDIV, MVT::v1i128, Legal); |
| setOperationAction(ISD::ROTL, MVT::v1i128, Legal); |
| } |
| |
| setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
| setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
| |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
| |
| // Altivec does not contain unordered floating-point compare instructions |
| setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
| if (Subtarget.hasP8Vector()) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); |
| } |
| if (Subtarget.hasDirectMove() && isPPC64) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); |
| } |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
| |
// The nearbyint variants are not allowed to raise the inexact exception,
// so we can only code-gen them with unsafe math.
| if (TM.Options.UnsafeFPMath) { |
| setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); |
| } |
| |
| setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); |
| setOperationAction(ISD::FRINT, MVT::v2f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::v2f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::f64, Legal); |
| setOperationAction(ISD::FRINT, MVT::f64, Legal); |
| |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); |
| setOperationAction(ISD::FRINT, MVT::v4f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::f32, Legal); |
| setOperationAction(ISD::FRINT, MVT::f32, Legal); |
| |
| setOperationAction(ISD::MUL, MVT::v2f64, Legal); |
| setOperationAction(ISD::FMA, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::FDIV, MVT::v2f64, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); |
| |
| // Share the Altivec comparison restrictions. |
| setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); |
| |
| setOperationAction(ISD::LOAD, MVT::v2f64, Legal); |
| setOperationAction(ISD::STORE, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); |
| |
| if (Subtarget.hasP8Vector()) |
| addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); |
| |
| addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); |
| |
| addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); |
| addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); |
| addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); |
| |
| if (Subtarget.hasP8Altivec()) { |
| setOperationAction(ISD::SHL, MVT::v2i64, Legal); |
| setOperationAction(ISD::SRA, MVT::v2i64, Legal); |
| setOperationAction(ISD::SRL, MVT::v2i64, Legal); |
| |
// 128-bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
// doing.
| setOperationAction(ISD::SHL, MVT::v1i128, Expand); |
| setOperationAction(ISD::SRL, MVT::v1i128, Expand); |
| setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
| |
| setOperationAction(ISD::SETCC, MVT::v2i64, Legal); |
| } |
| else { |
| setOperationAction(ISD::SHL, MVT::v2i64, Expand); |
| setOperationAction(ISD::SRA, MVT::v2i64, Expand); |
| setOperationAction(ISD::SRL, MVT::v2i64, Expand); |
| |
| setOperationAction(ISD::SETCC, MVT::v2i64, Custom); |
| |
| // VSX v2i64 only supports non-arithmetic operations. |
| setOperationAction(ISD::ADD, MVT::v2i64, Expand); |
| setOperationAction(ISD::SUB, MVT::v2i64, Expand); |
| } |
| |
| if (Subtarget.isISA3_1()) |
| setOperationAction(ISD::SETCC, MVT::v1i128, Legal); |
| else |
| setOperationAction(ISD::SETCC, MVT::v1i128, Expand); |
| |
| setOperationAction(ISD::LOAD, MVT::v2i64, Promote); |
| AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); |
| setOperationAction(ISD::STORE, MVT::v2i64, Promote); |
| AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); |
| |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); |
| |
| // Custom handling for partial vectors of integers converted to |
| // floating point. We already have optimal handling for v2i32 through |
| // the DAG combine, so those aren't necessary. |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
| |
| setOperationAction(ISD::FNEG, MVT::v4f32, Legal); |
| setOperationAction(ISD::FNEG, MVT::v2f64, Legal); |
| setOperationAction(ISD::FABS, MVT::v4f32, Legal); |
| setOperationAction(ISD::FABS, MVT::v2f64, Legal); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); |
| |
// Handle constrained floating-point operations on vectors.
// The predicate is `hasVSX` because Altivec instructions do not raise
// floating-point exceptions, but VSX vector instructions do.
| setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); |
| setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); |
| |
| setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); |
| setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); |
| |
| addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); |
| addRegisterClass(MVT::f128, &PPC::VRRCRegClass); |
| |
| for (MVT FPT : MVT::fp_valuetypes()) |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); |
| |
| // Expand the SELECT to SELECT_CC |
| setOperationAction(ISD::SELECT, MVT::f128, Expand); |
| |
| setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| |
| // No implementation for these ops for PowerPC. |
| setOperationAction(ISD::FSIN, MVT::f128, Expand); |
| setOperationAction(ISD::FCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FPOW, MVT::f128, Expand); |
| setOperationAction(ISD::FPOWI, MVT::f128, Expand); |
| setOperationAction(ISD::FREM, MVT::f128, Expand); |
| } |
| |
| if (Subtarget.hasP8Altivec()) { |
| addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); |
| } |
| |
| if (Subtarget.hasP9Vector()) { |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
| |
// 128-bit shifts can be accomplished via 3 instructions for SHL and
| // SRL, but not for SRA because of the instructions available: |
| // VS{RL} and VS{RL}O. |
| setOperationAction(ISD::SHL, MVT::v1i128, Legal); |
| setOperationAction(ISD::SRL, MVT::v1i128, Legal); |
| setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
| |
| setOperationAction(ISD::FADD, MVT::f128, Legal); |
| setOperationAction(ISD::FSUB, MVT::f128, Legal); |
| setOperationAction(ISD::FDIV, MVT::f128, Legal); |
| setOperationAction(ISD::FMUL, MVT::f128, Legal); |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); |
| |
| setOperationAction(ISD::FMA, MVT::f128, Legal); |
| setCondCodeAction(ISD::SETULT, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f128, Expand); |
| |
| setOperationAction(ISD::FTRUNC, MVT::f128, Legal); |
| setOperationAction(ISD::FRINT, MVT::f128, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::f128, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f128, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); |
| setOperationAction(ISD::FROUND, MVT::f128, Legal); |
| |
| setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); |
| setOperationAction(ISD::BITCAST, MVT::i128, Custom); |
| |
// Handle constrained floating-point operations on fp128.
| setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
| setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); |
| setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal); |
| setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
| setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); |
| setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); |
| setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); |
| setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); |
| } else if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::LOAD, MVT::f128, Promote); |
| setOperationAction(ISD::STORE, MVT::f128, Promote); |
| |
| AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); |
| AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); |
| |
// Set FADD/FSUB as libcall to prevent the legalizer from expanding the
// fp_to_uint and int_to_fp operations.
| setOperationAction(ISD::FADD, MVT::f128, LibCall); |
| setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
| |
| setOperationAction(ISD::FMUL, MVT::f128, Expand); |
| setOperationAction(ISD::FDIV, MVT::f128, Expand); |
| setOperationAction(ISD::FNEG, MVT::f128, Expand); |
| setOperationAction(ISD::FABS, MVT::f128, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
| setOperationAction(ISD::FMA, MVT::f128, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
| |
| // Expand the fp_extend if the target type is fp128. |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); |
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); |
| |
| // Expand the fp_round if the source type is fp128. |
| for (MVT VT : {MVT::f32, MVT::f64}) { |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); |
| } |
| |
| setOperationAction(ISD::SETCC, MVT::f128, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f128, Expand); |
| |
// Lower the following f128 select_cc pattern:
| // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE |
| setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
| |
| // We need to handle f128 SELECT_CC with integer result type. |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand); |
| } |
| |
| if (Subtarget.hasP9Altivec()) { |
| if (Subtarget.isISA3_1()) { |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); |
| } else { |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
| } |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); |
| } |
| } |
| |
| if (Subtarget.pairedVectorMemops()) { |
| addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass); |
| setOperationAction(ISD::LOAD, MVT::v256i1, Custom); |
| setOperationAction(ISD::STORE, MVT::v256i1, Custom); |
| } |
| if (Subtarget.hasMMA()) { |
| addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); |
| setOperationAction(ISD::LOAD, MVT::v512i1, Custom); |
| setOperationAction(ISD::STORE, MVT::v512i1, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); |
| } |
| |
| if (Subtarget.has64BitSupport()) |
| setOperationAction(ISD::PREFETCH, MVT::Other, Legal); |
| |
| if (Subtarget.isISA3_1()) |
| setOperationAction(ISD::SRA, MVT::v1i128, Legal); |
| |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); |
| |
| if (!isPPC64) { |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); |
| } |
| |
| if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) { |
| setMaxAtomicSizeInBitsSupported(128); |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); |
| } |
| |
| setBooleanContents(ZeroOrOneBooleanContent); |
| |
| if (Subtarget.hasAltivec()) { |
| // Altivec instructions set fields to all zeros or all ones. |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| } |
| |
| if (!isPPC64) { |
| // These libcalls are not available in 32-bit. |
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| setLibcallName(RTLIB::MULO_I64, nullptr); |
| } |
| |
| if (!isPPC64) |
| setMaxAtomicSizeInBitsSupported(32); |
| |
| setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::MUL); |
| setTargetDAGCombine(ISD::FMA); |
| setTargetDAGCombine(ISD::SINT_TO_FP); |
| setTargetDAGCombine(ISD::BUILD_VECTOR); |
| if (Subtarget.hasFPCVT()) |
| setTargetDAGCombine(ISD::UINT_TO_FP); |
| setTargetDAGCombine(ISD::LOAD); |
| setTargetDAGCombine(ISD::STORE); |
| setTargetDAGCombine(ISD::BR_CC); |
| if (Subtarget.useCRBits()) |
| setTargetDAGCombine(ISD::BRCOND); |
| setTargetDAGCombine(ISD::BSWAP); |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| |
| setTargetDAGCombine(ISD::TRUNCATE); |
| setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
| if (Subtarget.useCRBits()) { |
| setTargetDAGCombine(ISD::SETCC); |
| setTargetDAGCombine(ISD::SELECT_CC); |
| } |
| |
| if (Subtarget.hasP9Altivec()) { |
| setTargetDAGCombine(ISD::ABS); |
| setTargetDAGCombine(ISD::VSELECT); |
| } |
| |
| setLibcallName(RTLIB::LOG_F128, "logf128"); |
| setLibcallName(RTLIB::LOG2_F128, "log2f128"); |
| setLibcallName(RTLIB::LOG10_F128, "log10f128"); |
| setLibcallName(RTLIB::EXP_F128, "expf128"); |
| setLibcallName(RTLIB::EXP2_F128, "exp2f128"); |
| setLibcallName(RTLIB::SIN_F128, "sinf128"); |
| setLibcallName(RTLIB::COS_F128, "cosf128"); |
| setLibcallName(RTLIB::POW_F128, "powf128"); |
| setLibcallName(RTLIB::FMIN_F128, "fminf128"); |
| setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); |
| setLibcallName(RTLIB::REM_F128, "fmodf128"); |
| setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); |
| setLibcallName(RTLIB::CEIL_F128, "ceilf128"); |
| setLibcallName(RTLIB::FLOOR_F128, "floorf128"); |
| setLibcallName(RTLIB::TRUNC_F128, "truncf128"); |
| setLibcallName(RTLIB::ROUND_F128, "roundf128"); |
| setLibcallName(RTLIB::LROUND_F128, "lroundf128"); |
| setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); |
| setLibcallName(RTLIB::RINT_F128, "rintf128"); |
| setLibcallName(RTLIB::LRINT_F128, "lrintf128"); |
| setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); |
| setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); |
| setLibcallName(RTLIB::FMA_F128, "fmaf128"); |
| |
| // With 32 condition bits, we don't need to sink (and duplicate) compares |
| // aggressively in CodeGenPrep. |
| if (Subtarget.useCRBits()) { |
| setHasMultipleConditionRegisters(); |
| setJumpIsExpensive(); |
| } |
| |
| setMinFunctionAlignment(Align(4)); |
| |
| switch (Subtarget.getCPUDirective()) { |
| default: break; |
| case PPC::DIR_970: |
| case PPC::DIR_A2: |
| case PPC::DIR_E500: |
| case PPC::DIR_E500mc: |
| case PPC::DIR_E5500: |
| case PPC::DIR_PWR4: |
| case PPC::DIR_PWR5: |
| case PPC::DIR_PWR5X: |
| case PPC::DIR_PWR6: |
| case PPC::DIR_PWR6X: |
| case PPC::DIR_PWR7: |
| case PPC::DIR_PWR8: |
| case PPC::DIR_PWR9: |
| case PPC::DIR_PWR10: |
| case PPC::DIR_PWR_FUTURE: |
| setPrefLoopAlignment(Align(16)); |
| setPrefFunctionAlignment(Align(16)); |
| break; |
| } |
| |
| if (Subtarget.enableMachineScheduler()) |
| setSchedulingPreference(Sched::Source); |
| else |
| setSchedulingPreference(Sched::Hybrid); |
| |
| computeRegisterProperties(STI.getRegisterInfo()); |
| |
| // The Freescale cores do better with aggressive inlining of memcpy and |
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
| if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || |
| Subtarget.getCPUDirective() == PPC::DIR_E5500) { |
| MaxStoresPerMemset = 32; |
| MaxStoresPerMemsetOptSize = 16; |
| MaxStoresPerMemcpy = 32; |
| MaxStoresPerMemcpyOptSize = 8; |
| MaxStoresPerMemmove = 32; |
| MaxStoresPerMemmoveOptSize = 8; |
| } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { |
| // The A2 also benefits from (very) aggressive inlining of memcpy and |
    // friends. The overhead of the function call, even when warm, can be
| // over one hundred cycles. |
| MaxStoresPerMemset = 128; |
| MaxStoresPerMemcpy = 128; |
| MaxStoresPerMemmove = 128; |
| MaxLoadsPerMemcmp = 128; |
| } else { |
| MaxLoadsPerMemcmp = 8; |
| MaxLoadsPerMemcmpOptSize = 4; |
| } |
| |
| IsStrictFPEnabled = true; |
| |
| // Let the subtarget (CPU) decide if a predictable select is more expensive |
| // than the corresponding branch. This information is used in CGP to decide |
| // when to convert selects into branches. |
| PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive(); |
| } |
| |
| // *********************************** NOTE ************************************ |
| // For selecting load and store instructions, the addressing modes are defined |
| // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD |
// patterns to match the load and store instructions.
| // |
| // The TD definitions for the addressing modes correspond to their respective |
| // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely |
| // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the |
| // address mode flags of a particular node. Afterwards, the computed address |
| // flags are passed into getAddrModeForFlags() in order to retrieve the optimal |
| // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement |
| // accordingly, based on the preferred addressing mode. |
| // |
| // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode. |
| // MemOpFlags contains all the possible flags that can be used to compute the |
| // optimal addressing mode for load and store instructions. |
| // AddrMode contains all the possible load and store addressing modes available |
| // on Power (such as DForm, DSForm, DQForm, XForm, etc.) |
| // |
| // When adding new load and store instructions, it is possible that new address |
| // flags may need to be added into MemOpFlags, and a new addressing mode will |
| // need to be added to AddrMode. An entry of the new addressing mode (consisting |
| // of the minimal and main distinguishing address flags for the new load/store |
| // instructions) will need to be added into initializeAddrModeMap() below. |
// Finally, when adding new addressing modes, getAddrModeForFlags() will need
// to be updated to account for selecting the optimal addressing mode.
| // ***************************************************************************** |
| /// Initialize the map that relates the different addressing modes of the load |
| /// and store instructions to a set of flags. This ensures the load/store |
| /// instruction is correctly matched during instruction selection. |
| void PPCTargetLowering::initializeAddrModeMap() { |
| AddrModesMap[PPC::AM_DForm] = { |
| // LWZ, STW |
| PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt, |
| PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt, |
| PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, |
| PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, |
| // LBZ, LHZ, STB, STH |
| PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, |
| PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, |
| PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, |
| PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, |
| // LHA |
| PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, |
| PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, |
| PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, |
| PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, |
| // LFS, LFD, STFS, STFD |
| PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
| PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
| PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
| PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
| }; |
| AddrModesMap[PPC::AM_DSForm] = { |
| // LWA |
| PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt, |
| PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, |
| PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, |
| // LD, STD |
| PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt, |
| PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt, |
| PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt, |
| // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64 |
| PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
| PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
| PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
| }; |
| AddrModesMap[PPC::AM_DQForm] = { |
| // LXV, STXV |
| PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
| PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
| PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
| }; |
| AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 | |
| PPC::MOF_SubtargetP10}; |
| // TODO: Add mapping for quadword load/store. |
| } |
| |
| /// getMaxByValAlign - Helper for getByValTypeAlignment to determine |
| /// the desired ByVal argument alignment. |
| static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { |
| if (MaxAlign == MaxMaxAlign) |
| return; |
| if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
| if (MaxMaxAlign >= 32 && |
| VTy->getPrimitiveSizeInBits().getFixedSize() >= 256) |
| MaxAlign = Align(32); |
| else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 && |
| MaxAlign < 16) |
| MaxAlign = Align(16); |
| } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
| Align EltAlign; |
| getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
| for (auto *EltTy : STy->elements()) { |
| Align EltAlign; |
| getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| if (MaxAlign == MaxMaxAlign) |
| break; |
| } |
| } |
| } |
| |
| /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
| /// function arguments in the caller parameter area. |
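/// For example, when Altivec is available, an aggregate containing a v4f32
/// member (128 bits wide) is aligned to 16 bytes.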
| uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty, |
| const DataLayout &DL) const { |
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is passed on an 8-byte boundary on PPC64 and on a 4-byte
  // boundary on PPC32.
| Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); |
| if (Subtarget.hasAltivec()) |
| getMaxByValAlign(Ty, Alignment, Align(16)); |
| return Alignment.value(); |
| } |
| |
| bool PPCTargetLowering::useSoftFloat() const { |
| return Subtarget.useSoftFloat(); |
| } |
| |
| bool PPCTargetLowering::hasSPE() const { |
| return Subtarget.hasSPE(); |
| } |
| |
| bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { |
| return VT.isScalarInteger(); |
| } |
| |
| const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { |
| switch ((PPCISD::NodeType)Opcode) { |
| case PPCISD::FIRST_NUMBER: break; |
| case PPCISD::FSEL: return "PPCISD::FSEL"; |
| case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; |
| case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; |
| case PPCISD::FCFID: return "PPCISD::FCFID"; |
| case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; |
| case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; |
| case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; |
| case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; |
| case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; |
| case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; |
| case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; |
| case PPCISD::FP_TO_UINT_IN_VSR: |
    return "PPCISD::FP_TO_UINT_IN_VSR";
| case PPCISD::FP_TO_SINT_IN_VSR: |
| return "PPCISD::FP_TO_SINT_IN_VSR"; |
| case PPCISD::FRE: return "PPCISD::FRE"; |
| case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; |
| case PPCISD::FTSQRT: |
| return "PPCISD::FTSQRT"; |
| case PPCISD::FSQRT: |
| return "PPCISD::FSQRT"; |
| case PPCISD::STFIWX: return "PPCISD::STFIWX"; |
| case PPCISD::VPERM: return "PPCISD::VPERM"; |
| case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; |
| case PPCISD::XXSPLTI_SP_TO_DP: |
| return "PPCISD::XXSPLTI_SP_TO_DP"; |
| case PPCISD::XXSPLTI32DX: |
| return "PPCISD::XXSPLTI32DX"; |
| case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; |
| case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; |
| case PPCISD::VECSHL: return "PPCISD::VECSHL"; |
| case PPCISD::CMPB: return "PPCISD::CMPB"; |
| case PPCISD::Hi: return "PPCISD::Hi"; |
| case PPCISD::Lo: return "PPCISD::Lo"; |
| case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; |
| case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; |
| case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; |
| case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; |
| case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; |
| case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA"; |
| case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; |
| case PPCISD::SRL: return "PPCISD::SRL"; |
| case PPCISD::SRA: return "PPCISD::SRA"; |
| case PPCISD::SHL: return "PPCISD::SHL"; |
| case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; |
| case PPCISD::CALL: return "PPCISD::CALL"; |
| case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; |
| case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; |
| case PPCISD::CALL_RM: |
| return "PPCISD::CALL_RM"; |
| case PPCISD::CALL_NOP_RM: |
| return "PPCISD::CALL_NOP_RM"; |
| case PPCISD::CALL_NOTOC_RM: |
| return "PPCISD::CALL_NOTOC_RM"; |
| case PPCISD::MTCTR: return "PPCISD::MTCTR"; |
| case PPCISD::BCTRL: return "PPCISD::BCTRL"; |
| case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; |
| case PPCISD::BCTRL_RM: |
| return "PPCISD::BCTRL_RM"; |
| case PPCISD::BCTRL_LOAD_TOC_RM: |
| return "PPCISD::BCTRL_LOAD_TOC_RM"; |
| case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; |
| case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; |
| case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; |
| case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; |
| case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; |
| case PPCISD::MFVSR: return "PPCISD::MFVSR"; |
| case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; |
| case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; |
| case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; |
| case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; |
| case PPCISD::SCALAR_TO_VECTOR_PERMUTED: |
| return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; |
| case PPCISD::ANDI_rec_1_EQ_BIT: |
| return "PPCISD::ANDI_rec_1_EQ_BIT"; |
| case PPCISD::ANDI_rec_1_GT_BIT: |
| return "PPCISD::ANDI_rec_1_GT_BIT"; |
| case PPCISD::VCMP: return "PPCISD::VCMP"; |
| case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; |
| case PPCISD::LBRX: return "PPCISD::LBRX"; |
| case PPCISD::STBRX: return "PPCISD::STBRX"; |
| case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; |
| case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; |
| case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; |
| case PPCISD::STXSIX: return "PPCISD::STXSIX"; |
| case PPCISD::VEXTS: return "PPCISD::VEXTS"; |
| case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; |
| case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; |
| case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; |
| case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; |
| case PPCISD::ST_VSR_SCAL_INT: |
| return "PPCISD::ST_VSR_SCAL_INT"; |
| case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; |
| case PPCISD::BDNZ: return "PPCISD::BDNZ"; |
| case PPCISD::BDZ: return "PPCISD::BDZ"; |
| case PPCISD::MFFS: return "PPCISD::MFFS"; |
| case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; |
| case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; |
| case PPCISD::CR6SET: return "PPCISD::CR6SET"; |
| case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; |
| case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; |
| case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; |
| case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; |
| case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; |
| case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; |
| case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; |
| case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; |
| case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; |
| case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; |
| case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; |
| case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; |
| case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; |
| case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; |
| case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; |
| case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; |
| case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; |
| case PPCISD::PADDI_DTPREL: |
| return "PPCISD::PADDI_DTPREL"; |
| case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; |
| case PPCISD::SC: return "PPCISD::SC"; |
| case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; |
| case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; |
| case PPCISD::RFEBB: return "PPCISD::RFEBB"; |
| case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; |
| case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; |
| case PPCISD::VABSD: return "PPCISD::VABSD"; |
| case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; |
| case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; |
| case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; |
| case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; |
| case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; |
| case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; |
| case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; |
| case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: |
| return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; |
| case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: |
| return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; |
| case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; |
| case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; |
| case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; |
| case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; |
| case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; |
| case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT"; |
| case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT"; |
| case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; |
| case PPCISD::STRICT_FADDRTZ: |
| return "PPCISD::STRICT_FADDRTZ"; |
| case PPCISD::STRICT_FCTIDZ: |
| return "PPCISD::STRICT_FCTIDZ"; |
| case PPCISD::STRICT_FCTIWZ: |
| return "PPCISD::STRICT_FCTIWZ"; |
| case PPCISD::STRICT_FCTIDUZ: |
| return "PPCISD::STRICT_FCTIDUZ"; |
| case PPCISD::STRICT_FCTIWUZ: |
| return "PPCISD::STRICT_FCTIWUZ"; |
| case PPCISD::STRICT_FCFID: |
| return "PPCISD::STRICT_FCFID"; |
| case PPCISD::STRICT_FCFIDU: |
| return "PPCISD::STRICT_FCFIDU"; |
| case PPCISD::STRICT_FCFIDS: |
| return "PPCISD::STRICT_FCFIDS"; |
| case PPCISD::STRICT_FCFIDUS: |
| return "PPCISD::STRICT_FCFIDUS"; |
| case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; |
| } |
| return nullptr; |
| } |
| |
| EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; |
| |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { |
| assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); |
| return true; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Node matching predicates, for use by the tblgen matching code. |
| //===----------------------------------------------------------------------===// |
| |
| /// isFloatingPointZero - Return true if this is 0.0 or -0.0. |
| static bool isFloatingPointZero(SDValue Op) { |
| if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) |
| return CFP->getValueAPF().isZero(); |
| else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { |
| // Maybe this has already been legalized into the constant pool? |
| if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) |
| if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) |
| return CFP->getValueAPF().isZero(); |
| } |
| return false; |
| } |
| |
| /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return |
| /// true if Op is undef or if it matches the specified value. |
| static bool isConstantOrUndef(int Op, int Val) { |
| return Op < 0 || Op == Val; |
| } |
| |
| /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUHUM instruction. |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
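/// For example, the big-endian two-input form (ShuffleKind 0) corresponds to
/// the mask {1, 3, 5, ..., 31}, i.e. the odd bytes of the concatenated inputs.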
| bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 1; |
| for (unsigned i = 0; i != 8; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUWUM instruction. |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 2; |
| for (unsigned i = 0; i != 8; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the |
| /// current subtarget. |
| /// |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| const PPCSubtarget& Subtarget = |
| static_cast<const PPCSubtarget&>(DAG.getSubtarget()); |
| if (!Subtarget.hasP8Vector()) |
| return false; |
| |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 4; |
| for (unsigned i = 0; i != 8; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || |
| !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVMerge - Common function, used to match vmrg* shuffles. |
| /// |
| static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned LHSStart, unsigned RHSStart) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return false; |
| assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && |
| "Unsupported merge size!"); |
| |
| for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units |
| for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit |
| if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), |
| LHSStart+j+i*UnitSize) || |
| !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), |
| RHSStart+j+i*UnitSize)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for |
| /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). |
| /// The ShuffleKind distinguishes between big-endian merges with two |
| /// different inputs (0), either-endian merges with two identical inputs (1), |
| /// and little-endian merges with two different inputs (2). For the latter, |
| /// the input operands are swapped (see PPCInstrAltivec.td). |
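/// For example, with UnitSize == 1 the big-endian two-input form
/// (ShuffleKind 0) is the mask
/// {8,24, 9,25, 10,26, 11,27, 12,28, 13,29, 14,30, 15,31}.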
| bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 0, 0); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, UnitSize, 0, 16); |
| else |
| return false; |
| } else { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 8, 8); |
| else if (ShuffleKind == 0) // normal |
| return isVMerge(N, UnitSize, 8, 24); |
| else |
| return false; |
| } |
| } |
| |
| /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for |
| /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). |
| /// The ShuffleKind distinguishes between big-endian merges with two |
| /// different inputs (0), either-endian merges with two identical inputs (1), |
| /// and little-endian merges with two different inputs (2). For the latter, |
| /// the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 8, 8); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, UnitSize, 8, 24); |
| else |
| return false; |
| } else { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 0, 0); |
| else if (ShuffleKind == 0) // normal |
| return isVMerge(N, UnitSize, 0, 16); |
| else |
| return false; |
| } |
| } |
| |
| /** |
| * Common function used to match vmrgew and vmrgow shuffles |
| * |
| * The indexOffset determines whether to look for even or odd words in |
 * the shuffle mask. This is based on the endianness of the target
| * machine. |
| * - Little Endian: |
| * - Use offset of 0 to check for odd elements |
| * - Use offset of 4 to check for even elements |
| * - Big Endian: |
| * - Use offset of 0 to check for even elements |
| * - Use offset of 4 to check for odd elements |
| * A detailed description of the vector element ordering for little endian and |
| * big endian can be found at |
| * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html |
 * ("Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you").
| * |
| * The mask to the shuffle vector instruction specifies the indices of the |
| * elements from the two input vectors to place in the result. The elements are |
| * numbered in array-access order, starting with the first vector. These vectors |
 * are always of type v16i8, so each vector contains 16 elements of 8 bits
 * each. More information on the shufflevector instruction can be found in the
 * Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
| * |
| * The RHSStartValue indicates whether the same input vectors are used (unary) |
| * or two different input vectors are used, based on the following: |
| * - If the instruction uses the same vector for both inputs, the range of the |
| * indices will be 0 to 15. In this case, the RHSStart value passed should |
| * be 0. |
| * - If the instruction has two different vectors then the range of the |
| * indices will be 0 to 31. In this case, the RHSStart value passed should |
| * be 16 (indices 0-15 specify elements in the first vector while indices 16 |
| * to 31 specify elements in the second vector). |
| * |
| * \param[in] N The shuffle vector SD Node to analyze |
| * \param[in] IndexOffset Specifies whether to look for even or odd elements |
| * \param[in] RHSStartValue Specifies the starting index for the righthand input |
| * vector to the shuffle_vector instruction |
| * \return true iff this shuffle vector represents an even or odd word merge |
| */ |
| static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, |
| unsigned RHSStartValue) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return false; |
| |
| for (unsigned i = 0; i < 2; ++i) |
| for (unsigned j = 0; j < 4; ++j) |
| if (!isConstantOrUndef(N->getMaskElt(i*4+j), |
| i*RHSStartValue+j+IndexOffset) || |
| !isConstantOrUndef(N->getMaskElt(i*4+j+8), |
| i*RHSStartValue+j+IndexOffset+8)) |
| return false; |
| return true; |
| } |
| |
| /** |
| * Determine if the specified shuffle mask is suitable for the vmrgew or |
| * vmrgow instructions. |
| * |
| * \param[in] N The shuffle vector SD Node to analyze |
| * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) |
| * \param[in] ShuffleKind Identify the type of merge: |
| * - 0 = big-endian merge with two different inputs; |
| * - 1 = either-endian merge with two identical inputs; |
| * - 2 = little-endian merge with two different inputs (inputs are swapped for |
| * little-endian merges). |
| * \param[in] DAG The current SelectionDAG |
 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
 * instruction selected by \p CheckEven
| */ |
| bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| unsigned indexOffset = CheckEven ? 4 : 0; |
| if (ShuffleKind == 1) // Unary |
| return isVMerge(N, indexOffset, 0); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, indexOffset, 16); |
| else |
| return false; |
| } |
| else { |
| unsigned indexOffset = CheckEven ? 0 : 4; |
| if (ShuffleKind == 1) // Unary |
| return isVMerge(N, indexOffset, 0); |
| else if (ShuffleKind == 0) // Normal |
| return isVMerge(N, indexOffset, 16); |
| else |
| return false; |
| } |
| return false; |
| } |
| |
| /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift |
| /// amount, otherwise return -1. |
| /// The ShuffleKind distinguishes between big-endian operations with two |
| /// different inputs (0), either-endian operations with two identical inputs |
| /// (1), and little-endian operations with two different inputs (2). For the |
| /// latter, the input operands are swapped (see PPCInstrAltivec.td). |
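/// For example, on a big-endian target with two different inputs
/// (ShuffleKind 0), the mask {4, 5, ..., 19} yields a shift amount of 4.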
| int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return -1; |
| |
| ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
| |
| // Find the first non-undef value in the shuffle mask. |
| unsigned i; |
| for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) |
| /*search*/; |
| |
| if (i == 16) return -1; // all undef. |
| |
| // Otherwise, check to see if the rest of the elements are consecutively |
| // numbered from this value. |
| unsigned ShiftAmt = SVOp->getMaskElt(i); |
| if (ShiftAmt < i) return -1; |
| |
| ShiftAmt -= i; |
| bool isLE = DAG.getDataLayout().isLittleEndian(); |
| |
| if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { |
| // Check the rest of the elements to see if they are consecutive. |
| for (++i; i != 16; ++i) |
| if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) |
| return -1; |
| } else if (ShuffleKind == 1) { |
| // Check the rest of the elements to see if they are consecutive. |
| for (++i; i != 16; ++i) |
| if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) |
| return -1; |
| } else |
| return -1; |
| |
| if (isLE) |
| ShiftAmt = 16 - ShiftAmt; |
| |
| return ShiftAmt; |
| } |
| |
| /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand |
| /// specifies a splat of a single element that is suitable for input to |
| /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). |
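/// For example, with EltSize == 4, a splat of word element 1 corresponds to
/// the mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}.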
| bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { |
| assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && |
| EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); |
| |
| // The consecutive indices need to specify an element, not part of two |
| // different elements. So abandon ship early if this isn't the case. |
| if (N->getMaskElt(0) % EltSize != 0) |
| return false; |
| |
| // This is a splat operation if each element of the permute is the same, and |
| // if the value doesn't reference the second vector. |
| unsigned ElementBase = N->getMaskElt(0); |
| |
| // FIXME: Handle UNDEF elements too! |
| if (ElementBase >= 16) |
| return false; |
| |
| // Check that the indices are consecutive, in the case of a multi-byte element |
| // splatted with a v16i8 mask. |
| for (unsigned i = 1; i != EltSize; ++i) |
| if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) |
| return false; |
| |
| for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { |
| if (N->getMaskElt(i) < 0) continue; |
| for (unsigned j = 0; j != EltSize; ++j) |
| if (N->getMaskElt(i+j) != N->getMaskElt(j)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// Check that the mask is shuffling N byte elements. Within each N byte |
| /// element of the mask, the indices could be either in increasing or |
| /// decreasing order as long as they are consecutive. |
| /// \param[in] N the shuffle vector SD Node to analyze |
| /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ |
| /// Word/DoubleWord/QuadWord). |
| /// \param[in] StepLen the delta indices number among the N byte element, if |
| /// the mask is in increasing/decreasing order then it is 1/-1. |
| /// \return true iff the mask is shuffling N byte elements. |
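/// For example, with Width == 4 and StepLen == 1, the mask
/// {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} qualifies; with StepLen == -1,
/// byte-reversed words such as {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
/// qualify instead.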
| static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, |
| int StepLen) { |
| assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && |
| "Unexpected element width."); |
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
| |
| unsigned NumOfElem = 16 / Width; |
| unsigned MaskVal[16]; // Width is never greater than 16 |
| for (unsigned i = 0; i < NumOfElem; ++i) { |
| MaskVal[0] = N->getMaskElt(i * Width); |
| if ((StepLen == 1) && (MaskVal[0] % Width)) { |
| return false; |
| } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { |
| return false; |
| } |
| |
| for (unsigned int j = 1; j < Width; ++j) { |
| MaskVal[j] = N->getMaskElt(i * Width + j); |
| if (MaskVal[j] != MaskVal[j-1] + StepLen) { |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
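/// Check whether \p N is a shuffle that can be lowered to a single XXINSERTW,
/// i.e. one that inserts a single 32-bit word from one of the inputs into the
/// other (or into itself, for unary shuffles). For example, the mask
/// {0,1,2,3, 20,21,22,23, 8,9,10,11, 12,13,14,15} is recognized on a
/// little-endian target with ShiftElts = 1, InsertAtByte = 8 and Swap = false.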
| bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
| unsigned &InsertAtByte, bool &Swap, bool IsLE) { |
| if (!isNByteElemShuffleMask(N, 4, 1)) |
| return false; |
| |
| // Now we look at mask elements 0,4,8,12 |
| unsigned M0 = N->getMaskElt(0) / 4; |
| unsigned M1 = N->getMaskElt(4) / 4; |
| unsigned M2 = N->getMaskElt(8) / 4; |
| unsigned M3 = N->getMaskElt(12) / 4; |
| unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; |
| unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; |
| |
| // Below, let H and L be arbitrary elements of the shuffle mask |
| // where H is in the range [4,7] and L is in the range [0,3]. |
| // H, 1, 2, 3 or L, 5, 6, 7 |
| if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || |
| (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; |
| InsertAtByte = IsLE ? 12 : 0; |
| Swap = M0 < 4; |
| return true; |
| } |
| // 0, H, 2, 3 or 4, L, 6, 7 |
| if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || |
| (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; |
| InsertAtByte = IsLE ? 8 : 4; |
| Swap = M1 < 4; |
| return true; |
| } |
| // 0, 1, H, 3 or 4, 5, L, 7 |
| if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || |
| (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; |
| InsertAtByte = IsLE ? 4 : 8; |
| Swap = M2 < 4; |
| return true; |
| } |
| // 0, 1, 2, H or 4, 5, 6, L |
| if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || |
| (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; |
| InsertAtByte = IsLE ? 0 : 12; |
| Swap = M3 < 4; |
| return true; |
| } |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| ShiftElts = 0; |
| Swap = true; |
| unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; |
| if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { |
| InsertAtByte = IsLE ? 12 : 0; |
| return true; |
| } |
| if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { |
| InsertAtByte = IsLE ? 8 : 4; |
| return true; |
| } |
| if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { |
| InsertAtByte = IsLE ? 4 : 8; |
| return true; |
| } |
| if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { |
| InsertAtByte = IsLE ? 0 : 12; |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
| bool &Swap, bool IsLE) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| // Ensure each byte index of the word is consecutive. |
| if (!isNByteElemShuffleMask(N, 4, 1)) |
| return false; |
| |
| // Now we look at mask elements 0,4,8,12, which are the beginning of words. |
| unsigned M0 = N->getMaskElt(0) / 4; |
| unsigned M1 = N->getMaskElt(4) / 4; |
| unsigned M2 = N->getMaskElt(8) / 4; |
| unsigned M3 = N->getMaskElt(12) / 4; |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| assert(M0 < 4 && "Indexing into an undef vector?"); |
| if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) |
| return false; |
| |
| ShiftElts = IsLE ? (4 - M0) % 4 : M0; |
| Swap = false; |
| return true; |
| } |
| |
| // Ensure each word index of the ShuffleVector Mask is consecutive. |
| if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) |
| return false; |
| |
| if (IsLE) { |
| if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { |
| // Input vectors don't need to be swapped if the leading element |
| // of the result is one of the 3 left elements of the second vector |
| // (or if there is no shift to be done at all). |
| Swap = false; |
| ShiftElts = (8 - M0) % 8; |
| } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { |
| // Input vectors need to be swapped if the leading element |
| // of the result is one of the 3 left elements of the first vector |
| // (or if we're shifting by 4 - thereby simply swapping the vectors). |
| Swap = true; |
| ShiftElts = (4 - M0) % 4; |
| } |
| |
| return true; |
| } else { // BE |
| if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { |
| // Input vectors don't need to be swapped if the leading element |
| // of the result is one of the 4 elements of the first vector. |
| Swap = false; |
| ShiftElts = M0; |
| } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { |
| // Input vectors need to be swapped if the leading element |
| // of the result is one of the 4 elements of the right vector. |
| Swap = true; |
| ShiftElts = M0 - 4; |
| } |
| |
| return true; |
| } |
| } |
| |
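// Check for a byte-reversal-within-elements mask; e.g. for XXBRW (Width == 4)
// the expected mask is {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.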
| bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| |
| if (!isNByteElemShuffleMask(N, Width, -1)) |
| return false; |
| |
| for (int i = 0; i < 16; i += Width) |
| if (N->getMaskElt(i) != i + Width - 1) |
| return false; |
| |
| return true; |
| } |
| |
| bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 2); |
| } |
| |
| bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 4); |
| } |
| |
| bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 8); |
| } |
| |
| bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 16); |
| } |
| |
| /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap |
| /// if the inputs to the instruction should be swapped and set \p DM to the |
| /// value for the immediate. |
| /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI |
| /// AND element 0 of the result comes from the first input (LE) or second input |
| /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. |
| /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle |
| /// mask. |
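/// For example, the unary mask {8, 9, ..., 15, 0, 1, ..., 7} (second input
/// undef) is recognized with DM = 2 and Swap = false on either endianness.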
| bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, |
| bool &Swap, bool IsLE) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| |
| // Ensure each byte index of the double word is consecutive. |
| if (!isNByteElemShuffleMask(N, 8, 1)) |
| return false; |
| |
| unsigned M0 = N->getMaskElt(0) / 8; |
| unsigned M1 = N->getMaskElt(8) / 8; |
| assert(((M0 | M1) < 4) && "A mask element out of bounds?"); |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| if ((M0 | M1) < 2) { |
| DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); |
| Swap = false; |
| return true; |
| } else |
| return false; |
| } |
| |
| if (IsLE) { |
| if (M0 > 1 && M1 < 2) { |
| Swap = false; |
| } else if (M0 < 2 && M1 > 1) { |
| M0 = (M0 + 2) % 4; |
| M1 = |