blob: ec7e30d7e362563125462877b568d9d13bae96b6 [file] [log] [blame]
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "ppc-lowering"
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
static cl::opt<bool> EnableQuadwordAtomics(
"ppc-quadword-atomics",
cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
// Initialize map that relates the PPC addressing modes to the computed flags
// of a load/store instruction. The map is used to determine the optimal
// addressing mode when selecting load and stores.
initializeAddrModeMap();
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
if (hasSPE()) {
addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
// EFPU2 APU only supports f32
if (!Subtarget.hasEFPU2())
addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
}
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
// Custom lower inline assembly to check for special registers.
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
if (Subtarget.isISA3_0()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
} else {
// No extending loads from f16 or HW conversions back and forth.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
if (!Subtarget.hasSPE()) {
setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
}
// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
setOperationAction(ISD::ADDC, VT, Legal);
setOperationAction(ISD::ADDE, VT, Legal);
setOperationAction(ISD::SUBC, VT, Legal);
setOperationAction(ISD::SUBE, VT, Legal);
}
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
} else {
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
// PowerPC does not support direct load/store of condition registers.
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
// FIXME: Remove this once the ANDI glue bug is fixed:
if (ANDIGlueBug)
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions unless we are on P9
// On P9 we may use a hardware instruction to compute the remainder.
// When the result of both the remainder and the division is required it is
// more efficient to compute the remainder from the result of the division
// rather than use the remainder instruction. The instructions are legalized
// directly because the DivRemPairsPass performs the transformation at the IR
// level.
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::SREM, MVT::i32, Legal);
setOperationAction(ISD::UREM, MVT::i32, Legal);
setOperationAction(ISD::SREM, MVT::i64, Legal);
setOperationAction(ISD::UREM, MVT::i64, Legal);
} else {
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
// Handle constrained floating-point operations of scalar.
// TODO: Handle SPE specific operation.
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
if (!Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
}
if (Subtarget.hasVSX()) {
setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
}
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
}
if (Subtarget.hasFPRND()) {
setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
}
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
if (Subtarget.hasSPE()) {
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
} else {
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
if (Subtarget.hasSPE())
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
else
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
}
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
}
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
if (Subtarget.hasFPU()) {
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
}
// PowerPC does not have BRCOND which requires SetCC
if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
if (Subtarget.hasSPE()) {
// SPE has built-in conversions
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
// SPE supports signaling compare of f32/f64.
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
} else {
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::BITCAST, MVT::f32, Legal);
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::LRINT, MVT::f64, Legal);
setOperationAction(ISD::LRINT, MVT::f32, Legal);
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
}
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
}
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
// other SjLj exception interfaces are implemented and please don't build
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
// TRAP is legal.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
if (Subtarget.is64BitELFABI()) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i8, Promote);
AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i16, Promote);
AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i32, Promote);
AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
} else if (Subtarget.is32BitELFABI()) {
// VAARG is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::i64, Custom);
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
if (Subtarget.is32BitELFABI())
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Comparisons that require checking two conditions.
if (Subtarget.hasSPE()) {
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
setCondCodeAction(ISD::SETO, MVT::f64, Expand);
setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
}
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
}
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
if (Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
} else {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
}
// With the instructions enabled under FPCVT, we can do everything.
if (Subtarget.hasFPCVT()) {
if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
} else {
// 32-bit PowerPC wants to expand i64 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
// PowerPC has better expansions for funnel shifts than the generic
// TargetLowering::expandFunnelShift.
if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FSHL, MVT::i64, Custom);
setOperationAction(ISD::FSHR, MVT::i64, Custom);
}
setOperationAction(ISD::FSHL, MVT::i32, Custom);
setOperationAction(ISD::FSHR, MVT::i32, Custom);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
}
if (Subtarget.hasAltivec()) {
for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// For v2i64, these are only valid with P8Vector. This is corrected after
// the loop.
if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
}
else {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
}
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FMINNUM, VT, Legal);
}
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
}
else {
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
}
// Vector instructions introduced in P9
if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
setOperationAction(ISD::CTTZ, VT, Legal);
else
setOperationAction(ISD::CTTZ, VT, Expand);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
// We promote all non-typed operations to v4i32.
setOperationAction(ISD::AND , VT, Promote);
AddPromotedToType (ISD::AND , VT, MVT::v4i32);
setOperationAction(ISD::OR , VT, Promote);
AddPromotedToType (ISD::OR , VT, MVT::v4i32);
setOperationAction(ISD::XOR , VT, Promote);
AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
setOperationAction(ISD::LOAD , VT, Promote);
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
// No other operations are legal.
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
// Vector truncates to sub-word integer that fit in an Altivec/VSX register
// are cheap, so handle them before they get expanded to scalar.
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
if (Subtarget.hasAltivec())
for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
setOperationAction(ISD::ROTL, VT, Legal);
// With hasP8Altivec set, we can lower ISD::ROTL to vrld.
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
}
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v2i64, Legal);
setOperationAction(ISD::SREM, MVT::v2i64, Legal);
setOperationAction(ISD::UREM, MVT::v4i32, Legal);
setOperationAction(ISD::SREM, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v1i128, Legal);
setOperationAction(ISD::SREM, MVT::v1i128, Legal);
setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
}
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
// The nearbyint variants are not allowed to raise the inexact exception
// so we can only code-gen them with unsafe math.
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
}
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::STORE, MVT::v2f64, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector())
addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
if (Subtarget.hasP8Altivec()) {
setOperationAction(ISD::SHL, MVT::v2i64, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
// doing
setOperationAction(ISD::SHL, MVT::v1i128, Expand);
setOperationAction(ISD::SRL, MVT::v1i128, Expand);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SRA, MVT::v2i64, Expand);
setOperationAction(ISD::SRL, MVT::v2i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
// VSX v2i64 only supports non-arithmetic operations.
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
if (Subtarget.isISA3_1())
setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
else
setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
// Custom handling for partial vectors of integers converted to
// floating point. We already have optimal handling for v2i32 through
// the DAG combine, so those aren't necessary.
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
// Handle constrained floating-point operations of vector.
// The predictor is `hasVSX` because altivec instruction has
// no exception but VSX vector instruction has.
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
for (MVT FPT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
// Expand the SELECT to SELECT_CC
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
// No implementation for these ops for PowerPC.
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FPOWI, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
if (Subtarget.hasP8Altivec()) {
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
}
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O.
setOperationAction(ISD::SHL, MVT::v1i128, Legal);
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::FADD, MVT::f128, Legal);
setOperationAction(ISD::FSUB, MVT::f128, Legal);
setOperationAction(ISD::FDIV, MVT::f128, Legal);
setOperationAction(ISD::FMUL, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::FMA, MVT::f128, Legal);
setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
setOperationAction(ISD::FRINT, MVT::f128, Legal);
setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
setOperationAction(ISD::FCEIL, MVT::f128, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
setOperationAction(ISD::FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
// Handle constrained floating-point operations of fp128
setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
} else if (Subtarget.hasVSX()) {
setOperationAction(ISD::LOAD, MVT::f128, Promote);
setOperationAction(ISD::STORE, MVT::f128, Promote);
AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
// Set FADD/FSUB as libcall to avoid the legalizer to expand the
// fp_to_uint and int_to_fp.
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FMUL, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, Expand);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
// Expand the fp_extend if the target type is fp128.
setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
// Expand the fp_round if the source type is fp128.
for (MVT VT : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Expand);
// Lower following f128 select_cc pattern:
// select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
// We need to handle f128 SELECT_CC with integer result type.
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
}
if (Subtarget.hasP9Altivec()) {
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
}
if (Subtarget.pairedVectorMemops()) {
addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
setOperationAction(ISD::STORE, MVT::v256i1, Custom);
}
if (Subtarget.hasMMA()) {
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
}
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
if (Subtarget.isISA3_1())
setOperationAction(ISD::SRA, MVT::v1i128, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
if (!isPPC64) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasAltivec()) {
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
if (!isPPC64) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!isPPC64)
setMaxAtomicSizeInBitsSupported(32);
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SELECT_CC);
}
if (Subtarget.hasP9Altivec()) {
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::VSELECT);
}
setLibcallName(RTLIB::LOG_F128, "logf128");
setLibcallName(RTLIB::LOG2_F128, "log2f128");
setLibcallName(RTLIB::LOG10_F128, "log10f128");
setLibcallName(RTLIB::EXP_F128, "expf128");
setLibcallName(RTLIB::EXP2_F128, "exp2f128");
setLibcallName(RTLIB::SIN_F128, "sinf128");
setLibcallName(RTLIB::COS_F128, "cosf128");
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
setLibcallName(RTLIB::REM_F128, "fmodf128");
setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
setLibcallName(RTLIB::CEIL_F128, "ceilf128");
setLibcallName(RTLIB::FLOOR_F128, "floorf128");
setLibcallName(RTLIB::TRUNC_F128, "truncf128");
setLibcallName(RTLIB::ROUND_F128, "roundf128");
setLibcallName(RTLIB::LROUND_F128, "lroundf128");
setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
setLibcallName(RTLIB::RINT_F128, "rintf128");
setLibcallName(RTLIB::LRINT_F128, "lrintf128");
setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
setLibcallName(RTLIB::FMA_F128, "fmaf128");
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
setJumpIsExpensive();
}
setMinFunctionAlignment(Align(4));
switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR4:
case PPC::DIR_PWR5:
case PPC::DIR_PWR5X:
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
break;
}
if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties(STI.getRegisterInfo());
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
Subtarget.getCPUDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
} else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of a the function call, even when warm, can be
// over one hundred cycles.
MaxStoresPerMemset = 128;
MaxStoresPerMemcpy = 128;
MaxStoresPerMemmove = 128;
MaxLoadsPerMemcmp = 128;
} else {
MaxLoadsPerMemcmp = 8;
MaxLoadsPerMemcmpOptSize = 4;
}
IsStrictFPEnabled = true;
// Let the subtarget (CPU) decide if a predictable select is more expensive
// than the corresponding branch. This information is used in CGP to decide
// when to convert selects into branches.
PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}
// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load the store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry of the new addressing mode (consisting
// of the minimal and main distinguishing address flags for the new load/store
// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, the getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
AddrModesMap[PPC::AM_DForm] = {
// LWZ, STW
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LBZ, LHZ, STB, STH
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LHA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LFS, LFD, STFS, STFD
PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
};
AddrModesMap[PPC::AM_DSForm] = {
// LWA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LD, STD
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
// DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
};
AddrModesMap[PPC::AM_DQForm] = {
// LXV, STXV
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
};
AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
PPC::MOF_SubtargetP10};
// TODO: Add mapping for quadword load/store.
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (MaxMaxAlign >= 32 &&
VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
MaxAlign = Align(32);
else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
MaxAlign < 16)
MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
Align EltAlign;
getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
break;
}
}
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
if (Subtarget.hasAltivec())
getMaxByValAlign(Ty, Alignment, Align(16));
return Alignment.value();
}
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
bool PPCTargetLowering::hasSPE() const {
return Subtarget.hasSPE();
}
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
case PPCISD::FP_TO_UINT_IN_VSR:
return "PPCISD::FP_TO_UINT_IN_VSR,";
case PPCISD::FP_TO_SINT_IN_VSR:
return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::FTSQRT:
return "PPCISD::FTSQRT";
case PPCISD::FSQRT:
return "PPCISD::FSQRT";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXSPLTI_SP_TO_DP:
return "PPCISD::XXSPLTI_SP_TO_DP";
case PPCISD::XXSPLTI32DX:
return "PPCISD::XXSPLTI32DX";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::CALL_RM:
return "PPCISD::CALL_RM";
case PPCISD::CALL_NOP_RM:
return "PPCISD::CALL_NOP_RM";
case PPCISD::CALL_NOTOC_RM:
return "PPCISD::CALL_NOTOC_RM";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::BCTRL_RM:
return "PPCISD::BCTRL_RM";
case PPCISD::BCTRL_LOAD_TOC_RM:
return "PPCISD::BCTRL_LOAD_TOC_RM";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::MFVSR: return "PPCISD::MFVSR";
case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
case PPCISD::ANDI_rec_1_EQ_BIT:
return "PPCISD::ANDI_rec_1_EQ_BIT";
case PPCISD::ANDI_rec_1_GT_BIT:
return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::PADDI_DTPREL:
return "PPCISD::PADDI_DTPREL";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
case PPCISD::STRICT_FADDRTZ:
return "PPCISD::STRICT_FADDRTZ";
case PPCISD::STRICT_FCTIDZ:
return "PPCISD::STRICT_FCTIDZ";
case PPCISD::STRICT_FCTIWZ:
return "PPCISD::STRICT_FCTIWZ";
case PPCISD::STRICT_FCTIDUZ:
return "PPCISD::STRICT_FCTIDUZ";
case PPCISD::STRICT_FCTIWUZ:
return "PPCISD::STRICT_FCTIWUZ";
case PPCISD::STRICT_FCFID:
return "PPCISD::STRICT_FCFID";
case PPCISD::STRICT_FCFIDU:
return "PPCISD::STRICT_FCFIDU";
case PPCISD::STRICT_FCFIDS:
return "PPCISD::STRICT_FCFIDS";
case PPCISD::STRICT_FCFIDUS:
return "PPCISD::STRICT_FCFIDUS";
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
}
return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isZero();
}
return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
}
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
if (!Subtarget.hasP8Vector())
return false;
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 4;
for (unsigned i = 0; i != 8; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
return false;
}
return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
if (N->getValueType(0) != MVT::v16i8)
return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 0, 16);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 8, 24);
else
return false;
}
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 8, 24);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 0, 16);
else
return false;
}
}
/**
* Common function used to match vmrgew and vmrgow shuffles
*
* The indexOffset determines whether to look for even or odd words in
* the shuffle mask. This is based on the of the endianness of the target
* machine.
* - Little Endian:
* - Use offset of 0 to check for odd elements
* - Use offset of 4 to check for even elements
* - Big Endian:
* - Use offset of 0 to check for even elements
* - Use offset of 4 to check for odd elements
* A detailed description of the vector element ordering for little endian and
* big endian can be found at
* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
* Targeting your applications - what little endian and big endian IBM XL C/C++
* compiler differences mean to you
*
* The mask to the shuffle vector instruction specifies the indices of the
* elements from the two input vectors to place in the result. The elements are
* numbered in array-access order, starting with the first vector. These vectors
* are always of type v16i8, thus each vector will contain 16 elements of size
* 8. More info on the shuffle vector can be found in the
* http://llvm.org/docs/LangRef.html#shufflevector-instruction
* Language Reference.
*
* The RHSStartValue indicates whether the same input vectors are used (unary)
* or two different input vectors are used, based on the following:
* - If the instruction uses the same vector for both inputs, the range of the
* indices will be 0 to 15. In this case, the RHSStart value passed should
* be 0.
* - If the instruction has two different vectors then the range of the
* indices will be 0 to 31. In this case, the RHSStart value passed should
* be 16 (indices 0-15 specify elements in the first vector while indices 16
* to 31 specify elements in the second vector).
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] IndexOffset Specifies whether to look for even or odd elements
* \param[in] RHSStartValue Specifies the starting index for the righthand input
* vector to the shuffle_vector instruction
* \return true iff this shuffle vector represents an even or odd word merge
*/
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
unsigned RHSStartValue) {
if (N->getValueType(0) != MVT::v16i8)
return false;
for (unsigned i = 0; i < 2; ++i)
for (unsigned j = 0; j < 4; ++j)
if (!isConstantOrUndef(N->getMaskElt(i*4+j),
i*RHSStartValue+j+IndexOffset) ||
!isConstantOrUndef(N->getMaskElt(i*4+j+8),
i*RHSStartValue+j+IndexOffset+8))
return false;
return true;
}
/**
* Determine if the specified shuffle mask is suitable for the vmrgew or
* vmrgow instructions.
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
* \param[in] ShuffleKind Identify the type of merge:
* - 0 = big-endian merge with two different inputs;
* - 1 = either-endian merge with two identical inputs;
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
* \return true iff this shuffle mask
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
unsigned indexOffset = CheckEven ? 4 : 0;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, indexOffset, 16);
else
return false;
}
else {
unsigned indexOffset = CheckEven ? 0 : 4;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 0) // Normal
return isVMerge(N, indexOffset, 16);
else
return false;
}
return false;
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
bool isLE = DAG.getDataLayout().isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else if (ShuffleKind == 1) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
} else
return -1;
if (isLE)
ShiftAmt = 16 - ShiftAmt;
return ShiftAmt;
}
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)
return false;
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
// Check that the indices are consecutive, in the case of a multi-byte element
// splatted with a v16i8 mask.
for (unsigned i = 1; i != EltSize; ++i)
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
return true;
}
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
int StepLen) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");
assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
if ((StepLen == 1) && (MaskVal[0] % Width)) {
return false;
} else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
return false;
}
for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
if (MaskVal[j] != MaskVal[j-1] + StepLen) {
return false;
}
}
}
return true;
}
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
// Below, let H and L be arbitrary elements of the shuffle mask
// where H is in the range [4,7] and L is in the range [0,3].
// H, 1, 2, 3 or L, 5, 6, 7
if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
(M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
InsertAtByte = IsLE ? 12 : 0;
Swap = M0 < 4;
return true;
}
// 0, H, 2, 3 or 4, L, 6, 7
if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
(M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
InsertAtByte = IsLE ? 8 : 4;
Swap = M1 < 4;
return true;
}
// 0, 1, H, 3 or 4, 5, L, 7
if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
(M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
InsertAtByte = IsLE ? 4 : 8;
Swap = M2 < 4;
return true;
}
// 0, 1, 2, H or 4, 5, 6, L
if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
(M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
InsertAtByte = IsLE ? 0 : 12;
Swap = M3 < 4;
return true;
}
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
ShiftElts = 0;
Swap = true;
unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 12 : 0;
return true;
}
if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 8 : 4;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
InsertAtByte = IsLE ? 4 : 8;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
InsertAtByte = IsLE ? 0 : 12;
return true;
}
}
return false;
}
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
assert(M0 < 4 && "Indexing into an undef vector?");
if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
return false;
ShiftElts = IsLE ? (4 - M0) % 4 : M0;
Swap = false;
return true;
}
// Ensure each word index of the ShuffleVector Mask is consecutive.
if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
return false;
if (IsLE) {
if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 3 left elements of the second vector
// (or if there is no shift to be done at all).
Swap = false;
ShiftElts = (8 - M0) % 8;
} else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 3 left elements of the first vector
// (or if we're shifting by 4 - thereby simply swapping the vectors).
Swap = true;
ShiftElts = (4 - M0) % 4;
}
return true;
} else { // BE
if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 4 elements of the first vector.
Swap = false;
ShiftElts = M0;
} else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 4 elements of the right vector.
Swap = true;
ShiftElts = M0 - 4;
}
return true;
}
}
bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
if (!isNByteElemShuffleMask(N, Width, -1))
return false;
for (int i = 0; i < 16; i += Width)
if (N->getMaskElt(i) != i + Width - 1)
return false;
return true;
}
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 2);
}
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 4);
}
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 8);
}
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 16);
}
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the double word is consecutive.
if (!isNByteElemShuffleMask(N, 8, 1))
return false;
unsigned M0 = N->getMaskElt(0) / 8;
unsigned M1 = N->getMaskElt(8) / 8;
assert(((M0 | M1) < 4) && "A mask element out of bounds?");
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
if ((M0 | M1) < 2) {
DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
Swap = false;
return true;
} else
return false;
}
if (IsLE) {
if (M0 > 1 && M1 < 2) {
Swap = false;
} else if (M0 < 2 && M1 > 1) {
M0 = (M0 + 2) % 4;
M1 =