blob: 8bc7470d304c1f71a53abbf8b7a97a1757a1491b [file] [log] [blame]
//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
// Make names from the llvm namespace usable unqualified in this file.
using namespace llvm;
// Category string for this file's debug output and statistics. It must be
// defined before STATISTIC is used below (see llvm/ADT/Statistic.h and
// llvm/Support/Debug.h, both included above).
#define DEBUG_TYPE "riscv-lower"
// Declares the NumTailCalls statistic counter together with its description.
// NOTE(review): presumably incremented where a call is lowered as a tail call;
// that code is not visible in this part of the file -- confirm.
STATISTIC(NumTailCalls, "Number of tail calls");
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
if (Subtarget.isRV32E())
report_fatal_error("Codegen not yet implemented for RV32E");
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
!Subtarget.hasStdExtF()) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
} else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
!Subtarget.hasStdExtD()) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
}
switch (ABI) {
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
case RISCVABI::ABI_LP64F:
case RISCVABI::ABI_LP64D:
break;
}
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfh())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
static const MVT::SimpleValueType BoolVecVTs[] = {
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
static const MVT::SimpleValueType IntVecVTs[] = {
MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
MVT::nxv4i64, MVT::nxv8i64};
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
unsigned Size = VT.getSizeInBits().getKnownMinValue();
assert(Size <= 512 && isPowerOf2_32(Size));
const TargetRegisterClass *RC;
if (Size <= 64)
RC = &RISCV::VRRegClass;
else if (Size == 128)
RC = &RISCV::VRM2RegClass;
else if (Size == 256)
RC = &RISCV::VRM4RegClass;
else
RC = &RISCV::VRM8RegClass;
addRegisterClass(VT, RC);
};
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
for (MVT VT : IntVecVTs) {
if (VT.getVectorElementType() == MVT::i64 &&
!Subtarget.hasVInstructionsI64())
continue;
addRegClassForRVV(VT);
}
if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF64())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
if (Subtarget.useRVVForFixedLengthVectors()) {
auto addRegClassForFixedVectors = [this](MVT VT) {
MVT ContainerVT = getContainerForFixedLengthVector(VT);
unsigned RCID = getRegClassIDForVecVT(ContainerVT);
const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
addRegisterClass(VT, TRI.getRegClass(RCID));
};
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
}
}
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RISCV::X2);
for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
setLoadExtAction(N, XLenVT, MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ADD, MVT::i32, Custom);
setOperationAction(ISD::SUB, MVT::i32, Custom);
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
} else {
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
setOperationAction(ISD::MULHU, XLenVT, Expand);
setOperationAction(ISD::SDIV, XLenVT, Expand);
setOperationAction(ISD::UDIV, XLenVT, Expand);
setOperationAction(ISD::SREM, XLenVT, Expand);
setOperationAction(ISD::UREM, XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i128, Custom);
setOperationAction(ISD::SDIV, MVT::i8, Custom);
setOperationAction(ISD::UDIV, MVT::i8, Custom);
setOperationAction(ISD::UREM, MVT::i8, Custom);
setOperationAction(ISD::SDIV, MVT::i16, Custom);
setOperationAction(ISD::UDIV, MVT::i16, Custom);
setOperationAction(ISD::UREM, MVT::i16, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbp()) {
// Custom lower bswap/bitreverse so we can convert them to GREVI to enable
// more combining.
setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
setOperationAction(ISD::BSWAP, XLenVT, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
// BSWAP i8 doesn't exist.
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
}
} else {
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
Subtarget.hasStdExtZbb() ? Legal : Expand);
}
if (Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SMIN, XLenVT, Legal);
setOperationAction(ISD::SMAX, XLenVT, Legal);
setOperationAction(ISD::UMIN, XLenVT, Legal);
setOperationAction(ISD::UMAX, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
setOperationAction(ISD::CTPOP, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbt()) {
setOperationAction(ISD::FSHL, XLenVT, Custom);
setOperationAction(ISD::FSHR, XLenVT, Custom);
setOperationAction(ISD::SELECT, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FSHL, MVT::i32, Custom);
setOperationAction(ISD::FSHR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
static const ISD::NodeType FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::LRINT, MVT::f16, Legal);
setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setOperationAction(ISD::LROUND, MVT::f16, Legal);
setOperationAction(ISD::LLROUND, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
// We need to custom promote this.
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::LRINT, MVT::f32, Legal);
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
setOperationAction(ISD::LRINT, MVT::f64, Legal);
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
setOperationAction(ISD::JumpTable, XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else {
setMaxAtomicSizeInBitsSupported(0);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasVInstructions()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
} else {
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
}
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
static const unsigned IntegerVPOps[] = {
ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
}
for (MVT VT : IntVecVTs) {
if (VT.getVectorElementType() == MVT::i64 &&
!Subtarget.hasVInstructionsI64())
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
// Vectors implement MULHS/MULHU.
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
if (VT.getVectorElementType() != MVT::i64) {
MVT FloatEltVT =
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
}
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
// but no other unordered comparisons, and supports all ordered comparisons
// except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
static const ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
for (unsigned VPOpc : FloatingPointVPOps)
setOperationAction(VPOpc, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
// types.
const auto SetCommonVFPExtLoadTruncStoreActions =
[&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
for (auto SmallVT : SmallerVTs) {
setTruncStoreAction(VT, SmallVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
}
};
if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
for (MVT VT : F32VecVTs) {
if (Subtarget.hasVInstructionsF32())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
for (MVT VT : F64VecVTs) {
if (Subtarget.hasVInstructionsF64())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
}
if (Subtarget.useRVVForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Operations below differ between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
continue;
}
// Use SPLAT_VECTOR to prevent type legalization from destroying the
// splats when type legalizing i64 scalar on RV32.
// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
// improvements first.
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
}
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
if (VT.getVectorElementType() != MVT::i64) {
MVT FloatEltVT =
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT =
MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
}
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setTruncStoreAction(VT, OtherVT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
for (unsigned VPOpc : FloatingPointVPOps)
setOperationAction(VPOpc, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
setMinimumJumpTableEntries(5);
// Jumps are expensive, compared to logic
setJumpIsExpensive();
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget.hasVInstructions()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::VP_GATHER);
setTargetDAGCombine(ISD::VP_SCATTER);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::STORE);
}
}
// Returns the result type of a SETCC whose operands have type VT.
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  // Scalar compares produce a pointer-sized value.
  if (!VT.isVector())
    return getPointerTy(DL);

  // With vector instructions, scalable vectors (and fixed-length vectors when
  // RVV is used for them) compare into an i1 vector of the same element count.
  if (Subtarget.hasVInstructions()) {
    if (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  }

  // Otherwise use the integer vector type with matching element layout.
  return VT.changeVectorElementTypeToInteger();
}
// The explicit vector length operand of VP nodes uses the subtarget's XLen
// value type.
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  const MVT XLenVT = Subtarget.getXLenVT();
  return XLenVT;
}
// Fills in Info (opcode, pointer, memory type, alignment, flags, ...) for the
// RISC-V intrinsics listed below and returns true. Returns false, leaving
// Info untouched, for every other intrinsic.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  const DataLayout &Layout = I.getModule()->getDataLayout();

  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    // Argument 0 is the pointer; the memory type is its pointee type. The
    // access is marked as a volatile load+store with 4-byte alignment.
    Value *Ptr = I.getArgOperand(0);
    auto *PtrTy = cast<PointerType>(Ptr->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = Ptr;
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  case Intrinsic::riscv_masked_strided_load: {
    // The element type comes from the call's result; argument 1 is the base
    // pointer. Alignment is the element size in bytes and the total size is
    // reported as unknown.
    Type *EltTy = I.getType()->getScalarType();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT = getValueType(Layout, EltTy);
    Info.align = Align(Layout.getTypeSizeInBits(EltTy) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::riscv_masked_strided_store: {
    // The element type comes from the stored value (argument 0); argument 1
    // is the base pointer.
    Type *EltTy = I.getArgOperand(0)->getType()->getScalarType();
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT = getValueType(Layout, EltTy);
    Info.align = Align(Layout.getTypeSizeInBits(EltTy) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  }
  default:
    return false;
  }
}
// Accepts only "r+i" / "i" style addressing: no global base, a signed 12-bit
// offset, and no register+register form.
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // A global is never permitted as the base.
  if (AM.BaseGV)
    return false;

  // The offset has to fit in a signed 12-bit immediate.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  // Scale 0: "r+i" or just "i", depending on HasBaseReg.
  if (AM.Scale == 0)
    return true;

  // Scale 1 without a base register is still "r+i"; with a base register it
  // would be "r+r" or "r+r+i", which is rejected. Any other scale is rejected.
  return AM.Scale == 1 && !AM.HasBaseReg;
}
// A compare immediate is legal when it fits in a signed 12-bit field.
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  const bool FitsSImm12 = isInt<12>(Imm);
  return FitsSImm12;
}
// An add immediate is legal when it fits in a signed 12-bit field.
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  const bool FitsSImm12 = isInt<12>(Imm);
  return FitsSImm12;
}
// On RV32 a 64-bit integer lives in two registers (high and low halves), so
// truncating it to 32 bits costs nothing: the low register is used directly.
// Only that i64 -> i32 case on a non-64-bit subtarget is reported as free.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit())
    return false;
  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;

  const unsigned FromBits = SrcTy->getPrimitiveSizeInBits();
  const unsigned ToBits = DstTy->getPrimitiveSizeInBits();
  return FromBits == 64 && ToBits == 32;
}
// EVT flavour of the query: only a scalar i64 -> i32 truncate on a
// non-64-bit subtarget is reported as free.
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit())
    return false;
  if (SrcVT.isVector() || DstVT.isVector())
    return false;
  if (!SrcVT.isInteger() || !DstVT.isInteger())
    return false;

  const unsigned FromBits = SrcVT.getSizeInBits();
  const unsigned ToBits = DstVT.getSizeInBits();
  return FromBits == 64 && ToBits == 32;
}
// A zero-extend is free when it can be merged into the load producing Val;
// otherwise defer to the generic TargetLowering answer.
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  if (auto *Load = dyn_cast<LoadSDNode>(Val)) {
    const EVT MemVT = Load->getMemoryVT();
    // i8 and i16 loads always qualify; i32 loads only on a 64-bit subtarget.
    const bool IsNarrowMem = MemVT == MVT::i8 || MemVT == MVT::i16 ||
                             (Subtarget.is64Bit() && MemVT == MVT::i32);
    if (IsNarrowMem) {
      const ISD::LoadExtType ExtTy = Load->getExtensionType();
      if (ExtTy == ISD::NON_EXTLOAD || ExtTy == ISD::ZEXTLOAD)
        return true;
    }
  }

  return TargetLowering::isZExtFree(Val, VT2);
}
// Sign extension is preferred over zero extension only for i32 -> i64 on a
// 64-bit subtarget.
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  if (!Subtarget.is64Bit())
    return false;
  return SrcVT == MVT::i32 && DstVT == MVT::i64;
}
// Speculating cttz is reported as cheap exactly when Zbb is available.
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  const bool HasZbb = Subtarget.hasStdExtZbb();
  return HasZbb;
}
// Speculating ctlz is reported as cheap exactly when Zbb is available.
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  const bool HasZbb = Subtarget.hasStdExtZbb();
  return HasZbb;
}
// Reports an and-not operation as available for non-constant scalar values
// when the subtarget has Zbb.
bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
  // FIXME: Support vectors once we have tests.
  if (Y.getValueType().isVector())
    return false;

  // Constant operands are excluded.
  if (isa<ConstantSDNode>(Y))
    return false;

  return Subtarget.hasStdExtZbb();
}
/// Decide whether sinking the operands of \p I into I's basic block is
/// profitable because they can be folded into a target instruction, e.g. a
/// splat of a scalar folding into a vector instruction. The uses to sink are
/// appended to \p Ops.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!(I->getType()->isVectorTy() && Subtarget.hasVInstructions()))
    return false;

  // True when operand number OpNo of User is a position into which this hook
  // allows a splat to be sunk.
  auto AcceptsSplatAt = [](Instruction *User, int OpNo) -> bool {
    switch (User->getOpcode()) {
    default:
      return false;
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      // Only the second operand of a shift qualifies.
      return OpNo == 1;
    case Instruction::Call: {
      // Among calls, only the first two operands of llvm.fma qualify.
      auto *II = dyn_cast<IntrinsicInst>(User);
      return II && II->getIntrinsicID() == Intrinsic::fma &&
             (OpNo == 0 || OpNo == 1);
    }
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    }
  };

  for (unsigned Idx = 0, NumOps = I->getNumOperands(); Idx != NumOps; ++Idx) {
    if (!AcceptsSplatAt(I, Idx))
      continue;

    Use &OperandUse = I->getOperandUse(Idx);
    auto *Splat = dyn_cast<Instruction>(OperandUse.get());

    // Skip non-instructions and operands that are already queued for sinking.
    if (!Splat)
      continue;
    if (any_of(Ops, [&](Use *Queued) { return Queued->get() == Splat; }))
      continue;

    // Only a splat (shuffle of an insertelement at index zero with a zero
    // mask) is a sinking candidate.
    if (!match(Splat,
               m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                         m_Undef(), m_ZeroMask())))
      continue;

    // Every use of the shuffle has to be sinkable; otherwise the splat would
    // be duplicated across gpr and vector registers.
    for (Use &SplatUse : Splat->uses()) {
      auto *SplatUser = cast<Instruction>(SplatUse.getUser());
      if (!AcceptsSplatAt(SplatUser, SplatUse.getOperandNo()))
        return false;
    }

    // Queue the shuffle's first operand use and then the shuffle's use in I.
    Ops.push_back(&Splat->getOperandUse(0));
    Ops.push_back(&OperandUse);
  }

  return true;
}
// Only positive zero is accepted as a legal FP immediate, and for f16/f32/f64
// only when the matching extension (Zfhmin/F/D) is present.
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  const bool MissingExtension =
      (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin()) ||
      (VT == MVT::f32 && !Subtarget.hasStdExtF()) ||
      (VT == MVT::f64 && !Subtarget.hasStdExtD());
  if (MissingExtension)
    return false;

  // Negative zero is rejected; any other zero is accepted.
  return !Imm.isNegZero() && Imm.isZero();
}
// True for f16/f32/f64 when the corresponding extension (Zfh/F/D) is present;
// false for every other type.
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (VT == MVT::f16)
    return Subtarget.hasStdExtZfh();
  if (VT == MVT::f32)
    return Subtarget.hasStdExtF();
  if (VT == MVT::f64)
    return Subtarget.hasStdExtD();
  return false;
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // When f32 is legal but Zfhmin/Zfh is not enabled, f16 is passed as f32.
  // A GPR may still end up being used; that is decided based on the ABI.
  const bool PassF16AsF32 = VT == MVT::f16 && Subtarget.hasStdExtF() &&
                            !Subtarget.hasStdExtZfhmin();
  if (!PassF16AsF32)
    return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);

  return MVT::f32;
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                            CallingConv::ID CC,
                                                            EVT VT) const {
  // When f32 is legal but Zfhmin/Zfh is not enabled, f16 is passed as a
  // single f32. A GPR may still end up being used; that is decided based on
  // the ABI.
  const bool PassF16AsF32 = VT == MVT::f16 && Subtarget.hasStdExtF() &&
                            !Subtarget.hasStdExtZfhmin();
  if (!PassF16AsF32)
    return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);

  return 1;
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
ISD::CondCode &CC, SelectionDAG &DAG) {
// Convert X > -1 to X >= 0.
if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
RHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
// Convert X < 1 to 0 >= X.
if (CC == ISD::SETLT && isOneConstant(RHS)) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
switch (CC) {
default:
break;
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETUGT:
case ISD::SETULE:
CC = ISD::getSetCCSwappedOperands(CC);
std::swap(LHS, RHS);
break;
}
}
// Maps a scalable vector type to its LMUL from the type's known minimum size
// in bits; i1 vectors are scaled by 8 before the lookup.
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");

  unsigned MinBits = VT.getSizeInBits().getKnownMinValue();
  const bool IsMaskVT = VT.getVectorElementType() == MVT::i1;
  if (IsMaskVT)
    MinBits <<= 3;

  switch (MinBits) {
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  default:
    llvm_unreachable("Invalid LMUL.");
  }
}
// Returns the vector register class ID used for the given LMUL. Fractional
// LMULs and LMUL=1 all map to the VR class.
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_1:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F8:
    return RISCV::VRRegClassID;
  default:
    llvm_unreachable("Invalid LMUL.");
  }
}
// Returns the Index-th subregister index of the kind matching VT's LMUL
// (sub_vrm1_* for LMUL <= 1, sub_vrm2_* for LMUL 2, sub_vrm4_* for LMUL 4).
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  // The "base + Index" arithmetic below relies on consecutive numbering.
  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                "Unexpected subreg numbering");
  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                "Unexpected subreg numbering");
  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                "Unexpected subreg numbering");

  switch (getLMUL(VT)) {
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::sub_vrm1_0 + Index;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::sub_vrm2_0 + Index;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::sub_vrm4_0 + Index;
  default:
    llvm_unreachable("Invalid vector type.");
  }
}
// Returns the register class ID for a vector type: i1 vectors always use VR,
// everything else is chosen by LMUL.
unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  const bool IsMaskVT = VT.getVectorElementType() == MVT::i1;
  return IsMaskVT ? RISCV::VRRegClassID : getRegClassIDForLMUL(getLMUL(VT));
}
// Try to express a subvector insert/extract between VecVT and SubVecVT with
// subregister indices. The result pairs the subregister index that performs
// the insert/extract for the given element index with the leftover element
// index that still has to be inserted/extracted within SubVecVT's register
// class.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");

  const unsigned VecRCID = getRegClassIDForVecVT(VecVT);
  const unsigned SubRCID = getRegClassIDForVecVT(SubVecVT);

  // Compose a subregister index leading from the incoming LMUL>1 register
  // class down to the outgoing one, halving the LMUL at each step:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // This is not guaranteed to find a subregister index, e.g. when extracting
  // from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) {
    // Only step through classes lying between the two register classes.
    if (VecRCID <= RCID || SubRCID > RCID)
      continue;

    VecVT = VecVT.getHalfNumVectorElementsVT();
    const unsigned HalfMinElts =
        VecVT.getVectorElementCount().getKnownMinValue();
    // Pick the high half when the index lies beyond the halved type.
    const bool InHighHalf = InsertExtractIdx >= HalfMinElts;
    SubRegIdx = TRI->composeSubRegIndices(
        SubRegIdx, getSubregIndexByMVT(VecVT, InHighHalf));
    if (InHighHalf)
      InsertExtractIdx -= HalfMinElts;
  }

  return {SubRegIdx, InsertExtractIdx};
}
// Mask vectors may still be combined because BUILD_VECTOR never expands to
// scalar stores for those types. Without RVV for fixed-length vectors,
// merging is always permitted.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return true;

  return VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1;
}
// Reports whether ScalarTy can be used as an RVV element type on this
// subtarget.
bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
  // Pointers and 8/16/32-bit integers are always accepted.
  if (ScalarTy->isPointerTy())
    return true;
  for (unsigned Bits : {8u, 16u, 32u})
    if (ScalarTy->isIntegerTy(Bits))
      return true;

  // The remaining element types depend on subtarget features.
  if (ScalarTy->isIntegerTy(64))
    return Subtarget.hasVInstructionsI64();
  if (ScalarTy->isHalfTy())
    return Subtarget.hasVInstructionsF16();
  if (ScalarTy->isFloatTy())
    return Subtarget.hasVInstructionsF32();
  if (ScalarTy->isDoubleTy())
    return Subtarget.hasVInstructionsF64();

  return false;
}
// Decides whether the fixed-length vector type VT is lowered using RVV on
// this subtarget.
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // Only a set of vector types with a consistent maximum fixed size across
  // all supported element types is handled, to avoid legalization issues.
  // The largest is v1024i8/v512i16/etc, so the cap is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
  const MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  bool EltSupported;
  switch (EltVT.SimpleTy) {
  case MVT::i1:
    // i1 follows different rules: a mask may only occupy a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    MinVLen /= 8;
    EltSupported = true;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    EltSupported = true;
    break;
  case MVT::i64:
    EltSupported = Subtarget.hasVInstructionsI64();
    break;
  case MVT::f16:
    EltSupported = Subtarget.hasVInstructionsF16();
    break;
  case MVT::f32:
    EltSupported = Subtarget.hasVInstructionsF32();
    break;
  case MVT::f64:
    EltSupported = Subtarget.hasVInstructionsF64();
    break;
  default:
    EltSupported = false;
    break;
  }
  if (!EltSupported)
    return false;

  // Elements wider than ELEN are rejected.
  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
    return false;

  // Types needing more than the permitted LMUL do not fit.
  const unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  return VT.isPow2VectorType();
}
// Member entry point forwarding to the file-local helper with this lowering's
// subtarget.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  const bool UseRVV = ::useRVVForFixedLengthVectorVT(VT, Subtarget);
  return UseRVV;
}
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // Legal types may not have been set up yet when this is called, hence the
  // alternative check.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  const unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
  const unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
  const MVT EltVT = VT.getVectorElementType();

  // Only the element types RVV containers exist for are expected here.
  switch (EltVT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64:
    break;
  default:
    llvm_unreachable("unexpected element type for RVV container");
  }

  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
  const unsigned ScaledElts =
      (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
  const unsigned ContainerElts =
      std::max(ScaledElts, RISCV::RVVBitsPerBlock / MaxELen);
  assert(isPowerOf2_32(ContainerElts) && "Expected power of 2 NumElts");
  return MVT::getScalableVectorVT(EltVT, ContainerElts);
}
// Convenience overload that takes the TargetLowering from the DAG.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return getContainerForFixedLengthVector(TLI, VT, Subtarget);
}
// Member entry point forwarding to the file-local helper with this lowering
// object and its subtarget.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  const RISCVSubtarget &ST = getSubtarget();
  return ::getContainerForFixedLengthVector(*this, VT, ST);
}
// Grow V to consume an entire RVV register: the fixed-length value V is
// inserted at index 0 of an undef scalable vector of type VT.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");

  SDLoc DL(V);
  const MVT IdxVT = Subtarget.getXLenVT();
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxVT);
  SDValue Undef = DAG.getUNDEF(VT);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Undef, V, ZeroIdx);
}
// Shrink V so it's just big enough to maintain a VT's worth of data: the
// fixed-length type VT is extracted from index 0 of the scalable value V.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");

  SDLoc DL(V);
  const MVT IdxVT = Subtarget.getXLenVT();
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxVT);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, ZeroIdx);
}
// Builds the two common "VL" operands and returns them as {Mask, VL}: an
// all-ones mask and the vector length. VecVT is a vector type, either
// fixed-length or scalable, and ContainerVT is the vector type that it is
// contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                const RISCVSubtarget &Subtarget) {
  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
  const MVT XLenVT = Subtarget.getXLenVT();

  // Fixed-length vectors use their element count as VL; scalable vectors use
  // the VLMax sentinel.
  SDValue VL;
  if (VecVT.isFixedLengthVector())
    VL = DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT);
  else
    VL = DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);

  // The mask has one i1 per container element.
  const MVT MaskVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  return {Mask, VL};
}
// Variant of getDefaultVLOps for a type that is already scalable: the type
// acts as its own container.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  const MVT ContainerVT = VecVT;
  return getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
}
// RVV lowering currently supports very few BUILD_VECTOR and VECTOR_SHUFFLE
// forms. That can lead to an infinite loop in which a BUILD_VECTOR is lowered
// as a VECTOR_SHUFFLE, which is lowered as a BUILD_VECTOR, and so on.
// Until either (or both) of these can reliably lower any node, refusing to
// expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks that loop. The
// downside is that BUILD_VECTOR is then lowered through the stack, which is
// not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  constexpr bool ExpandViaShuffles = false;
  return ExpandViaShuffles;
}
// A shuffle mask is legal only when it is a splat mask; nothing else is
// currently supported.
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  return ShuffleVectorSDNode::isSplatMask(M.data(), VT);
}
// Lowers FP_TO_SINT_SAT / FP_TO_UINT_SAT. Returns an empty SDValue for
// saturation widths that are not handled here.
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
  // RISCV FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan. We can use a conversion instruction and fix the
  // nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);
  const EVT DstVT = Op.getValueType();
  const EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  const bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;

  // Two shapes are handled: saturating to the full destination type, and
  // saturating to i32 within an i64 destination.
  // FIXME: Support other SatVTs by clamping before or after the conversion.
  const bool SatToDst = SatVT == DstVT;
  const bool SatI32InI64 = DstVT == MVT::i64 && SatVT == MVT::i32;
  if (!SatToDst && !SatI32InI64)
    return SDValue();

  unsigned Opc;
  if (SatToDst)
    Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
  else
    Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;

  SDLoc DL(Op);
  SDValue Converted = DAG.getNode(Opc, DL, DstVT, Src);
  SDValue Zero = DAG.getConstant(0, DL, DstVT);
  // Src unordered with itself means Src is nan: select 0 in that case.
  return DAG.getSelectCC(DL, Src, Src, Zero, Converted, ISD::CondCode::SETUO);
}
// Lowers a fixed-length SPLAT_VECTOR by splatting the scalar into the
// scalable container type with the default VL, then converting the result
// back to the fixed-length type.
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  const MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  SDLoc DL(Op);

  // Only the VL half of the default operands is consumed here.
  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // Floating-point and integer scalars use different splat nodes.
  unsigned Opc;
  if (VT.isFloatingPoint())
    Opc = RISCVISD::VFMV_V_F_VL;
  else
    Opc = RISCVISD::VMV_V_X_VL;

  SDValue Scalar = Op.getOperand(0);
  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Scalar, VL);
  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
// Describes an arithmetic index sequence whose element at index Idx equals
// (Idx * StepNumerator) / StepDenominator + Addend.
struct VIDSequence {
  // Numerator of the (non-zero) step between consecutive elements.
  int64_t StepNumerator;
  // Positive denominator of the step.
  unsigned StepDenominator;
  // Start value added to every element of the sequence.
  int64_t Addend;
};
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
//
// Returns None if Op is not an integer BUILD_VECTOR of constants/undefs, if no
// consistent non-zero step can be found, or if any defined element disagrees
// with the sequence implied by the step and the addend.
static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  if (!Op.getValueType().isInteger())
    return None;

  Optional<unsigned> SeqStepDenom;
  Optional<int64_t> SeqStepNum, SeqAddend;
  Optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();

  // Reads constant operand Idx truncated to the element width.
  auto GetEltVal = [&](unsigned Idx) -> uint64_t {
    return Op.getConstantOperandVal(Idx) &
           maskTrailingOnes<uint64_t>(EltSizeInBits);
  };

  // First pass: determine the step and make sure it is consistent across the
  // whole sequence.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;
    // The BUILD_VECTOR must be all constants.
    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
      return None;

    uint64_t Val = GetEltVal(Idx);

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff != 0) {
        int64_t Remainder = ValDiff % IdxDiff;
        // Normalize the step if it's greater than 1.
        if (Remainder != ValDiff) {
          // The difference must cleanly divide the element span.
          if (Remainder != 0)
            return None;
          ValDiff /= IdxDiff;
          IdxDiff = 1;
        }

        if (!SeqStepNum)
          SeqStepNum = ValDiff;
        else if (ValDiff != *SeqStepNum)
          return None;

        if (!SeqStepDenom)
          SeqStepDenom = IdxDiff;
        else if (IdxDiff != *SeqStepDenom)
          return None;
      }
    }

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }

  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)
    return None;

  // Second pass: validate every defined element against the step, logging the
  // addend along the way. This must revisit the elements seen before the step
  // became known; checking only the later ones would wrongly accept sequences
  // such as <0,0,2,3> as the step-1 sequence <0,1,2,3>.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      continue;

    uint64_t Val = GetEltVal(Idx);
    uint64_t ExpectedVal =
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    if (!SeqAddend)
      SeqAddend = Addend;
    else if (*SeqAddend != Addend)
      return None;
  }

  // A step is only logged after seeing a defined element, so the second pass
  // always records an addend.
  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
MVT XLenVT = Subtarget.getXLenVT();
unsigned NumElts = Op.getNumOperands();
if (VT.getVectorElementType() == MVT::i1) {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
}
// Lower constant mask BUILD_VECTORs via an integer vector type, in
// scalar integer chunks whose bit-width depends on the number of mask
// bits and XLEN.
// First, determine the most appropriate scalar integer type to use. This
// is at most XLenVT, but may be shrunk to a smaller vector element type
// according to the size of the final vector - use i8 chunks rather than
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
// optimization is likely to increase code size; avoid peforming it in
// such a case. We can use a load from a constant pool in this case.
if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
return SDValue();
// Now we can create our integer vector type. Note that it may be larger
// than the resulting mask type: v4i1 would use v1i8 as its integer type.
MVT IntegerViaVecVT =
MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
divideCeil(NumElts, NumViaIntegerBits));
uint64_t Bits = 0;
unsigned BitPos = 0, IntegerEltIdx = 0;
SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
for (unsigned I = 0; I < NumElts; I++, BitPos++) {
// Once we accumulate enough bits to fill our scalar type, insert into
// our vector and clear our accumulated data.
if (I != 0 && I % NumViaIntegerBits == 0) {
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
Bits = 0;
BitPos = 0;
IntegerEltIdx++;
}
SDValue V = Op.getOperand(I);
bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
Bits |= ((uint64_t)BitValue << BitPos);
}
// Insert the (remaining) scalar value into position in our integer
// vector type.
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
DAG.getConstant(IntegerEltIdx, DL, XLenVT));
if (NumElts < NumViaIntegerBits) {
// If we're producing a smaller vector than our minimum legal integer
// type, bitcast to the equivalent (known-legal) mask type, and extract
// our final mask.
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
Vec = DAG.getBitcast(MVT::v8i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
} else {
// Else we must have produced an integer type with the same size as the
// mask type; bitcast for the final result.
assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
Vec = DAG.getBitcast(VT, Vec);
}
return Vec;
}
// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
// vector type, we have a legal equivalently-sized i8 type, so we can use
// that.
MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
SDValue WideVec;
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
// For a splat, perform a scalar truncate before creating the wider
// vector.
assert(Splat.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
DAG.getConstant(1, DL, XLenVT));
WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
} else {
SmallVector<SDValue, 8> Ops(Op->op_values());
WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
}
return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) {
int64_t StepNumerator = SimpleVID->StepNumerator;
unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
assert(StepNumerator != 0 && "Invalid step");
bool Negate = false;
if (StepNumerator != 1) {
int64_t SplatStepVal = StepNumerator;
unsigned Opcode = ISD::MUL;
if (isPowerOf2_64(std::abs(StepNumerator))) {
Negate = StepNumerator < 0;
Opcode = ISD::SHL;
SplatStepVal = Log2_64(std::abs(StepNumerator));
}
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
}
if (StepDenominator != 1) {
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
SDValue SplatAddend =
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
}
return VID;
}
}
// Attempt to detect "hidden" splats, which only reveal themselves as splats
// when re-interpreted as a vector with a larger element type. For example,
// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
// could be instead splat as
// v2i32 = build_vector i32 0x00010000, i32 0x00010000
// TODO: This optimization could also work on non-constant splats, but it
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
unsigned EltBitSize = VT.getScalarSizeInBits();
const auto *BV = cast<BuildVectorSDNode>(Op);
if (VT.isInteger() && EltBitSize < 64 &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
BV->getRepeatedSequence(Sequence) &&
(Sequence.size() * EltBitSize) <= 64) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
ViaIntVT == MVT::i64) &&
"Unexpected sequence type");
unsigned EltIdx = 0;
uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
uint64_t SplatValue = 0;
// Construct the amalgamated value which can be splatted as this larger
// vector type.
for (const auto &SeqV : Sequence) {
if (!SeqV.isUndef())
SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
<< (EltIdx * EltBitSize));
EltIdx++;
}
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
SplatValue = SignExtend64(SplatValue, 32);
// Since we can't introduce illegal i64 types at this stage, we can only
// perform an i64 splat on RV32 if it is its own sign-extended value. That
// way we can use RVV instructions to splat.
assert((ViaIntVT.bitsLE(XLenVT) ||
(!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
"Unexpected bitcast sequence");
if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
SDValue ViaVL =
DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
MVT ViaContainerVT =
getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
SDValue Splat =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
return DAG.getBitcast(VT, Splat);
}
}
// Try and optimize BUILD_VECTORs with "dominant values" - these are values
// which constitute a large proportion of the elements. In such cases we can
// splat a vector with the dominant element and make up the shortfall with
// INSERT_VECTOR_ELTs.
// Note that this includes vectors of 2 elements by association. The
// upper-most element is the "dominant" one, allowing us to use a splat to
// "insert" the upper element, and an insert of the lower element at position
// 0, which improves codegen.
SDValue DominantValue;
unsigned MostCommonCount = 0;
DenseMap<SDValue, unsigned> ValueCounts;
unsigned NumUndefElts =
count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
// Track the number of scalar loads we know we'd be inserting, estimated as
// any non-zero floating-point constant. Other kinds of element are either
// already in registers or are materialized on demand. The threshold at which
// a vector load is more desirable than several scalar materialization and
// vector-insertion instructions is not known.
unsigned NumScalarLoads = 0;
for (SDValue V : Op->op_values()) {
if (V.isUndef())
continue;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
NumScalarLoads += !CFP->isExactlyValue(+0.0);
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
// end.
if (++Count >= MostCommonCount) {
DominantValue = V;
MostCommonCount = Count;
}
}
assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
unsigned NumDefElts = NumElts - NumUndefElts;
unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
DenseSet<SDValue> Processed{DominantValue};
MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
for (const auto &OpIdx : enumerate(Op->ops())) {
const SDValue &V = OpIdx.value();
if (V.isUndef() || !Processed.insert(V).second)
continue;
if (ValueCounts[V] == 1) {
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
DAG.getConstant(OpIdx.index(), DL, XLenVT));
} else {
// Blend in all instances of this value using a VSELECT, using a
// mask where each bit signals whether that element is the one
// we're after.
SmallVector<SDValue> Ops;
transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
return DAG.getConstant(V == V1, DL, XLenVT);
});
Vec = DAG.getNode(ISD::VSELECT, DL, VT,
DAG.getBuildVector(SelMaskTy, DL, Ops),
DAG.getSplatBuildVector(VT, DL, V), Vec);
}
}
return Vec;
}
return SDValue();
}
// Splat an i64 value, supplied as separate Lo and Hi parts, into a vector of
// type VT using the vector length VL. Returns the node producing the splat.
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
                                   SDValue Hi, SDValue VL, SelectionDAG &DAG) {
  // Splatting Lo alone is only possible when both parts are constants and Hi
  // consists entirely of copies of Lo's sign bit.
  bool HiIsSignOfLo = false;
  const bool BothConstant =
      isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi);
  if (BothConstant) {
    const int32_t LoBits = cast<ConstantSDNode>(Lo)->getSExtValue();
    const int32_t HiBits = cast<ConstantSDNode>(Hi)->getSExtValue();
    HiIsSignOfLo = (LoBits >> 31) == HiBits;
  }

  // Lower as a custom node in order to try and match RVV vector/scalar
  // instructions.
  if (HiIsSignOfLo)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);

  // Otherwise fall back to a stack store and stride x0 vector load.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
}
// Handles a splat of an i64 scalar on RV32 on behalf of type legalization:
// the scalar is broken into two i32 parts which are then splatted together
// via splatPartsI64WithVL.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
                                   SDValue VL, SelectionDAG &DAG) {
  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");

  // Pull one i32 part out of the i64 scalar; part 0 is used as Lo and part 1
  // as Hi below.
  const auto ExtractPart = [&](unsigned PartIdx) {
    return DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                       DAG.getConstant(PartIdx, DL, MVT::i32));
  };

  SDValue LoPart = ExtractPart(0);
  SDValue HiPart = ExtractPart(1);
  return splatPartsI64WithVL(DL, VT, LoPart, HiPart, VL, DAG);
}
// Lowers a splat of the scalar operand Scalar into a vector of type VT with
// the vector length VL. The resulting sequence is type legal, which makes this
// helper suitable for lowering a splat after type legalization.
static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
                                SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  // Floating-point vectors take the scalar as-is.
  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);

  MVT XLenVT = Subtarget.getXLenVT();

  // A scalar wider than XLenVT needs the more complicated splatting
  // algorithm.
  const bool FitsInXLen = Scalar.getValueType().bitsLE(XLenVT);
  if (!FitsInXLen) {
    assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
           "Unexpected scalar for splat lowering!");
    return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
  }

  // Simplest case: the operand only needs to be promoted to XLenVT.
  // Constants are sign extended to increase our chances of being able to use
  // a .vi instruction; an ANY_EXTEND would become a zero extend and the simm5
  // check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc = ISD::ANY_EXTEND;
  if (isa<ConstantSDNode>(Scalar))
    ExtOpc = ISD::SIGN_EXTEND;
  SDValue Promoted = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Promoted, VL);
}
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDValue TrueMask, VL;
std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
if (SVN->isSplat()) {
const int Lane = SVN->getSplatIndex();
if (Lane >= 0) {
MVT SVT = VT.getVectorElementType();
// Turn splatted vector load into a strided load with an X0 stride.
SDValue V = V1;
// Peek through CONCAT_VECTORS as VectorCombine can concat a vector
// with undef.
// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
int Offset = Lane;
if (V.getOpcode() == ISD::CONCAT_VECTORS) {
int OpElements =
V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(Offset / OpElements);
Offset %= OpElements;
}
// We need to ensure the load isn't atomic or volatile.
if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
auto *Ld = cast<LoadSDNode>(V);
Offset *= SVT.getStoreSize();
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
TypeSize::Fixed(Offset), DL);
// If this is SEW=64 on RV32, use a strided load with a stride of x0.
if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue IntID =
DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
DAG.getRegister(RISCV::X0, XLenVT), VL};
SDValue NewLoad = DAG.getMemIntrinsicNode(
ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
}
// Otherwise use a scalar load and splat. This will give the best
// opportunity to fold a splat into the operation. ISel can turn it into
// the x0 strided load if we aren't able to fold away the select.
if (SVT.isFloatingPoint())
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
Ld->getPointerInfo().getWithOffset(Offset),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
else
V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
Ld->getPointerInfo().getWithOffset(Offset), SVT,
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, V);
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
assert(Lane < (int)NumElts && "Unexpected lane!");
SDValue Gather =
DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
}
// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vectors.
bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
int MaskIndex = MaskIdx.value();
return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
});
assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
SmallVector<SDValue> MaskVals;
// As a backup, shuffles can be lowered via a vrgather instruction, possibly
// merged with a second vrgather.
SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
// By default we preserve the original operand order, and use a mask to
// select LHS as true and RHS as false. However, since RVV vector selects may
// feature splats but only on the LHS, we may choose to invert our mask and
// instead select between RHS and LHS.
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
bool InvertMask = IsSelect == SwapOps;
// Keep a track of which non-undef indices are used by each LHS/RHS shuffle
// half.
DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
// Now construct the mask that will be used by the vselect or blended
// vrgather operation. For vrgathers, construct the appropriate indices into
// each vector.
for (int MaskIndex : SVN->getMask()) {
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
if (!IsSelect) {
bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
? DAG.getConstant(MaskIndex, DL, XLenVT)
: DAG.getUNDEF(XLenVT));
GatherIndicesRHS.push_back(
IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
: DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
if (IsLHSOrUndefIndex && MaskIndex >= 0)
++LHSIndexCounts[MaskIndex];
if (!IsLHSOrUndefIndex)
++RHSIndexCounts[MaskIndex - NumElts];
}
}
if (SwapOps) {
std::swap(V1, V2);
std::swap(GatherIndicesLHS, GatherIndicesRHS);
}
assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
if (IsSelect)
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
// On such a large vector we're unable to use i8 as the index type.
// FIXME: We could promote the index to i16 and use vrgatherei16, but that
// may involve vector splitting if we're already at LMUL=8, or our
// user-supplied maximum fixed-length LMUL.
return SDValue();
}
unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();
// Since we can't introduce illegal index types at this stage, use i16 and
// vrgatherei16 if the corresponding index type for plain vrgather is greater
// than XLenVT.
if (IndexVT.getScalarType().bitsGT(XLenVT)) {
GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());
SDValue Gather;
// TODO: This doesn't trigger for i64 vectors on RV32, since there we
// encounter a bitcasted BUILD_VECTOR with low/high i32 values.
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
} else {
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
// If only one index is used, we can use a "splat" vrgather.
// TODO: We can splat the most-common index and fix-up any stragglers, if