//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
using namespace llvm;
#define DEBUG_TYPE "riscv-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<unsigned> ExtensionMaxWebSize(
DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
cl::desc("Give the maximum size (in number of nodes) of the web of "
"instructions that we will consider for VW expansion"),
cl::init(18));
static cl::opt<bool>
AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
cl::desc("Allow the formation of VW_W operations (e.g., "
"VWADD_W) with splat constants"),
cl::init(false));
static cl::opt<unsigned> NumRepeatedDivisors(
DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
cl::desc("Set the minimum number of repetitions of a divisor to allow "
"transformation to multiplications by the reciprocal"),
cl::init(2));
static cl::opt<int>
FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
cl::desc("Give the maximum number of instructions that we will "
"use for creating a floating-point immediate value"),
cl::init(2));
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
if (Subtarget.isRVE())
report_fatal_error("Codegen not yet implemented for RVE");
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
!Subtarget.hasStdExtF()) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
} else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
!Subtarget.hasStdExtD()) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
}
switch (ABI) {
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
case RISCVABI::ABI_LP64F:
case RISCVABI::ABI_LP64D:
break;
}
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfhOrZfhmin())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtZfbfmin())
addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
if (Subtarget.hasStdExtZhinxOrZhinxmin())
addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
if (Subtarget.hasStdExtZfinx())
addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
if (Subtarget.hasStdExtZdinx()) {
if (Subtarget.is64Bit())
addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
else
addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
}
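// Scalable RVV vector types, grouped by element type. The groups below get
// register classes when the corresponding vector extension is available.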
static const MVT::SimpleValueType BoolVecVTs[] = {
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
static const MVT::SimpleValueType IntVecVTs[] = {
MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
MVT::nxv4i64, MVT::nxv8i64};
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
static const MVT::SimpleValueType BF16VecVTs[] = {
MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
if (Subtarget.hasVInstructions()) {
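// Map a scalable vector type to the vector register class matching its
// LMUL: VR for LMUL <= 1, and VRM2/VRM4/VRM8 for register groups of 2/4/8.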
auto addRegClassForRVV = [this](MVT VT) {
// Disable the smallest fractional LMUL types if ELEN is less than
// RVVBitsPerBlock.
unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
if (VT.getVectorMinNumElements() < MinElts)
return;
unsigned Size = VT.getSizeInBits().getKnownMinValue();
const TargetRegisterClass *RC;
if (Size <= RISCV::RVVBitsPerBlock)
RC = &RISCV::VRRegClass;
else if (Size == 2 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM2RegClass;
else if (Size == 4 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM4RegClass;
else if (Size == 8 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM8RegClass;
else
llvm_unreachable("Unexpected size");
addRegisterClass(VT, RC);
};
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
for (MVT VT : IntVecVTs) {
if (VT.getVectorElementType() == MVT::i64 &&
!Subtarget.hasVInstructionsI64())
continue;
addRegClassForRVV(VT);
}
if (Subtarget.hasVInstructionsF16Minimal())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsBF16())
for (MVT VT : BF16VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF64())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
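// Fixed-length vectors selected for RVV codegen reuse the register class of
// the scalable container type that holds them.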
if (Subtarget.useRVVForFixedLengthVectors()) {
auto addRegClassForFixedVectors = [this](MVT VT) {
MVT ContainerVT = getContainerForFixedLengthVector(VT);
unsigned RCID = getRegClassIDForVecVT(ContainerVT);
const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
addRegisterClass(VT, TRI.getRegClass(RCID));
};
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
}
}
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RISCV::X2);
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
MVT::i1, Promote);
// DAGCombiner can call isLoadExtLegal for types that aren't legal.
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setCondCodeAction(ISD::SETLE, XLenVT, Expand);
setCondCodeAction(ISD::SETGT, XLenVT, Custom);
setCondCodeAction(ISD::SETGE, XLenVT, Expand);
setCondCodeAction(ISD::SETULE, XLenVT, Expand);
setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
MVT::i32, Custom);
} else {
setLibcallName(
{RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
else if (Subtarget.is64Bit())
setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
else
setOperationAction(ISD::MUL, MVT::i64, Custom);
if (!Subtarget.hasStdExtM())
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
XLenVT, Expand);
else if (Subtarget.is64Bit())
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
{MVT::i8, MVT::i16, MVT::i32}, Custom);
setOperationAction(
{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
Expand);
setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else if (Subtarget.hasVendorXTHeadBb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
} else {
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
}
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
// Zbkb can use rev8+brev8 to implement bitreverse.
setOperationAction(ISD::BITREVERSE, XLenVT,
Subtarget.hasStdExtZbkb() ? Custom : Expand);
if (Subtarget.hasStdExtZbb()) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
if (Subtarget.is64Bit())
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
// We need the custom lowering to make sure that the resulting sequence
// for the 32-bit case is efficient on 64-bit targets.
if (Subtarget.is64Bit())
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTLZ, XLenVT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS, MVT::i32, Custom);
if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
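// Operation groups reused for the scalar FP types configured below:
// FPLegalNodeTypes are marked Legal, FPCCToExpand condition codes and
// FPOpToExpand operations are expanded, and the FPRndMode rounding
// operations are Legal with Zfa and otherwise custom-lowered where
// supported.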
static const unsigned FPLegalNodeTypes[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
static const unsigned FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
ISD::FREM};
static const unsigned FPRndMode[] = {
ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN};
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
static const unsigned ZfhminZfbfminPromoteOps[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
ISD::FSUB, ISD::FMUL, ISD::FMA,
ISD::FDIV, ISD::FSQRT, ISD::FABS,
ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::SELECT};
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
if (Subtarget.hasStdExtZfhOrZhinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
setOperationAction(FPRndMode, MVT::f16,
Subtarget.hasStdExtZfa() ? Legal : Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
// FIXME: Need to promote f16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
}
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f16,
Subtarget.hasStdExtZfa() ? Legal : Promote);
setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10},
MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
// complete support for all operations in LegalizeDAG.
setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FTRUNC},
MVT::f16, Promote);
// We need to custom promote this.
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
if (!Subtarget.hasStdExtZfa())
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
}
if (Subtarget.hasStdExtFOrZfinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
setOperationAction(FPRndMode, MVT::f32,
Subtarget.hasStdExtZfa() ? Legal : Custom);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
if (Subtarget.hasStdExtZfa())
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
else
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
}
if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtDOrZdinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
if (Subtarget.hasStdExtZfa()) {
setOperationAction(FPRndMode, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
} else {
if (Subtarget.is64Bit())
setOperationAction(FPRndMode, MVT::f64, Custom);
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
}
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
MVT::i32, Custom);
setOperationAction(ISD::LROUND, MVT::i32, Custom);
}
if (Subtarget.hasStdExtFOrZfinx()) {
setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
Custom);
setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
XLenVT, Legal);
setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
ISD::JumpTable},
XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::Constant, MVT::i64, Custom);
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
if (Subtarget.hasStdExtZicbop()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
}
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else if (Subtarget.hasForcedAtomics()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
} else {
setMaxAtomicSizeInBitsSupported(0);
}
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasVInstructions()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_VOID},
{MVT::i8, MVT::i16}, Custom);
if (Subtarget.is64Bit())
setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
MVT::i32, Custom);
else
setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
MVT::i64, Custom);
setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
MVT::Other, Custom);
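// Vector-predicated (VP) and vector reduction operation groups applied to
// each legal RVV vector type below.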
static const unsigned IntegerVPOps[] = {
ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
ISD::VP_ABS};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
static const unsigned FloatingPointVecReduceOps[] = {
ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
ISD::VECREDUCE_FMAX};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
MVT::i64, Custom);
setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(
{ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
Expand);
setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
setOperationAction(
{ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
Custom);
setOperationAction(
{ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
ISD::STRICT_FP_TO_UINT},
VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
VT, Expand);
}
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_TRUNCATE, ISD::VP_SETCC},
VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
}
for (MVT VT : IntVecVTs) {
if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
// Vectors implement MULHS/MULHU.
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
Legal);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
ISD::STRICT_FP_TO_UINT},
VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction(
{ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(IntegerVecReduceOps, VT, Custom);
setOperationAction(IntegerVPOps, VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
VT, Custom);
setOperationAction(
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
VT, Expand);
}
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
if (Subtarget.hasStdExtZvbb()) {
setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
VT, Custom);
} else {
setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
VT, Expand);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
// in the range of f32.
EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
VT, Custom);
}
setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
}
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
// but no other unordered comparisons, and supports all ordered comparisons
// except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
static const ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
ISD::FMINIMUM};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
ISD::VP_FNEARBYINT, ISD::VP_SETCC};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Custom);
// Expand various condition codes (explained above).
setCondCodeAction(VFPCCToExpand, VT, Expand);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
ISD::IS_FPCLASS},
VT, Custom);
setOperationAction(FloatingPointVecReduceOps, VT, Custom);
// Expand FP operations that need libcalls.
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FEXP10, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
VT, Custom);
setOperationAction(
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
setOperationAction(FloatingPointVPOps, VT, Custom);
setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
Custom);
setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
VT, Legal);
setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
// types.
const auto SetCommonVFPExtLoadTruncStoreActions =
[&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
for (auto SmallVT : SmallerVTs) {
setTruncStoreAction(VT, SmallVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
}
};
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
continue;
SetCommonVFPActions(VT);
}
} else if (Subtarget.hasVInstructionsF16Minimal()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
continue;
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// load/store
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
// Custom split nxv32f16 since nxv32f32 is not legal.
if (VT == MVT::nxv32f16) {
setOperationAction(ZvfhminPromoteOps, VT, Custom);
setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
continue;
}
// Add more promote ops.
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
}
}
if (Subtarget.hasVInstructionsF32()) {
for (MVT VT : F32VecVTs) {
if (!isTypeLegal(VT))
continue;
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
}
if (Subtarget.hasVInstructionsF64()) {
for (MVT VT : F64VecVTs) {
if (!isTypeLegal(VT))
continue;
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
}
}
if (Subtarget.useRVVForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
OtherVT, VT, Expand);
}
// Custom lower fixed vector undefs to scalable vector undefs to avoid
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
Custom);
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(
{ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
Custom);
setOperationAction(
{ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
Custom);
setOperationAction(
{
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::FP_TO_SINT,
ISD::FP_TO_UINT,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::STRICT_FP_TO_SINT,
ISD::STRICT_FP_TO_UINT,
},
VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// Operations below differ between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
ISD::OR, ISD::XOR},
VT, Custom);
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_SETCC, ISD::VP_TRUNCATE},
VT, Custom);
continue;
}
// Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
// it before type legalization for i64 vectors on RV32. It will then be
// type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
// improvements first.
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
}
setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
VT, Custom);
setOperationAction(
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
setOperationAction(
{ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(
{ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
ISD::VECREDUCE_UMIN},
VT, Custom);
setOperationAction(IntegerVPOps, VT, Custom);
if (Subtarget.hasStdExtZvbb()) {
setOperationAction({ISD::BITREVERSE, ISD::BSWAP, ISD::CTLZ,
ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ,
ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP, ISD::ROTL,
ISD::ROTR},
VT, Custom);
} else {
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
// in the range of f32.
EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
if (isTypeLegal(FloatVT))
setOperationAction(
{ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
Custom);
}
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
// There are no extending loads or truncating stores.
for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
setTruncStoreAction(VT, InnerVT, Expand);
}
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// Custom lower fixed vector undefs to scalable vector undefs to avoid
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
if (VT.getVectorElementType() == MVT::f16 &&
!Subtarget.hasVInstructionsF16()) {
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// Don't promote f16 vector operations to f32 if f32 vector type is
// not legal.
// TODO: could split the f16 vector into two vectors and do promotion.
if (!isTypeLegal(F32VecVT))
continue;
setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
continue;
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
ISD::EXTRACT_VECTOR_ELT},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
ISD::MGATHER, ISD::MSCATTER},
VT, Custom);
setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
VT, Custom);
setCondCodeAction(VFPCCToExpand, VT, Expand);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(FloatingPointVecReduceOps, VT, Custom);
setOperationAction(FloatingPointVPOps, VT, Custom);
setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
Custom);
setOperationAction(
{ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
Custom);
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
if (Subtarget.hasStdExtFOrZfinx())
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
if (Subtarget.hasStdExtDOrZdinx())
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
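// The 'A' extension has no AMO subtract, so atomic subtraction is expanded
// (an atomic add of the negated value).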
if (Subtarget.hasStdExtA())
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
if (Subtarget.hasForcedAtomics()) {
// Force __sync libcalls to be emitted for atomic rmw/cas operations.
setOperationAction(
{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
XLenVT, LibCall);
}
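// XTheadMemIdx provides indexed (increment/decrement) addressing modes for
// scalar integer loads and stores.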
if (Subtarget.hasVendorXTHeadMemIdx()) {
for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
if (Subtarget.is64Bit()) {
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
}
}
}
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
// Set preferred alignments.
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMinimumJumpTableEntries(5);
// Jumps are expensive compared to logic operations.
setJumpIsExpensive();
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
if (Subtarget.hasStdExtFOrZfinx())
setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
if (Subtarget.hasStdExtZbb())
setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
setTargetDAGCombine(ISD::TRUNCATE);
if (Subtarget.hasStdExtZbkb())
setTargetDAGCombine(ISD::BITREVERSE);
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
if (Subtarget.hasStdExtFOrZfinx())
setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
if (Subtarget.hasVInstructions())
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
// Disable strict node mutation.
IsStrictFPEnabled = true;
}
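// SETCC results are the pointer-sized integer type for scalars, i1 mask
// vectors for vector types handled by RVV, and integer vectors otherwise.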
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
if (Subtarget.hasVInstructions() &&
(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
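// The explicit vector length (EVL) operand of VP intrinsics uses XLenVT.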
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
return Subtarget.getXLenVT();
}
// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
unsigned VF,
bool IsScalable) const {
if (!Subtarget.hasVInstructions())
return true;
if (!IsScalable)
return true;
if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
return true;
// Don't allow VF=1 if those types aren't legal.
if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
return true;
// VLEN=32 support is incomplete.
if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
return true;
// The maximum VF is for the smallest element width with LMUL=8.
// VF must be a power of 2.
unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
return VF > MaxVF || !isPowerOf2_32(VF);
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
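// Populate IntrinsicInfo for an RVV load/store intrinsic. PtrOp is the index
// of the pointer operand; for non-unit-strided accesses only the scalar
// element type is recorded as the memory VT.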
auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
bool IsUnitStrided) {
Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(PtrOp);
Type *MemTy;
if (IsStore) {
// Store value is the first operand.
MemTy = I.getArgOperand(0)->getType();
} else {
// Use the return type. If it's a segment load, the return type is a struct.
MemTy = I.getType();
if (MemTy->isStructTy())
MemTy = MemTy->getStructElementType(0);
}
if (!IsUnitStrided)
MemTy = MemTy->getScalarType();
Info.memVT = getValueType(DL, MemTy);
Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
Info.size = MemoryLocation::UnknownSize;
Info.flags |=
IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
return true;
};
if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
Info.flags |= MachineMemOperand::MONonTemporal;
Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
switch (Intrinsic) {
default:
return false;
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
case Intrinsic::riscv_masked_atomicrmw_add_i32:
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
case Intrinsic::riscv_masked_atomicrmw_max_i32:
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
case Intrinsic::riscv_masked_cmpxchg_i32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
case Intrinsic::riscv_masked_strided_load:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_masked_strided_store:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
case Intrinsic::riscv_seg5_load:
case Intrinsic::riscv_seg6_load:
case Intrinsic::riscv_seg7_load:
case Intrinsic::riscv_seg8_load:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_seg2_store:
case Intrinsic::riscv_seg3_store:
case Intrinsic::riscv_seg4_store:
case Intrinsic::riscv_seg5_store:
case Intrinsic::riscv_seg6_store:
case Intrinsic::riscv_seg7_store:
case Intrinsic::riscv_seg8_store:
// Operands are (vec, ..., vec, ptr, vl)
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vle:
case Intrinsic::riscv_vle_mask:
case Intrinsic::riscv_vleff:
case Intrinsic::riscv_vleff_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
/*IsStore*/ false,
/*IsUnitStrided*/ true);
case Intrinsic::riscv_vse:
case Intrinsic::riscv_vse_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
/*IsStore*/ true,
/*IsUnitStrided*/ true);
case Intrinsic::riscv_vlse:
case Intrinsic::riscv_vlse_mask:
case Intrinsic::riscv_vloxei:
case Intrinsic::riscv_vloxei_mask:
case Intrinsic::riscv_vluxei:
case Intrinsic::riscv_vluxei_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
/*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vsse:
case Intrinsic::riscv_vsse_mask:
case Intrinsic::riscv_vsoxei:
case Intrinsic::riscv_vsoxei_mask:
case Intrinsic::riscv_vsuxei:
case Intrinsic::riscv_vsuxei_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vlseg2:
case Intrinsic::riscv_vlseg3:
case Intrinsic::riscv_vlseg4:
case Intrinsic::riscv_vlseg5:
case Intrinsic::riscv_vlseg6:
case Intrinsic::riscv_vlseg7:
case Intrinsic::riscv_vlseg8:
case Intrinsic::riscv_vlseg2ff:
case Intrinsic::riscv_vlseg3ff:
case Intrinsic::riscv_vlseg4ff:
case Intrinsic::riscv_vlseg5ff:
case Intrinsic::riscv_vlseg6ff:
case Intrinsic::riscv_vlseg7ff:
case Intrinsic::riscv_vlseg8ff:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
/*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vlseg2_mask:
case Intrinsic::riscv_vlseg3_mask:
case Intrinsic::riscv_vlseg4_mask:
case Intrinsic::riscv_vlseg5_mask:
case Intrinsic::riscv_vlseg6_mask:
case Intrinsic::riscv_vlseg7_mask:
case Intrinsic::riscv_vlseg8_mask:
case Intrinsic::riscv_vlseg2ff_mask:
case Intrinsic::riscv_vlseg3ff_mask:
case Intrinsic::riscv_vlseg4ff_mask:
case Intrinsic::riscv_vlseg5ff_mask:
case Intrinsic::riscv_vlseg6ff_mask:
case Intrinsic::riscv_vlseg7ff_mask:
case Intrinsic::riscv_vlseg8ff_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
/*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vlsseg2:
case Intrinsic::riscv_vlsseg3:
case Intrinsic::riscv_vlsseg4:
case Intrinsic::riscv_vlsseg5:
case Intrinsic::riscv_vlsseg6:
case Intrinsic::riscv_vlsseg7:
case Intrinsic::riscv_vlsseg8:
case Intrinsic::riscv_vloxseg2:
case Intrinsic::riscv_vloxseg3:
case Intrinsic::riscv_vloxseg4:
case Intrinsic::riscv_vloxseg5:
case Intrinsic::riscv_vloxseg6:
case Intrinsic::riscv_vloxseg7:
case Intrinsic::riscv_vloxseg8:
case Intrinsic::riscv_vluxseg2:
case Intrinsic::riscv_vluxseg3:
case Intrinsic::riscv_vluxseg4:
case Intrinsic::riscv_vluxseg5:
case Intrinsic::riscv_vluxseg6:
case Intrinsic::riscv_vluxseg7:
case Intrinsic::riscv_vluxseg8:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vlsseg2_mask:
case Intrinsic::riscv_vlsseg3_mask:
case Intrinsic::riscv_vlsseg4_mask:
case Intrinsic::riscv_vlsseg5_mask:
case Intrinsic::riscv_vlsseg6_mask:
case Intrinsic::riscv_vlsseg7_mask:
case Intrinsic::riscv_vlsseg8_mask:
case Intrinsic::riscv_vloxseg2_mask:
case Intrinsic::riscv_vloxseg3_mask:
case Intrinsic::riscv_vloxseg4_mask:
case Intrinsic::riscv_vloxseg5_mask:
case Intrinsic::riscv_vloxseg6_mask:
case Intrinsic::riscv_vloxseg7_mask:
case Intrinsic::riscv_vloxseg8_mask:
case Intrinsic::riscv_vluxseg2_mask:
case Intrinsic::riscv_vluxseg3_mask:
case Intrinsic::riscv_vluxseg4_mask:
case Intrinsic::riscv_vluxseg5_mask:
case Intrinsic::riscv_vluxseg6_mask:
case Intrinsic::riscv_vluxseg7_mask:
case Intrinsic::riscv_vluxseg8_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
/*IsStore*/ false,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vsseg2:
case Intrinsic::riscv_vsseg3:
case Intrinsic::riscv_vsseg4:
case Intrinsic::riscv_vsseg5:
case Intrinsic::riscv_vsseg6:
case Intrinsic::riscv_vsseg7:
case Intrinsic::riscv_vsseg8:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vsseg2_mask:
case Intrinsic::riscv_vsseg3_mask:
case Intrinsic::riscv_vsseg4_mask:
case Intrinsic::riscv_vsseg5_mask:
case Intrinsic::riscv_vsseg6_mask:
case Intrinsic::riscv_vsseg7_mask:
case Intrinsic::riscv_vsseg8_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vssseg2:
case Intrinsic::riscv_vssseg3:
case Intrinsic::riscv_vssseg4:
case Intrinsic::riscv_vssseg5:
case Intrinsic::riscv_vssseg6:
case Intrinsic::riscv_vssseg7:
case Intrinsic::riscv_vssseg8:
case Intrinsic::riscv_vsoxseg2:
case Intrinsic::riscv_vsoxseg3:
case Intrinsic::riscv_vsoxseg4:
case Intrinsic::riscv_vsoxseg5:
case Intrinsic::riscv_vsoxseg6:
case Intrinsic::riscv_vsoxseg7:
case Intrinsic::riscv_vsoxseg8:
case Intrinsic::riscv_vsuxseg2:
case Intrinsic::riscv_vsuxseg3:
case Intrinsic::riscv_vsuxseg4:
case Intrinsic::riscv_vsuxseg5:
case Intrinsic::riscv_vsuxseg6:
case Intrinsic::riscv_vsuxseg7:
case Intrinsic::riscv_vsuxseg8:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
case Intrinsic::riscv_vssseg2_mask:
case Intrinsic::riscv_vssseg3_mask:
case Intrinsic::riscv_vssseg4_mask:
case Intrinsic::riscv_vssseg5_mask:
case Intrinsic::riscv_vssseg6_mask:
case Intrinsic::riscv_vssseg7_mask:
case Intrinsic::riscv_vssseg8_mask:
case Intrinsic::riscv_vsoxseg2_mask:
case Intrinsic::riscv_vsoxseg3_mask:
case Intrinsic::riscv_vsoxseg4_mask:
case Intrinsic::riscv_vsoxseg5_mask:
case Intrinsic::riscv_vsoxseg6_mask:
case Intrinsic::riscv_vsoxseg7_mask:
case Intrinsic::riscv_vsoxseg8_mask:
case Intrinsic::riscv_vsuxseg2_mask:
case Intrinsic::riscv_vsuxseg3_mask:
case Intrinsic::riscv_vsuxseg4_mask:
case Intrinsic::riscv_vsuxseg5_mask:
case Intrinsic::riscv_vsuxseg6_mask:
case Intrinsic::riscv_vsuxseg7_mask:
case Intrinsic::riscv_vsuxseg8_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
/*IsStore*/ true,
/*IsUnitStrided*/ false);
}
}
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// RVV instructions only support register addressing.
if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (!AM.HasBaseReg) // allow "r+i".
break;
return false; // disallow "r+r" or "r+r+i".
default:
return false;
}
return true;
}
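// Compare and add immediates must fit the 12-bit signed immediate field of
// ADDI-style instructions.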
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
// isTruncateFree?
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
return false;
unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
unsigned DestBits = DstTy->getPrimitiveSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
// We consider i64->i32 free on RV64 since we have good selection of W
// instructions that make promoting operations back to i64 free in many cases.
if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
!DstVT.isInteger())
return false;
unsigned SrcBits = SrcVT.getSizeInBits();
unsigned DestBits = DstVT.getSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Zexts are free if they can be combined with a load.
// Don't advertise i32->i64 zextload as being free for RV64. It interacts
// poorly with type legalization of compares preferring sext.
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
}
return TargetLowering::isZExtFree(Val, VT2);
}
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}
bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
return Subtarget.hasStdExtZbb();
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// We expect to be able to match a bit extraction instruction if the Zbs
// extension is supported and the mask is a power of two. However, we
// conservatively return false if the mask would fit in an ANDI instruction,
// on the basis that it's possible the sinking+duplication of the AND in
// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
return false;
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}
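// Zbb and Zbkb both provide ANDN, so (X & ~Y) feeding a compare can be formed
// in a single instruction when Y is not a constant (a constant ~Y would be
// folded into an immediate AND instead).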
bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
EVT VT = Y.getValueType();
// FIXME: Support vectors once we have tests.
if (VT.isVector())
return false;
return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
!isa<ConstantSDNode>(Y);
}
bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
// Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
if (Subtarget.hasStdExtZbs())
return X.getValueType().isScalarInteger();
auto *C = dyn_cast<ConstantSDNode>(Y);
// XTheadBs provides th.tst (similar to bexti), if Y is a constant
if (Subtarget.hasVendorXTHeadBs())
return C != nullptr;
// We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
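// Only bit positions 0-10 are accepted: 1 << 10 = 1024 still fits ANDI's
// 12-bit signed immediate, while 1 << 11 = 2048 does not.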
return C && C->getAPIntValue().ule(10);
}
bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
EVT VT) const {
// Only enable for rvv.
if (!VT.isVector() || !Subtarget.hasVInstructions())
return false;
if (VT.isFixedLengthVector() && !isTypeLegal(VT))
return false;
return true;
}
bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getIntegerBitWidth();
if (BitSize > Subtarget.getXLen())
return false;
// Fast path, assume 32-bit immediates are cheap.
int64_t Val = Imm.getSExtValue();
if (isInt<32>(Val))
return true;
// A constant pool entry may be more aligned than the load we're trying to
// replace. If we don't support unaligned scalar mem, prefer the constant
// pool.
// TODO: Can the caller pass down the alignment?
if (!Subtarget.enableUnalignedScalarMem())
return true;
// Prefer to keep the load if it would require many instructions.
// This uses the same threshold we use for constant pools but doesn't
// check useConstantPoolForLargeInts.
// TODO: Should we keep the load only when we're definitely going to emit a
// constant pool?
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
bool RISCVTargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// One interesting pattern that we'd want to form is 'bit extract':
// ((1 >> Y) & 1) ==/!= 0
// But we also need to be careful not to try to reverse that fold.
// Is this '((1 >> Y) & 1)'?
if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
return false; // Keep the 'bit extract' pattern.
// Will this be '((1 >> Y) & 1)' after the transform?
if (NewShiftOpcode == ISD::SRL && CC->isOne())
return true; // Do form the 'bit extract' pattern.
// If 'X' is a constant, and we transform, then we will immediately
// try to undo the fold, thus causing endless combine loop.
// So only do the transform if X is not a constant. This matches the default
// implementation of this function.
return !XC;
}
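// Return true if a vector instruction with this opcode can take operand
// `Operand` as a scalar splat, i.e. it has a .vx/.vf form. For the first group
// (commutative ops plus sub/fsub/fdiv, which have reversed forms such as
// vrsub.vx and vfrdiv.vf) either operand qualifies; shifts, divides and
// remainders only have scalar forms for the right-hand side.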
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
switch (Opcode) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::ICmp:
case Instruction::FCmp:
return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
return Operand == 1;
default:
return false;
}
}
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
if (canSplatOperand(I->getOpcode(), Operand))
return true;
auto *II = dyn_cast<IntrinsicInst>(I);
if (!II)
return false;
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
case Intrinsic::vp_fma:
return Operand == 0 || Operand == 1;
case Intrinsic::vp_shl:
case Intrinsic::vp_lshr:
case Intrinsic::vp_ashr:
case Intrinsic::vp_udiv:
case Intrinsic::vp_sdiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_srem:
return Operand == 1;
// These intrinsics are commutative.
case Intrinsic::vp_add:
case Intrinsic::vp_mul:
case Intrinsic::vp_and:
case Intrinsic::vp_or:
case Intrinsic::vp_xor:
case Intrinsic::vp_fadd:
case Intrinsic::vp_fmul:
case Intrinsic::vp_icmp:
case Intrinsic::vp_fcmp:
// These intrinsics have 'vr' versions.
case Intrinsic::vp_sub:
case Intrinsic::vp_fsub:
case Intrinsic::vp_fdiv:
return Operand == 0 || Operand == 1;
default:
return false;
}
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
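/// For example, sinking
///   %ins   = insertelement <4 x i32> poison, i32 %x, i64 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
/// next to a user such as `add <4 x i32> %v, %splat` allows instruction
/// selection to fold the scalar directly into a vadd.vx.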
bool RISCVTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
for (auto OpIdx : enumerate(I->operands())) {
if (!canSplatOperand(I, OpIdx.index()))
continue;
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
// Make sure we are not already sinking this operand
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
// We are looking for a splat that can be sunk.
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
continue;
// Don't sink i1 splats.
if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
continue;
// All uses of the shuffle should be sunk to avoid duplicating it across GPR
// and vector registers.
for (Use &U : Op->uses()) {
Instruction *Insn = cast<Instruction>(U.getUser());
if (!canSplatOperand(Insn, U.getOperandNo()))
return false;
}
Ops.push_back(&Op->getOperandUse(0));
Ops.push_back(&OpIdx.value());
}
return true;
}
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
unsigned Opc = VecOp.getOpcode();
// Assume target opcodes can't be scalarized.
// TODO - do we have any exceptions?
if (Opc >= ISD::BUILTIN_OP_END)
return false;
// If the vector op is not supported, try to convert to scalar.
EVT VecVT = VecOp.getValueType();
if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
return true;
// If the vector op is supported, but the scalar op is not, the transform may
// not be worthwhile.
// Permit a vector binary operation to be converted to a scalar binary
// operation which is custom lowered with an illegal type.
EVT ScalarVT = VecVT.getScalarType();
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
isOperationCustom(Opc, ScalarVT);
}
bool RISCVTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
return false;
}
// Returns 0-31 if the fli instruction is available for the type and this is a
// legal FP immediate for the type. Returns -1 otherwise.
int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
if (!Subtarget.hasStdExtZfa())
return -1;
bool IsSupportedVT = false;
if (VT == MVT::f16) {
IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
} else if (VT == MVT::f32) {
IsSupportedVT = true;
} else if (VT == MVT::f64) {
assert(Subtarget.hasStdExtD() && "Expect D extension");
IsSupportedVT = true;
}
if (!IsSupportedVT)
return -1;
return RISCVLoadFPImm::getLoadFPImm(Imm);
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
bool IsLegalVT = false;
if (VT == MVT::f16)
IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
else if (VT == MVT::f32)
IsLegalVT = Subtarget.hasStdExtFOrZfinx();
else if (VT == MVT::f64)
IsLegalVT = Subtarget.hasStdExtDOrZdinx();
else if (VT == MVT::bf16)
IsLegalVT = Subtarget.hasStdExtZfbfmin();
if (!IsLegalVT)
return false;
if (getLegalZfaFPImm(Imm, VT) >= 0)
return true;
// Cannot create a 64-bit floating-point immediate value for RV32.
if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
// td can handle +0.0 or -0.0 already.
// -0.0 can be created by fmv + fneg.
return Imm.isZero();
}
// Special case: the cost for -0.0 is 1.
int Cost = Imm.isNegZero()
? 1
: RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
Subtarget.getXLen(),
Subtarget.getFeatureBits());
// If the constantpool data is already in cache, only Cost 1 is cheaper.
return Cost < FPImmCost;
}
// TODO: This is very conservative.
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
// Only support extracting a fixed from a fixed vector for now.
if (ResVT.isScalableVector() || SrcVT.isScalableVector())
return false;
unsigned ResElts = ResVT.getVectorNumElements();
unsigned SrcElts = SrcVT.getVectorNumElements();
// Conservatively only handle extracting half of a vector.
// TODO: Relax this.
if ((ResElts * 2) != SrcElts)
return false;
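// For example, extracting a v4i32 from a v8i32 is allowed (half the source),
// but extracting a v2i32 from a v8i32 is not.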
// The smallest type we can slide is i8.
// TODO: We can extract index 0 from a mask vector without a slide.
if (ResVT.getVectorElementType() == MVT::i1)
return false;
// Slide can support arbitrary index, but we only treat vslidedown.vi as
// cheap.
if (Index >= 32)
return false;
// TODO: We can do arbitrary slidedowns, but for now only support extracting
// the upper half of a vector until we have more test coverage.
return Index == 0 || Index == ResElts;
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
!Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
!Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
ISD::CondCode &CC, SelectionDAG &DAG) {
// If this is a single bit test that can't be handled by ANDI, shift the
// bit to be tested to the MSB and perform a signed compare with 0.
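// For example, (X & 0x1000) == 0 can't use ANDI (0x1000 doesn't fit in a
// 12-bit signed immediate), but shifting bit 12 into the sign bit turns it
// into (X << (XLen - 13)) >= 0 (signed), which a branch can test directly.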
if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
isa<ConstantSDNode>(LHS.getOperand(1))) {
uint64_t Mask = LHS.getConstantOperandVal(1);
if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
unsigned ShAmt = 0;
if (isPowerOf2_64(Mask)) {
CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
} else {
ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
}
LHS = LHS.getOperand(0);
if (ShAmt != 0)
LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
DAG.getConstant(ShAmt, DL, LHS.getValueType()));
return;
}
}
if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
int64_t C = RHSC->getSExtValue();
switch (CC) {
default: break;
case ISD::SETGT:
// Convert X > -1 to X >= 0.
if (C == -1) {
RHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
break;
case ISD::SETLT:
// Convert X < 1 to 0 <= X.
if (C == 1) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
break;
}
}
switch (CC) {
default:
break;
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETUGT:
case ISD::SETULE:
CC = ISD::getSetCCSwappedOperands(CC);
std::swap(LHS, RHS);
break;
}
}
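// Map a scalable vector type to the LMUL (register group size) it occupies.
// With RVVBitsPerBlock == 64 this gives, e.g., nxv1i32 -> LMUL_F2,
// nxv2i32 -> LMUL_1 and nxv8i32 -> LMUL_4; i1 vectors are sized as if their
// elements were i8.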
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
assert(VT.isScalableVector() && "Expecting a scalable vector type");
unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
if (VT.getVectorElementType() == MVT::i1)
KnownSize *= 8;
switch (KnownSize) {
default:
llvm_unreachable("Invalid LMUL.");
case 8:
return RISCVII::VLMUL::LMUL_F8;
case 16:
return RISCVII::VLMUL::LMUL_F4;
case 32:
return RISCVII::VLMUL::LMUL_F2;
case 64:
return RISCVII::VLMUL::LMUL_1;
case 128:
return RISCVII::VLMUL::LMUL_2;
case 256:
return RISCVII::VLMUL::LMUL_4;
case 512:
return RISCVII::VLMUL::LMUL_8;
}
}
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
switch (LMul) {
default:
llvm_unreachable("Invalid LMUL.");
case RISCVII::VLMUL::LMUL_F8:
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_1:
return RISCV::VRRegClassID;
case RISCVII::VLMUL::LMUL_2:
return RISCV::VRM2RegClassID;
case RISCVII::VLMUL::LMUL_4:
return RISCV::VRM4RegClassID;
case RISCVII::VLMUL::LMUL_8:
return RISCV::VRM8RegClassID;
}
}
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
RISCVII::VLMUL LMUL = getLMUL(VT);
if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
LMUL == RISCVII::VLMUL::LMUL_F4 ||
LMUL == RISCVII::VLMUL::LMUL_F2 ||
LMUL == RISCVII::VLMUL::LMUL_1) {
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
"Unexpected subreg numbering");
return RISCV::sub_vrm1_0 + Index;
}
if (LMUL == RISCVII::VLMUL::LMUL_2) {
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
"Unexpected subreg numbering");
return RISCV::sub_vrm2_0 + Index;
}
if (LMUL == RISCVII::VLMUL::LMUL_4) {
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
"Unexpected subreg numbering");
return RISCV::sub_vrm4_0 + Index;
}
llvm_unreachable("Invalid vector type.");
}
unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
if (VT.getVectorElementType() == MVT::i1)
return RISCV::VRRegClassID;
return getRegClassIDForLMUL(getLMUL(VT));
}
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
const RISCVRegisterInfo *TRI) {
static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
RISCV::VRM2RegClassID > RISCV::VRRegClassID),
"Register classes not ordered");
unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
// Try to compose a subregister index that takes us from the incoming
// LMUL>1 register class down to the outgoing one. At each step we half
// the LMUL:
// nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
// Note that this is not guaranteed to find a subregister index, such as
// when we are extracting from one VR type to another.
unsigned SubRegIdx = RISCV::NoSubRegister;
for (const unsigned RCID :
{RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
if (VecRegClassID > RCID && SubRegClassID <= RCID) {
VecVT = VecVT.getHalfNumVectorElementsVT();
bool IsHi =
InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
getSubregIndexByMVT(VecVT, IsHi));
if (IsHi)
InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
}
return {SubRegIdx, InsertExtractIdx};
}
// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget.useRVVForFixedLengthVectors() ||
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
if (!ScalarTy.isSimple())
return false;
switch (ScalarTy.getSimpleVT().SimpleTy) {
case MVT::iPTR:
return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
case MVT::i8:
case MVT::i16:
case MVT::i32:
return true;
case MVT::i64:
return Subtarget.hasVInstructionsI64();
case MVT::f16:
return Subtarget.hasVInstructionsF16();
case MVT::f32:
return Subtarget.hasVInstructionsF32();
case MVT::f64:
return Subtarget.hasVInstructionsF64();
default:
return false;
}
}
unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
return NumRepeatedDivisors;
}
static SDValue getVLOperand(SDValue Op) {
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
if (!II)
return SDValue();
return Op.getOperand(II->VLOperand + 1 + HasChain);
}
static bool useRVVForFixedLengthVectorVT(MVT VT,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
if (!Subtarget.useRVVForFixedLengthVectors())
return false;
// We only support a set of vector types with a consistent maximum fixed size
// across all supported vector element types to avoid legalization issues.
// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
// fixed-length vector type we support is 1024 bytes.
if (VT.getFixedSizeInBits() > 1024 * 8)
return false;
unsigned MinVLen = Subtarget.getRealMinVLen();
MVT EltVT = VT.getVectorElementType();
// Don't use RVV for vectors we cannot scalarize if required.
switch (EltVT.SimpleTy) {
// i1 is supported but has different rules.
default:
return false;
case MVT::i1:
// Masks can only use a single register.
if (VT.getVectorNumElements() > MinVLen)
return false;
MinVLen /= 8;
break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
break;
case MVT::i64:
if (!Subtarget.hasVInstructionsI64())
return false;
break;
case MVT::f16:
if (!Subtarget.hasVInstructionsF16Minimal())
return false;
break;
case MVT::f32:
if (!Subtarget.hasVInstructionsF32())
return false;
break;
case MVT::f64:
if (!Subtarget.hasVInstructionsF64())
return false;
break;
}
// Reject elements larger than ELEN.
if (EltVT.getSizeInBits() > Subtarget.getELen())
return false;
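// For example, with a 128-bit minimum VLEN a v32i32 (1024 bits) needs
// LMUL = ceil(1024 / 128) = 8, which is only usable if the subtarget's
// fixed-length LMUL limit allows it.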
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
// Don't use RVV for types that don't fit.
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length RVV support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
const RISCVSubtarget &Subtarget) {
// This may be called before legal types are set up.
assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getRealMinVLen();
unsigned MaxELen = Subtarget.getELen();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
default:
llvm_unreachable("unexpected element type for RVV container");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
// narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
// each fractional LMUL we support SEW between 8 and LMUL*ELEN.
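// For example, with MinVLen == 128 a v4i32 maps to (4 * 64) / 128 = 2
// elements, i.e. nxv2i32, which occupies exactly one LMUL=1 register; the
// std::max below clamps very small fixed vectors up to the narrowest
// supported fractional LMUL container.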
unsigned NumElts =
(VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
}
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
const RISCVSubtarget &Subtarget) {
return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
Subtarget);
}
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
/// Return the mask type suitable for masking the provided vector type: an
/// i1-element vector of the same (possibly scalable) length.
static MVT getMaskTypeFor(MVT VecVT) {
assert(VecVT.isVector());
ElementCount EC = VecVT.getVectorElementCount();
return MVT::getVectorVT(MVT::i1, EC);
}
/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
SelectionDAG &DAG) {
MVT MaskVT = getMaskTypeFor(VecVT);
return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}
static std::pair<SDValue, SDValue>
getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable. If VecVT is
// fixed-length, ContainerVT is the scalable vector type it is contained in;
// otherwise ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VecVT.isFixedLengthVector())
return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
Subtarget);
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
MVT XLenVT = Subtarget.getXLenVT();
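// Using X0 as the VL operand is the backend's way of encoding VLMAX, i.e.
// operate on every lane of the scalable type.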
SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VecVT.isScalableVector() && "Expecting a scalable vector");
return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
SelectionDAG &DAG) const {
assert(VecVT.isScalableVector() && "Expected scalable vector");
return DAG.getElementCount(DL, Subtarget.getXLenVT(),
VecVT.getVectorElementCount());
}
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// cases of either are (currently) supported. This can get us into an infinite
// loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a
// BUILD_VECTOR as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT, unsigned DefinedValues) const {
return false;
}
InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
// TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is 1;
// it is implementation-defined.
if (!VT.isVector())
return InstructionCost::getInvalid();
unsigned DLenFactor = Subtarget.getDLenFactor();
unsigned Cost;
if (VT.isScalableVector()) {
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) =
RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
if (Fractional)
Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
else
Cost = (LMul * DLenFactor);
} else {
Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
}
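// For example, with DLEN == VLEN (DLenFactor == 1), an LMUL=4 type costs 4
// while a fractional LMUL=1/2 type costs 1.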
return Cost;
}
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// the operands (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
return getLMULCost(VT) * getLMULCost(VT);
}
/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
return getLMULCost(VT);
}
/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
return getLMULCost(VT);
}
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
SDValue Src = Op.getOperand(0);
MVT DstVT = Op.getSimpleValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
if (!DstVT.isVector()) {
// For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
// the result.
if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
Src.getValueType() == MVT::bf16) {
Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
}
unsigned Opc;
if (SatVT == DstVT)
Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
else if (DstVT == MVT::i64 && SatVT == MVT::i32)
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
else
return SDValue();
// FIXME: Support other SatVTs by clamping before or after the conversion.
SDLoc DL(Op);
SDValue FpToInt = DAG.getNode(
Opc, DL, DstVT, Src,
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
if (Opc == RISCVISD::FCVT_WU_RV64)
FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
ISD::CondCode::SETUO);
}
// Vectors.
MVT DstEltVT = DstVT.getVectorElementType();
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
unsigned SrcEltSize = SrcEltVT.