//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
if (Subtarget.isRV32E())
report_fatal_error("Codegen not yet implemented for RV32E");
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
!Subtarget.hasStdExtF()) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
} else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
!Subtarget.hasStdExtD()) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
}
switch (ABI) {
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
case RISCVABI::ABI_LP64F:
case RISCVABI::ABI_LP64D:
break;
}
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfh())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
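// Scalable vector value types supported by the V extension, grouped by
// element type. These tables drive the RVV register class and operation
// action setup below.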
static const MVT::SimpleValueType BoolVecVTs[] = {
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
static const MVT::SimpleValueType IntVecVTs[] = {
MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
MVT::nxv4i64, MVT::nxv8i64};
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
if (Subtarget.hasStdExtV()) {
auto addRegClassForRVV = [this](MVT VT) {
unsigned Size = VT.getSizeInBits().getKnownMinValue();
assert(Size <= 512 && isPowerOf2_32(Size));
const TargetRegisterClass *RC;
if (Size <= 64)
RC = &RISCV::VRRegClass;
else if (Size == 128)
RC = &RISCV::VRM2RegClass;
else if (Size == 256)
RC = &RISCV::VRM4RegClass;
else
RC = &RISCV::VRM8RegClass;
addRegisterClass(VT, RC);
};
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
for (MVT VT : IntVecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasStdExtZfh())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasStdExtF())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasStdExtD())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
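// When RVV is also used for fixed-length vectors, pick a register class
// for each legal fixed-length VT based on its computed LMUL.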
if (Subtarget.useRVVForFixedLengthVectors()) {
auto addRegClassForFixedVectors = [this](MVT VT) {
unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
const TargetRegisterClass *RC;
if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
RC = &RISCV::VRRegClass;
else if (LMul == 2)
RC = &RISCV::VRM2RegClass;
else if (LMul == 4)
RC = &RISCV::VRM4RegClass;
else if (LMul == 8)
RC = &RISCV::VRM8RegClass;
else
llvm_unreachable("Unexpected LMul!");
addRegisterClass(VT, RC);
};
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
}
}
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RISCV::X2);
for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
setLoadExtAction(N, XLenVT, MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ADD, MVT::i32, Custom);
setOperationAction(ISD::SUB, MVT::i32, Custom);
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
}
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
setOperationAction(ISD::MULHU, XLenVT, Expand);
setOperationAction(ISD::SDIV, XLenVT, Expand);
setOperationAction(ISD::UDIV, XLenVT, Expand);
setOperationAction(ISD::SREM, XLenVT, Expand);
setOperationAction(ISD::UREM, XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i128, Custom);
setOperationAction(ISD::SDIV, MVT::i8, Custom);
setOperationAction(ISD::UDIV, MVT::i8, Custom);
setOperationAction(ISD::UREM, MVT::i8, Custom);
setOperationAction(ISD::SDIV, MVT::i16, Custom);
setOperationAction(ISD::UDIV, MVT::i16, Custom);
setOperationAction(ISD::UREM, MVT::i16, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbp()) {
// Custom lower bswap/bitreverse so we can convert them to GREVI to enable
// more combining.
setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
setOperationAction(ISD::BSWAP, XLenVT, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
}
} else {
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
Subtarget.hasStdExtZbb() ? Legal : Expand);
}
if (Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SMIN, XLenVT, Legal);
setOperationAction(ISD::SMAX, XLenVT, Legal);
setOperationAction(ISD::UMIN, XLenVT, Legal);
setOperationAction(ISD::UMAX, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
setOperationAction(ISD::CTPOP, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbt()) {
setOperationAction(ISD::FSHL, XLenVT, Custom);
setOperationAction(ISD::FSHR, XLenVT, Custom);
setOperationAction(ISD::SELECT, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FSHL, MVT::i32, Custom);
setOperationAction(ISD::FSHR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
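// FP condition codes and operations without a direct instruction; these
// are expanded for every enabled FP type (f16/f32/f64) below.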
ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
ISD::NodeType FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
ISD::FP_TO_FP16};
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f16, Expand);
}
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
setOperationAction(ISD::JumpTable, XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
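// Atomics are natively supported only with the A extension. Without it,
// report no supported atomic width so atomic operations are expanded to
// __atomic_* libcalls.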
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else {
setMaxAtomicSizeInBitsSupported(0);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasStdExtV()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
}
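// Operation actions shared by all RVV mask (i1) vector types.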
for (MVT VT : BoolVecVTs) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
}
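// Operation actions shared by all RVV integer vector types.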
for (MVT VT : IntVecVTs) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
// but no other unordered comparisons, and supports all ordered comparisons
// except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
// types.
const auto SetCommonVFPExtLoadTruncStoreActions =
[&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
for (auto SmallVT : SmallerVTs) {
setTruncStoreAction(VT, SmallVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
}
};
if (Subtarget.hasStdExtZfh())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
for (MVT VT : F32VecVTs) {
if (Subtarget.hasStdExtF())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
for (MVT VT : F64VecVTs) {
if (Subtarget.hasStdExtD())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
}
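// Fixed-length vectors are lowered by operating on an equivalent scalable
// "container" type. Start by expanding every operation on these types and
// then mark the supported ones as Custom below.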
if (Subtarget.useRVVForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
// Operations below are different between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
continue;
}
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setTruncStoreAction(VT, OtherVT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
setMinimumJumpTableEntries(5);
// Jumps are expensive, compared to logic
setJumpIsExpensive();
// We can use any register for comparisons
setHasMultipleConditionRegisters();
if (Subtarget.hasStdExtZbp()) {
setTargetDAGCombine(ISD::OR);
}
if (Subtarget.hasStdExtV()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
setTargetDAGCombine(ISD::MSCATTER);
}
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
if (Subtarget.hasStdExtV() &&
(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
case Intrinsic::riscv_masked_atomicrmw_add_i32:
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
case Intrinsic::riscv_masked_atomicrmw_max_i32:
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
case Intrinsic::riscv_masked_cmpxchg_i32:
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
}
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (!AM.HasBaseReg) // allow "r+i".
break;
return false; // disallow "r+r" or "r+r+i".
default:
return false;
}
return true;
}
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
return false;
unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
unsigned DestBits = DstTy->getPrimitiveSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
!SrcVT.isInteger() || !DstVT.isInteger())
return false;
unsigned SrcBits = SrcVT.getSizeInBits();
unsigned DestBits = DstVT.getSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Zexts are free if they can be combined with a load.
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
(Subtarget.is64Bit() && MemVT == MVT::i32)) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
}
return TargetLowering::isZExtFree(Val, VT2);
}
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget.hasStdExtZbb();
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasStdExtZbb();
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
if (VT == MVT::f64 && !Subtarget.hasStdExtD())
return false;
if (Imm.isNegZero())
return false;
return Imm.isZero();
}
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
(VT == MVT::f32 && Subtarget.hasStdExtF()) ||
(VT == MVT::f64 && Subtarget.hasStdExtD());
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
// end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
// end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
ISD::CondCode &CC, SelectionDAG &DAG) {
// Convert X > -1 to X >= 0.
if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
RHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
// Convert X < 1 to 0 >= X.
if (CC == ISD::SETLT && isOneConstant(RHS)) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
switch (CC) {
default:
break;
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETUGT:
case ISD::SETULE:
CC = ISD::getSetCCSwappedOperands(CC);
std::swap(LHS, RHS);
break;
}
}
// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unsupported CondCode");
case ISD::SETEQ:
return RISCV::BEQ;
case ISD::SETNE:
return RISCV::BNE;
case ISD::SETLT:
return RISCV::BLT;
case ISD::SETGE:
return RISCV::BGE;
case ISD::SETULT:
return RISCV::BLTU;
case ISD::SETUGE:
return RISCV::BGEU;
}
}
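// Return the register group multiplier (LMUL) implied by a scalable vector
// type's known-minimum size relative to the 64-bit RVV block; i1 vectors are
// scaled by 8 first. For example, nxv1i8 (8 bits) maps to LMUL_F8 and
// nxv8i32 (256 bits) maps to LMUL_4.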
RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
assert(VT.isScalableVector() && "Expecting a scalable vector type");
unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
if (VT.getVectorElementType() == MVT::i1)
KnownSize *= 8;
switch (KnownSize) {
default:
llvm_unreachable("Invalid LMUL.");
case 8:
return RISCVVLMUL::LMUL_F8;
case 16:
return RISCVVLMUL::LMUL_F4;
case 32:
return RISCVVLMUL::LMUL_F2;
case 64:
return RISCVVLMUL::LMUL_1;
case 128:
return RISCVVLMUL::LMUL_2;
case 256:
return RISCVVLMUL::LMUL_4;
case 512:
return RISCVVLMUL::LMUL_8;
}
}
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
switch (LMul) {
default:
llvm_unreachable("Invalid LMUL.");
case RISCVVLMUL::LMUL_F8:
case RISCVVLMUL::LMUL_F4:
case RISCVVLMUL::LMUL_F2:
case RISCVVLMUL::LMUL_1:
return RISCV::VRRegClassID;
case RISCVVLMUL::LMUL_2:
return RISCV::VRM2RegClassID;
case RISCVVLMUL::LMUL_4:
return RISCV::VRM4RegClassID;
case RISCVVLMUL::LMUL_8:
return RISCV::VRM8RegClassID;
}
}
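// Map a scalable vector type and an index within its register group to the
// corresponding sub_vrm1/sub_vrm2/sub_vrm4 subregister index.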
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
RISCVVLMUL LMUL = getLMUL(VT);
if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
"Unexpected subreg numbering");
return RISCV::sub_vrm1_0 + Index;
}
if (LMUL == RISCVVLMUL::LMUL_2) {
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
"Unexpected subreg numbering");
return RISCV::sub_vrm2_0 + Index;
}
if (LMUL == RISCVVLMUL::LMUL_4) {
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
"Unexpected subreg numbering");
return RISCV::sub_vrm4_0 + Index;
}
llvm_unreachable("Invalid vector type.");
}
unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
if (VT.getVectorElementType() == MVT::i1)
return RISCV::VRRegClassID;
return getRegClassIDForLMUL(getLMUL(VT));
}
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
const RISCVRegisterInfo *TRI) {
static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
RISCV::VRM2RegClassID > RISCV::VRRegClassID),
"Register classes not ordered");
unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
// Try to compose a subregister index that takes us from the incoming
// LMUL>1 register class down to the outgoing one. At each step we halve
// the LMUL:
// nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
// Note that this is not guaranteed to find a subregister index, such as
// when we are extracting from one VR type to another.
unsigned SubRegIdx = RISCV::NoSubRegister;
for (const unsigned RCID :
{RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
if (VecRegClassID > RCID && SubRegClassID <= RCID) {
VecVT = VecVT.getHalfNumVectorElementsVT();
bool IsHi =
InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
getSubregIndexByMVT(VecVT, IsHi));
if (IsHi)
InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
}
return {SubRegIdx, InsertExtractIdx};
}
// Return the largest legal scalable vector type that matches VT's element type.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
"Expected legal fixed length vector!");
unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
default:
llvm_unreachable("unexpected element type for RVV container");
case MVT::i1: {
// Masks are calculated assuming 8-bit elements since that's when we need
// the most elements.
unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
}
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
}
}
}
MVT RISCVTargetLowering::getContainerForFixedLengthVector(
SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
Subtarget);
}
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
return getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
: DAG.getRegister(RISCV::X0, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
return {Mask, VL};
}
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VecVT.isScalableVector() && "Expecting a scalable vector");
return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT, unsigned DefinedValues) const {
return false;
}
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Only splats are currently supported.
if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
return true;
return false;
}
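// Lower a fixed-length BUILD_VECTOR via RVV. Mask vectors use vmclr/vmset
// or are packed into scalar integer chunks; other vectors are tried, in
// turn, as a splat, a vid index sequence, a repeated sequence splatted at a
// wider element type, or a dominant-value splat patched up with inserts and
// selects.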
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
MVT ContainerVT =
RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
MVT XLenVT = Subtarget.getXLenVT();
unsigned NumElts = Op.getNumOperands();
if (VT.getVectorElementType() == MVT::i1) {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
}
// Lower constant mask BUILD_VECTORs via an integer vector type, in
// scalar integer chunks whose bit-width depends on the number of mask
// bits and XLEN.
// First, determine the most appropriate scalar integer type to use. This
// is at most XLenVT, but may be shrunk to a smaller vector element type
// according to the size of the final vector - use i8 chunks rather than
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
// If we have to use more than one INSERT_VECTOR_ELT then this optimization
// is likely to increase code size; avoid performing it in such a case.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
(!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
// Now we can create our integer vector type. Note that it may be larger
// than the resulting mask type: v4i1 would use v1i8 as its integer type.
MVT IntegerViaVecVT =
MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
divideCeil(NumElts, NumViaIntegerBits));
uint64_t Bits = 0;
unsigned BitPos = 0, IntegerEltIdx = 0;
SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
for (unsigned I = 0; I < NumElts; I++, BitPos++) {
// Once we accumulate enough bits to fill our scalar type, insert into
// our vector and clear our accumulated data.
if (I != 0 && I % NumViaIntegerBits == 0) {
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
Bits = 0;
BitPos = 0;
IntegerEltIdx++;
}
SDValue V = Op.getOperand(I);
bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
Bits |= ((uint64_t)BitValue << BitPos);
}
// Insert the (remaining) scalar value into position in our integer
// vector type.
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
DAG.getConstant(IntegerEltIdx, DL, XLenVT));
if (NumElts < NumViaIntegerBits) {
// If we're producing a smaller vector than our minimum legal integer
// type, bitcast to the equivalent (known-legal) mask type, and extract
// our final mask.
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
Vec = DAG.getBitcast(MVT::v8i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
} else {
// Else we must have produced an integer type with the same size as the
// mask type; bitcast for the final result.
assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
Vec = DAG.getBitcast(VT, Vec);
}
return Vec;
}
return SDValue();
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
// Try and match an index sequence, which we can lower directly to the vid
// instruction. An all-undef vector is matched by getSplatValue, above.
if (VT.isInteger()) {
bool IsVID = true;
for (unsigned I = 0; I < NumElts && IsVID; I++)
IsVID &= Op.getOperand(I).isUndef() ||
(isa<ConstantSDNode>(Op.getOperand(I)) &&
Op.getConstantOperandVal(I) == I);
if (IsVID) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
return convertFromScalableVector(VT, VID, DAG, Subtarget);
}
}
// Attempt to detect "hidden" splats, which only reveal themselves as splats
// when re-interpreted as a vector with a larger element type. For example,
// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
// could be instead splat as
// v2i32 = build_vector i32 0x00010000, i32 0x00010000
// TODO: This optimization could also work on non-constant splats, but it
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
unsigned EltBitSize = VT.getScalarSizeInBits();
const auto *BV = cast<BuildVectorSDNode>(Op);
if (VT.isInteger() && EltBitSize < 64 &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
BV->getRepeatedSequence(Sequence) &&
(Sequence.size() * EltBitSize) <= 64) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
ViaIntVT == MVT::i64) &&
"Unexpected sequence type");
unsigned EltIdx = 0;
uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
uint64_t SplatValue = 0;
// Construct the amalgamated value which can be splatted as this larger
// vector type.
for (const auto &SeqV : Sequence) {
if (!SeqV.isUndef())
SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
<< (EltIdx * EltBitSize));
EltIdx++;
}
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
SplatValue = SignExtend64(SplatValue, 32);
// Since we can't introduce illegal i64 types at this stage, we can only
// perform an i64 splat on RV32 if the value is the sign-extension of its
// low 32 bits; that way we can splat it using RVV instructions.
assert((ViaIntVT.bitsLE(XLenVT) ||
(!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
"Unexpected bitcast sequence");
if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
SDValue ViaVL =
DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
MVT ViaContainerVT =
RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
Subtarget);
SDValue Splat =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
return DAG.getBitcast(VT, Splat);
}
}
// Try and optimize BUILD_VECTORs with "dominant values" - these are values
// which constitute a large proportion of the elements. In such cases we can
// splat a vector with the dominant element and make up the shortfall with
// INSERT_VECTOR_ELTs.
// Note that this includes vectors of 2 elements by association. The
// upper-most element is the "dominant" one, allowing us to use a splat to
// "insert" the upper element, and an insert of the lower element at position
// 0, which improves codegen.
SDValue DominantValue;
unsigned MostCommonCount = 0;
DenseMap<SDValue, unsigned> ValueCounts;
unsigned NumUndefElts =
count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
for (SDValue V : Op->op_values()) {
if (V.isUndef())
continue;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
// end.
if (++Count >= MostCommonCount) {
DominantValue = V;
MostCommonCount = Count;
}
}
assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
unsigned NumDefElts = NumElts - NumUndefElts;
unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
if (!DAG.shouldOptForSize() &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
DenseSet<SDValue> Processed{DominantValue};
MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
for (const auto &OpIdx : enumerate(Op->ops())) {
const SDValue &V = OpIdx.value();
if (V.isUndef() || !Processed.insert(V).second)
continue;
if (ValueCounts[V] == 1) {
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
DAG.getConstant(OpIdx.index(), DL, XLenVT));
} else {
// Blend in all instances of this value using a VSELECT, using a
// mask where each bit signals whether that element is the one
// we're after.
SmallVector<SDValue> Ops;
transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
return DAG.getConstant(V == V1, DL, XLenVT);
});
Vec = DAG.getNode(ISD::VSELECT, DL, VT,
DAG.getBuildVector(SelMaskTy, DL, Ops),
DAG.getSplatBuildVector(VT, DL, V), Vec);
}
}
return Vec;
}
return SDValue();
}
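// Lower fixed-length VECTOR_SHUFFLEs that are splats (via vrgather.vx) or
// that act as an element-wise select between the two operands (via VSELECT).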
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
if (Lane >= 0) {
MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
DAG, VT, Subtarget);
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
assert(Lane < (int)NumElts && "Unexpected lane!");
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue Gather =
DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
}
// Detect shuffles which can be re-expressed as vector selects.
SmallVector<SDValue> MaskVals;
// By default we preserve the original operand order, and select LHS as true
// and RHS as false. However, since RVV vector selects may feature splats but
// only on the LHS, we may choose to invert our mask and instead select
// between RHS and LHS.
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
int MaskIndex = MaskIdx.value();
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
});
if (IsSelect) {
assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1,
SwapOps ? V1 : V2);
}
return SDValue();
}
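// Emit an FP extend or round of Op to VT. Scalable vectors use the generic
// nodes; fixed-length vectors use the VL-predicated RVV nodes on their
// container type.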
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VT.isScalableVector())
return DAG.getFPExtendOrRound(Op, DL, VT);
assert(VT.isFixedLengthVector() &&
"Unexpected value type for RVV FP extend/round lowering");
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
? RISCVISD::FP_EXTEND_VL
: RISCVISD::FP_ROUND_VL;
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
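// Dispatch custom lowering for the operations marked Custom in the
// constructor above.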
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
case ISD::GlobalAddress:
return lowerGlobalAddress(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
case ISD::JumpTable:
return lowerJumpTable(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::BRCOND:
return lowerBRCOND(Op, DAG);
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
case ISD::SHL_PARTS:
return lowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
return lowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return lowerShiftRightParts(Op, DAG, false);
case ISD::BITCAST: {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT.isFixedLengthVector()) {
// We can handle fixed length vector bitcasts with a simple replacement
// in isel.
if (Op0VT.isFixedLengthVector())
return Op;
// When bitcasting from scalar to fixed-length vector, insert the scalar
// into a one-element vector of the result type, and perform a vector
// bitcast.
if (!Op0VT.isVector()) {
auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
DAG.getUNDEF(BVT), Op0,
DAG.getConstant(0, DL, XLenVT)));
}
return SDValue();
}
// Custom-legalize bitcasts from fixed-length vector types to scalar types
// thus: bitcast the vector to a one-element vector type whose element type
// is the same as the result type, and extract the first element.
if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
LLVMContext &Context = *DAG.getContext();
SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
return FPConv;
}
if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
Subtarget.hasStdExtF()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
SDValue FPConv =
DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
return FPConv;
}
return SDValue();
}
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Start with the maximum immediate value which is the bitwidth - 1.
unsigned Imm = VT.getSizeInBits() - 1;
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (Op.getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
DAG.getConstant(Imm, DL, VT));
}
case ISD::FSHL:
case ISD::FSHR: {
MVT VT = Op.getSimpleValueType();
assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
SDLoc DL(Op);
if (Op.getOperand(2).getOpcode() == ISD::Constant)
return Op;
// FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
// use log2(XLen) bits. Mask the shift amount accordingly.
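// e.g. on RV64 the incoming shift amount is ANDed with 63 before being fed
// to the FSL/FSR node.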
unsigned ShAmtWidth = Subtarget.getXLen() - 1;
SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
DAG.getConstant(ShAmtWidth, DL, VT));
unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
}
case ISD::TRUNCATE: {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
// Only custom-lower vector truncates
if (!VT.isVector())
return Op;
// Truncates to mask types are handled differently
if (VT.getVectorElementType() == MVT::i1)
return lowerVectorMaskTrunc(Op, DAG);
// RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
// truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
// truncate by one power of two at a time.
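// e.g. a vector truncate from i32 elements to i8 elements is emitted as two
// TRUNCATE_VECTOR_VL steps: i32->i16, then i16->i8.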
MVT DstEltVT = VT.getVectorElementType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
assert(DstEltVT.bitsLT(SrcEltVT) &&
isPowerOf2_64(DstEltVT.getSizeInBits()) &&
isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
"Unexpected vector truncate lowering");
MVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(SrcVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
SDValue Result = Src;
SDValue Mask, VL;
std::tie(Mask, VL) =
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
LLVMContext &Context = *DAG.getContext();
const ElementCount Count = ContainerVT.getVectorElementCount();
do {
SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
Mask, VL);
} while (SrcEltVT != DstEltVT);
if (SrcVT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return Result;
}
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
case ISD::SIGN_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
case ISD::SPLAT_VECTOR_PARTS:
return lowerSPLAT_VECTOR_PARTS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VSCALE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
// We define our scalable vector types for lmul=1 to use a 64-bit known
// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
// vscale as VLENB / 8.
assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
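// e.g. with VLEN=256, VLENB is 32 and vscale is 32 >> 3 = 4.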
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
case ISD::FP_EXTEND: {
// RVV can only do an fp_extend to a type twice the size of the source. We
// custom-lower f16->f64 extensions as two hops of ISD::FP_EXTEND, going
// via f32.
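// e.g. an nxv2f16->nxv2f64 extend is emitted as nxv2f16->nxv2f32 followed by
// nxv2f32->nxv2f64.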
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
// Prepare any fixed-length vector operands.
MVT ContainerVT = VT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
MVT SrcContainerVT =
ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
SrcVT.getVectorElementType() != MVT::f16) {
// Only vXf16->vXf64 extensions need the two-step lowering below; for
// scalable vectors any other extension can simply be returned as-is.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
MVT InterVT = VT.changeVectorElementType(MVT::f32);
MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
SDValue IntermediateExtend = getRVVFPExtendOrRound(
Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
DL, DAG, Subtarget);
if (VT.isFixedLengthVector())
return convertFromScalableVector(VT, Extend, DAG, Subtarget);
return Extend;
}
case ISD::FP_ROUND: {
// RVV can only do an fp_round to a type half the size of the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction.
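// e.g. an nxv2f64->nxv2f16 round is emitted as a round-to-odd narrowing to
// nxv2f32 followed by an ordinary nxv2f32->nxv2f16 round; rounding the first
// step to odd avoids double rounding in the combined conversion.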
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
// Prepare any fixed-length vector operands.
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
ContainerVT =
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
SrcVT.getVectorElementType() != MVT::f64) {
// Only vXf64->vXf16 rounds need the two-step lowering below; for scalable
// vectors any other round can simply be returned as-is.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
SDValue IntermediateRound =
DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
DL, DAG, Subtarget);
if (VT.isFixedLengthVector())
return convertFromScalableVector(VT, Round, DAG, Subtarget);
return Round;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {
// RVV can only do fp<->int conversions to types half or double the size of
// the source. We custom-lower any conversion that would otherwise require
// two hops into an explicit sequence of operations.
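// e.g. a v4i8->v4f32 conversion is lowered as a v4i8->v4i32 extend followed
// by a v4i32->v4f32 convert, and a v4f64->v4i8 conversion as a v4f64->v4i32
// convert followed by a v4i32->v4i8 truncate.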
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return Op;
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
MVT EltVT = VT.getVectorElementType();
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned SrcEltSize = SrcEltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
"Unexpected vector element types");
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND;
SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
}
// FP2Int
assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
// Do one doubling fp_extend then complete the operation by converting
// to int.
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
}
// Narrowing conversions
if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
return DAG.getFPExtendOrRound(Int2FP, DL, VT);
}
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float value
// is not representable in the integer type, the result is poison.
MVT IVecVT =
MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
// Scalable vectors can exit here. Patterns will handle equally-sized
// conversions as well as halving/doubling ones.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors we lower to a custom "VL" node.
unsigned RVVOpc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Impossible opcode");
case ISD::FP_TO_SINT:
RVVOpc = RISCVISD::FP_TO_SINT_VL;
break;
case ISD::FP_TO_UINT:
RVVOpc = RISCVISD::FP_TO_UINT_VL;
break;
case ISD::SINT_TO_FP:
RVVOpc = RISCVISD::SINT_TO_FP_VL;
break;
case ISD::UINT_TO_FP:
RVVOpc = RISCVISD::UINT_TO_FP_VL;
break;
}
MVT ContainerVT, SrcContainerVT;
// Derive the reference container type from the larger vector type.
if (SrcEltSize > EltSize) {
SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
ContainerVT =
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
} else {
ContainerVT = getContainerForFixedLengthVector(VT);
SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_SMIN:
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskVECREDUCE(Op, DAG);
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
return lowerFPVECREDUCE(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return lowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::STEP_VECTOR:
return lowerSTEP_VECTOR(Op, DAG);
case ISD::VECTOR_REVERSE:
return lowerVECTOR_REVERSE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: {
// Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
// better than going through the stack, as the default expansion does.
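// e.g. (concat_vectors v4i32:a, v4i32:b) becomes two INSERT_SUBVECTOR nodes
// into an undef v8i32, at element indices 0 and 4.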
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
unsigned NumOpElts =
Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
SDValue Vec = DAG.getUNDEF(VT);
for (const auto &OpIdx : enumerate(Op->ops()))
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
return Vec;
}
case ISD::LOAD:
return lowerFixedLengthVectorLoadToRVV(Op, DAG);
case ISD::STORE:
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
case ISD::MLOAD:
return lowerMLOAD(Op, DAG);
case ISD::MSTORE:
return lowerMSTORE(Op, DAG);
case ISD::SETCC:
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
case ISD::ADD:
return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
case ISD::SUB:
return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
case ISD::MUL:
return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
case ISD::MULHS:
return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
case ISD::MULHU:
return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
case ISD::AND:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
RISCVISD::AND_VL);
case ISD::OR:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
RISCVISD::OR_VL);
case ISD::XOR:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
RISCVISD::XOR_VL);
case ISD::SDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
case ISD::SREM:
return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
case ISD::UDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
case ISD::UREM:
return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
case ISD::SHL:
return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
case ISD::SRA:
return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
case ISD::SRL:
return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
case ISD::FADD:
return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
case ISD::FSUB:
return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
case ISD::FMUL:
return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
case ISD::FDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
case ISD::FNEG:
return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::FABS:
return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
case ISD::FSQRT:
return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::FMA:
return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
case ISD::SMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
case ISD::SMAX:
return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
case ISD::UMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
case ISD::UMAX:
return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
case ISD::ABS:
return lowerABS(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::MGATHER:
return lowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return lowerMSCATTER(Op, DAG);
}
}
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
Flags);
}
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flags);
}
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
bool IsLocal) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
if (isPositionIndependent()) {
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
if (IsLocal)
// Use PC-relative addressing to access the symbol. This generates the
// pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
// %pcrel_lo(auipc)).
return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
}
switch (getTargetMachine().getCodeModel()) {
default:
report_fatal_error("Unsupported code model for lowering");
case CodeModel::Small: {
// Generate a sequence for accessing addresses within the first 2 GiB of
// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
}
case CodeModel::Medium: {
// Generate a sequence for accessing addresses within any 2 GiB range of the
// address space. This generates the pattern (PseudoLLA sym), which
// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
}
}
}
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
SDValue Addr = getAddr(N, DAG, IsLocal);
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it into the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
bool UseGOT) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const GlobalValue *GV = N->getGlobal();
MVT XLenVT = Subtarget.getXLenVT();
if (UseGOT) {
// Use PC-relative addressing to access the GOT for this TLS symbol, then
// load the address from the GOT and add the thread pointer. This generates
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
}
// Generate a sequence for accessing the address relative to the thread
// pointer, with the appropriate adjustment for the thread pointer offset.
// This generates the pattern
// (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
SDValue AddrHi =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
SDValue AddrAdd =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
SDValue AddrLo =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
SDValue MNAdd = SDValue(
DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
0);
return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
const GlobalValue *GV = N->getGlobal();
// Use a PC-relative addressing mode to access the global dynamic GOT address.
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Load;
Entry.Ty = CallTy;
Args.push_back(Entry);
// Setup call to __tls_get_addr.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, CallTy,
DAG.getExternalSymbol("__tls_get_addr", Ty),
std::move(Args));
return LowerCallTo(CLI).first;
}
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
CallingConv::GHC)
report_fatal_error("In GHC calling convention TLS is not supported");
SDValue Addr;
switch (Model) {
case TLSModel::LocalExec:
Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
break;
case TLSModel::InitialExec:
Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
break;
case TLSModel::LocalDynamic:
case TLSModel::GeneralDynamic:
Addr = getDynamicTLSAddr(N, DAG);
break;
}
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it into the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
SDValue FalseV = Op.getOperand(2);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
// If the result type is XLenVT and CondV is the output of a SETCC node
// which also operated on XLenVT inputs, then merge the SETCC node into the
// lowered RISCVISD::SELECT_CC to take advantage of the integer
// compare+branch instructions. i.e.:
// (select (setcc lhs, rhs, cc), truev, falsev)
// -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
CondV.getOperand(0).getSimpleValueType() == XLenVT) {
SDValue LHS = CondV.getOperand(0);
SDValue RHS = CondV.getOperand(1);
auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
ISD::CondCode CCVal = CC->get();
// Special case for a select of two constants that differ by 1. Normally
// this is done by DAGCombine, but if the select is introduced by type
// legalization or op legalization, we miss it. Restricting to the SETLT case
// for now because that is what signed saturating add/sub need.
// FIXME: We don't need the condition to be SETLT or even a SETCC,
// but we would probably want to swap the true/false values if the condition
// is SETGE/SETLE to avoid an XORI.
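// For example, (select cond, 4, 3) becomes (add cond, 3) and
// (select cond, 3, 4) becomes (sub 4, cond), since the setcc result is known
// to be 0 or 1.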
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
CCVal == ISD::SETLT) {
const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
if (TrueVal - 1 == FalseVal)
return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
if (TrueVal + 1 == FalseVal)
return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
}
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
// Otherwise:
// (select condv, truev, falsev)
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(1);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
if (CondV.getOpcode() == ISD::SETCC &&
CondV.getOperand(0).getValueType() == XLenVT) {
SDValue LHS = CondV.getOperand(0);
SDValue RHS = CondV.getOperand(1);
ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
SDValue TargetCC = DAG.getCondCode(CCVal);
return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
LHS, RHS, TargetCC, Op.getOperand(2));
}
return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
CondV, DAG.getConstant(0, DL, XLenVT),
DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy(MF.getDataLayout()));
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
Register FrameReg = RI.getFrameRegister(MF);
int XLenInBytes = Subtarget.getXLen() / 8;
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
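// Each iteration loads the caller's saved frame pointer, which is assumed to
// be stored two XLEN-sized slots below the current frame pointer.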
while (Depth--) {
int Offset = -(XLenInBytes * 2);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, DL));
FrameAddr =
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
}
return FrameAddr;
}
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
MVT XLenVT = Subtarget.getXLenVT();
int XLenInBytes = Subtarget.getXLen() / 8;
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
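// With a non-zero depth, the return address is assumed to be saved one
// XLEN-sized slot below the corresponding frame pointer.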
int Off = -XLenInBytes;
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return the value of the return address register, marking it an implicit
// live-in.
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
// Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
// else:
// Hi = Lo << (Shamt-XLEN)
// Lo = 0
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
SDValue ShiftRightLo =
DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
// Hi = Hi >>s (XLEN-1)
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
// Hi = 0;
unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
SDValue ShiftLeftHi =
DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
SDValue HiFalse =
IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}