| //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that RISCV uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "RISCVISelLowering.h" |
| #include "MCTargetDesc/RISCVMatInt.h" |
| #include "RISCV.h" |
| #include "RISCVMachineFunctionInfo.h" |
| #include "RISCVRegisterInfo.h" |
| #include "RISCVSubtarget.h" |
| #include "RISCVTargetMachine.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/MemoryLocation.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/DiagnosticPrinter.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/IntrinsicsRISCV.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "riscv-lower" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| |
| RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, |
| const RISCVSubtarget &STI) |
| : TargetLowering(TM), Subtarget(STI) { |
| |
| if (Subtarget.isRV32E()) |
| report_fatal_error("Codegen not yet implemented for RV32E"); |
| |
| RISCVABI::ABI ABI = Subtarget.getTargetABI(); |
| assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); |
| |
| if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && |
| !Subtarget.hasStdExtF()) { |
| errs() << "Hard-float 'f' ABI can't be used for a target that " |
| "doesn't support the F instruction set extension (ignoring " |
| "target-abi)\n"; |
| ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
| } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && |
| !Subtarget.hasStdExtD()) { |
| errs() << "Hard-float 'd' ABI can't be used for a target that " |
| "doesn't support the D instruction set extension (ignoring " |
| "target-abi)\n"; |
| ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
| } |
| |
| switch (ABI) { |
| default: |
| report_fatal_error("Don't know how to lower this ABI"); |
| case RISCVABI::ABI_ILP32: |
| case RISCVABI::ABI_ILP32F: |
| case RISCVABI::ABI_ILP32D: |
| case RISCVABI::ABI_LP64: |
| case RISCVABI::ABI_LP64F: |
| case RISCVABI::ABI_LP64D: |
| break; |
| } |
| |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| // Set up the register classes. |
| addRegisterClass(XLenVT, &RISCV::GPRRegClass); |
| |
| if (Subtarget.hasStdExtZfh()) |
| addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); |
| if (Subtarget.hasStdExtF()) |
| addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); |
| if (Subtarget.hasStdExtD()) |
| addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); |
| |
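| // Scalable vector types made available by the V extension, grouped by |
| // element type. These tables drive both the register class assignment and |
| // the operation actions configured below. |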
| static const MVT::SimpleValueType BoolVecVTs[] = { |
| MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, |
| MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; |
| static const MVT::SimpleValueType IntVecVTs[] = { |
| MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, |
| MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, |
| MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, |
| MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, |
| MVT::nxv4i64, MVT::nxv8i64}; |
| static const MVT::SimpleValueType F16VecVTs[] = { |
| MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, |
| MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; |
| static const MVT::SimpleValueType F32VecVTs[] = { |
| MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; |
| static const MVT::SimpleValueType F64VecVTs[] = { |
| MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; |
| |
| if (Subtarget.hasVInstructions()) { |
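| // Assign a scalable vector type to the vector register class whose register |
| // group (LMUL) covers its known minimum size; fractional-LMUL types still |
| // occupy a single vector register. |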
| auto addRegClassForRVV = [this](MVT VT) { |
| unsigned Size = VT.getSizeInBits().getKnownMinValue(); |
| assert(Size <= 512 && isPowerOf2_32(Size)); |
| const TargetRegisterClass *RC; |
| if (Size <= 64) |
| RC = &RISCV::VRRegClass; |
| else if (Size == 128) |
| RC = &RISCV::VRM2RegClass; |
| else if (Size == 256) |
| RC = &RISCV::VRM4RegClass; |
| else |
| RC = &RISCV::VRM8RegClass; |
| |
| addRegisterClass(VT, RC); |
| }; |
| |
| for (MVT VT : BoolVecVTs) |
| addRegClassForRVV(VT); |
| for (MVT VT : IntVecVTs) { |
| if (VT.getVectorElementType() == MVT::i64 && |
| !Subtarget.hasVInstructionsI64()) |
| continue; |
| addRegClassForRVV(VT); |
| } |
| |
| if (Subtarget.hasVInstructionsF16()) |
| for (MVT VT : F16VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.hasVInstructionsF32()) |
| for (MVT VT : F32VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.hasVInstructionsF64()) |
| for (MVT VT : F64VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.useRVVForFixedLengthVectors()) { |
| auto addRegClassForFixedVectors = [this](MVT VT) { |
| MVT ContainerVT = getContainerForFixedLengthVector(VT); |
| unsigned RCID = getRegClassIDForVecVT(ContainerVT); |
| const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
| addRegisterClass(VT, TRI.getRegClass(RCID)); |
| }; |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
| if (useRVVForFixedLengthVectorVT(VT)) |
| addRegClassForFixedVectors(VT); |
| |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
| if (useRVVForFixedLengthVectorVT(VT)) |
| addRegClassForFixedVectors(VT); |
| } |
| } |
| |
| // Compute derived properties from the register classes. |
| computeRegisterProperties(STI.getRegisterInfo()); |
| |
| setStackPointerRegisterToSaveRestore(RISCV::X2); |
| |
| for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) |
| setLoadExtAction(N, XLenVT, MVT::i1, Promote); |
| |
| // TODO: add all necessary setOperationAction calls. |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); |
| |
| setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
| setOperationAction(ISD::BR_CC, XLenVT, Expand); |
| setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
| setOperationAction(ISD::SELECT_CC, XLenVT, Expand); |
| |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| if (!Subtarget.hasStdExtZbb()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
| } |
| |
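| // On RV64, custom-lower common 32-bit operations so they can be selected to |
| // their sign-extending *W instruction forms. |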
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::ADD, MVT::i32, Custom); |
| setOperationAction(ISD::SUB, MVT::i32, Custom); |
| setOperationAction(ISD::SHL, MVT::i32, Custom); |
| setOperationAction(ISD::SRA, MVT::i32, Custom); |
| setOperationAction(ISD::SRL, MVT::i32, Custom); |
| |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDSAT, MVT::i32, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::i32, Custom); |
| } else { |
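| // These libcalls are not guaranteed to be available on RV32; clearing their |
| // names forces the operations to be expanded instead. |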
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| setLibcallName(RTLIB::MUL_I128, nullptr); |
| setLibcallName(RTLIB::MULO_I64, nullptr); |
| } |
| |
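| // Without the M extension there is no hardware multiply/divide, so expand |
| // these to libcalls; with it, custom-lower the narrower types on RV64 so |
| // they can use the *W instructions. |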
| if (!Subtarget.hasStdExtM()) { |
| setOperationAction(ISD::MUL, XLenVT, Expand); |
| setOperationAction(ISD::MULHS, XLenVT, Expand); |
| setOperationAction(ISD::MULHU, XLenVT, Expand); |
| setOperationAction(ISD::SDIV, XLenVT, Expand); |
| setOperationAction(ISD::UDIV, XLenVT, Expand); |
| setOperationAction(ISD::SREM, XLenVT, Expand); |
| setOperationAction(ISD::UREM, XLenVT, Expand); |
| } else { |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::MUL, MVT::i32, Custom); |
| setOperationAction(ISD::MUL, MVT::i128, Custom); |
| |
| setOperationAction(ISD::SDIV, MVT::i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::i8, Custom); |
| setOperationAction(ISD::UREM, MVT::i8, Custom); |
| setOperationAction(ISD::SDIV, MVT::i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::i16, Custom); |
| setOperationAction(ISD::UREM, MVT::i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UREM, MVT::i32, Custom); |
| } else { |
| setOperationAction(ISD::MUL, MVT::i64, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::SDIVREM, XLenVT, Expand); |
| setOperationAction(ISD::UDIVREM, XLenVT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); |
| |
| setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); |
| setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); |
| setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); |
| |
| if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::ROTL, MVT::i32, Custom); |
| setOperationAction(ISD::ROTR, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::ROTL, XLenVT, Expand); |
| setOperationAction(ISD::ROTR, XLenVT, Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbp()) { |
| // Custom lower bswap/bitreverse so we can convert them to GREVI to enable |
| // more combining. |
| setOperationAction(ISD::BITREVERSE, XLenVT, Custom); |
| setOperationAction(ISD::BSWAP, XLenVT, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); |
| // BSWAP i8 doesn't exist. |
| setOperationAction(ISD::BITREVERSE, MVT::i16, Custom); |
| setOperationAction(ISD::BSWAP, MVT::i16, Custom); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); |
| setOperationAction(ISD::BSWAP, MVT::i32, Custom); |
| } |
| } else { |
| // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll |
| // pattern match it directly in isel. |
| setOperationAction(ISD::BSWAP, XLenVT, |
| Subtarget.hasStdExtZbb() ? Legal : Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbb()) { |
| setOperationAction(ISD::SMIN, XLenVT, Legal); |
| setOperationAction(ISD::SMAX, XLenVT, Legal); |
| setOperationAction(ISD::UMIN, XLenVT, Legal); |
| setOperationAction(ISD::UMAX, XLenVT, Legal); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); |
| setOperationAction(ISD::CTLZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::CTTZ, XLenVT, Expand); |
| setOperationAction(ISD::CTLZ, XLenVT, Expand); |
| setOperationAction(ISD::CTPOP, XLenVT, Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbt()) { |
| setOperationAction(ISD::FSHL, XLenVT, Custom); |
| setOperationAction(ISD::FSHR, XLenVT, Custom); |
| setOperationAction(ISD::SELECT, XLenVT, Legal); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::FSHL, MVT::i32, Custom); |
| setOperationAction(ISD::FSHR, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::SELECT, XLenVT, Custom); |
| } |
| |
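| // FP condition codes with no single native comparison, and FP operations |
| // that must be expanded (typically to libcalls); applied per FP type below. |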
| static const ISD::CondCode FPCCToExpand[] = { |
| ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
| ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, |
| ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; |
| |
| static const ISD::NodeType FPOpToExpand[] = { |
| ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, |
| ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; |
| |
| if (Subtarget.hasStdExtZfh()) |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| |
| if (Subtarget.hasStdExtZfh()) { |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| setOperationAction(ISD::LRINT, MVT::f16, Legal); |
| setOperationAction(ISD::LLRINT, MVT::f16, Legal); |
| setOperationAction(ISD::LROUND, MVT::f16, Legal); |
| setOperationAction(ISD::LLROUND, MVT::f16, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f16, Expand); |
| |
| setOperationAction(ISD::FREM, MVT::f16, Promote); |
| setOperationAction(ISD::FCEIL, MVT::f16, Promote); |
| setOperationAction(ISD::FFLOOR, MVT::f16, Promote); |
| setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); |
| setOperationAction(ISD::FRINT, MVT::f16, Promote); |
| setOperationAction(ISD::FROUND, MVT::f16, Promote); |
| setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); |
| setOperationAction(ISD::FTRUNC, MVT::f16, Promote); |
| setOperationAction(ISD::FPOW, MVT::f16, Promote); |
| setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
| setOperationAction(ISD::FCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FSIN, MVT::f16, Promote); |
| setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
| |
| // The i32 exponent operand of FPOWI is illegal on RV64 and needs custom |
| // promotion. |
| if (Subtarget.is64Bit()) |
| setOperationAction(ISD::FPOWI, MVT::i32, Custom); |
| } |
| |
| if (Subtarget.hasStdExtF()) { |
| setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
| setOperationAction(ISD::LRINT, MVT::f32, Legal); |
| setOperationAction(ISD::LLRINT, MVT::f32, Legal); |
| setOperationAction(ISD::LROUND, MVT::f32, Legal); |
| setOperationAction(ISD::LLROUND, MVT::f32, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
| for (auto Op : FPOpToExpand) |
| setOperationAction(Op, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| } |
| |
| if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) |
| setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
| |
| if (Subtarget.hasStdExtD()) { |
| setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
| setOperationAction(ISD::LRINT, MVT::f64, Legal); |
| setOperationAction(ISD::LLRINT, MVT::f64, Legal); |
| setOperationAction(ISD::LROUND, MVT::f64, Legal); |
| setOperationAction(ISD::LLROUND, MVT::f64, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| for (auto Op : FPOpToExpand) |
| setOperationAction(Op, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| } |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| } |
| |
| if (Subtarget.hasStdExtF()) { |
| setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); |
| setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); |
| |
| setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); |
| setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
| } |
| |
| setOperationAction(ISD::GlobalAddress, XLenVT, Custom); |
| setOperationAction(ISD::BlockAddress, XLenVT, Custom); |
| setOperationAction(ISD::ConstantPool, XLenVT, Custom); |
| setOperationAction(ISD::JumpTable, XLenVT, Custom); |
| |
| setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); |
| |
| // TODO: On M-mode only targets, the cycle[h] CSR may not be present. |
| // Unfortunately this can't be determined just from the ISA naming string. |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, |
| Subtarget.is64Bit() ? Legal : Custom); |
| |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| if (Subtarget.is64Bit()) |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); |
| |
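| // With the A extension, atomics up to XLen bits are supported natively; |
| // otherwise all atomic operations are lowered to __atomic_* libcalls. |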
| if (Subtarget.hasStdExtA()) { |
| setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); |
| setMinCmpXchgSizeInBits(32); |
| } else { |
| setMaxAtomicSizeInBitsSupported(0); |
| } |
| |
| setBooleanContents(ZeroOrOneBooleanContent); |
| |
| if (Subtarget.hasVInstructions()) { |
| setBooleanVectorContents(ZeroOrOneBooleanContent); |
| |
| setOperationAction(ISD::VSCALE, XLenVT, Custom); |
| |
| // RVV intrinsics may have illegal operands. |
| // We also need to custom legalize vmv.x.s. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); |
| } else { |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
| } |
| |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| |
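| // Vector-predicated (VP) operations custom-lowered to their RVV VL-based |
| // forms. |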
| static const unsigned IntegerVPOps[] = { |
| ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, |
| ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, |
| ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, |
| ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, |
| ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, |
| ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, |
| ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}; |
| |
| static const unsigned FloatingPointVPOps[] = { |
| ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, |
| ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, |
| ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX}; |
| |
| if (!Subtarget.is64Bit()) { |
| // We must custom-lower certain vXi64 operations on RV32 due to the vector |
| // element type being illegal. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); |
| |
| setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom); |
| } |
| |
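| // Actions for scalable mask (i1-element) vector types. |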
| for (MVT VT : BoolVecVTs) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| |
| // Mask VTs are custom-expanded into a series of standard nodes |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| setOperationAction(ISD::VP_REDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VP_REDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom); |
| |
| // RVV has native int->float & float->int conversions where the |
| // element type sizes are within one power-of-two of each other. Any |
| // wider distances between type sizes have to be lowered as sequences |
| // which progressively narrow the gap in stages. |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| // Expand all extending loads to types larger than this, and truncating |
| // stores from types larger than this. |
| for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
| setTruncStoreAction(OtherVT, VT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| } |
| |
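| // Actions for scalable integer vector types. |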
| for (MVT VT : IntVecVTs) { |
| if (VT.getVectorElementType() == MVT::i64 && |
| !Subtarget.hasVInstructionsI64()) |
| continue; |
| |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
| setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); |
| |
| // Vectors implement MULHS/MULHU natively, so expand the combined |
| // SMUL_LOHI/UMUL_LOHI nodes. |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| setOperationAction(ISD::CTLZ, VT, Expand); |
| setOperationAction(ISD::CTPOP, VT, Expand); |
| |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| |
| // Custom-lower extensions and truncations from/to mask types. |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| |
| // RVV has native int->float & float->int conversions where the |
| // element type sizes are within one power-of-two of each other. Any |
| // wider distances between type sizes have to be lowered as sequences |
| // which progressively narrow the gap in stages. |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| |
| // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" |
| // nodes which truncate by one power of two at a time. |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| |
| // Custom-lower insert/extract operations to simplify patterns. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| // Custom-lower reduction operations to set up the corresponding custom |
| // nodes' operands. |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| |
| for (unsigned VPOpc : IntegerVPOps) |
| setOperationAction(VPOpc, VT, Custom); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::VP_LOAD, VT, Custom); |
| setOperationAction(ISD::VP_STORE, VT, Custom); |
| setOperationAction(ISD::VP_GATHER, VT, Custom); |
| setOperationAction(ISD::VP_SCATTER, VT, Custom); |
| |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| setOperationAction(ISD::STEP_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); |
| |
| for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
| setTruncStoreAction(VT, OtherVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| |
| // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point |
| // type that can represent the value exactly. |
| if (VT.getVectorElementType() != MVT::i64) { |
| MVT FloatEltVT = |
| VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32; |
| EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); |
| if (isTypeLegal(FloatVT)) { |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); |
| } |
| } |
| } |
| |
| // Expand various CCs to best match the RVV ISA, which natively supports UNE |
| // but no other unordered comparisons, and supports all ordered comparisons |
| // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization |
| // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), |
| // and we pattern-match those back to the "original", swapping operands once |
| // more. This way we catch both operations and both "vf" and "fv" forms with |
| // fewer patterns. |
| static const ISD::CondCode VFPCCToExpand[] = { |
| ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
| ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, |
| ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, |
| }; |
| |
| // Sets common operation actions on RVV floating-point vector types. |
| const auto SetCommonVFPActions = [&](MVT VT) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
| // RVV has native FP_ROUND & FP_EXTEND conversions where the element type |
| // sizes are within one power-of-two of each other. Therefore conversions |
| // between vXf16 and vXf64 must be lowered as sequences which convert via |
| // vXf32. |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| // Custom-lower insert/extract operations to simplify patterns. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| // Expand various condition codes (explained above). |
| for (auto CC : VFPCCToExpand) |
| setCondCodeAction(CC, VT, Expand); |
| |
| setOperationAction(ISD::FMINNUM, VT, Legal); |
| setOperationAction(ISD::FMAXNUM, VT, Legal); |
| |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| |
| setOperationAction(ISD::FCOPYSIGN, VT, Legal); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::VP_LOAD, VT, Custom); |
| setOperationAction(ISD::VP_STORE, VT, Custom); |
| setOperationAction(ISD::VP_GATHER, VT, Custom); |
| setOperationAction(ISD::VP_SCATTER, VT, Custom); |
| |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); |
| |
| for (unsigned VPOpc : FloatingPointVPOps) |
| setOperationAction(VPOpc, VT, Custom); |
| }; |
| |
| // Sets common extload/truncstore actions on RVV floating-point vector |
| // types. |
| const auto SetCommonVFPExtLoadTruncStoreActions = |
| [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { |
| for (auto SmallVT : SmallerVTs) { |
| setTruncStoreAction(VT, SmallVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); |
| } |
| }; |
| |
| if (Subtarget.hasVInstructionsF16()) |
| for (MVT VT : F16VecVTs) |
| SetCommonVFPActions(VT); |
| |
| for (MVT VT : F32VecVTs) { |
| if (Subtarget.hasVInstructionsF32()) |
| SetCommonVFPActions(VT); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
| } |
| |
| for (MVT VT : F64VecVTs) { |
| if (Subtarget.hasVInstructionsF64()) |
| SetCommonVFPActions(VT); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); |
| } |
| |
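| // Fixed-length vectors are lowered by operating on an equivalently sized |
| // scalable container type, so most operations on them are custom-lowered. |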
| if (Subtarget.useRVVForFixedLengthVectors()) { |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
| if (!useRVVForFixedLengthVectorVT(VT)) |
| continue; |
| |
| // By default everything must be expanded. |
| for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
| setOperationAction(Op, VT, Expand); |
| for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, OtherVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| |
| // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| |
| setOperationAction(ISD::SETCC, VT, Custom); |
| |
| setOperationAction(ISD::SELECT, VT, Custom); |
| |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| setOperationAction(ISD::VP_REDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VP_REDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom); |
| |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| // The operations below differ between mask vectors and other vector types. |
| if (VT.getVectorElementType() == MVT::i1) { |
| setOperationAction(ISD::AND, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::XOR, VT, Custom); |
| continue; |
| } |
| |
| // Use SPLAT_VECTOR to prevent type legalization from destroying the |
| // splats when type legalizing i64 scalar on RV32. |
| // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs |
| // improvements first. |
| if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); |
| } |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::VP_LOAD, VT, Custom); |
| setOperationAction(ISD::VP_STORE, VT, Custom); |
| setOperationAction(ISD::VP_GATHER, VT, Custom); |
| setOperationAction(ISD::VP_SCATTER, VT, Custom); |
| |
| setOperationAction(ISD::ADD, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::SUB, VT, Custom); |
| setOperationAction(ISD::AND, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::XOR, VT, Custom); |
| setOperationAction(ISD::SDIV, VT, Custom); |
| setOperationAction(ISD::SREM, VT, Custom); |
| setOperationAction(ISD::UDIV, VT, Custom); |
| setOperationAction(ISD::UREM, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| |
| setOperationAction(ISD::SMIN, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Custom); |
| setOperationAction(ISD::UMIN, VT, Custom); |
| setOperationAction(ISD::UMAX, VT, Custom); |
| setOperationAction(ISD::ABS, VT, Custom); |
| |
| setOperationAction(ISD::MULHS, VT, Custom); |
| setOperationAction(ISD::MULHU, VT, Custom); |
| |
| setOperationAction(ISD::SADDSAT, VT, Custom); |
| setOperationAction(ISD::UADDSAT, VT, Custom); |
| setOperationAction(ISD::SSUBSAT, VT, Custom); |
| setOperationAction(ISD::USUBSAT, VT, Custom); |
| |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| |
| // Custom-lower reduction operations to set up the corresponding custom |
| // nodes' operands. |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| |
| for (unsigned VPOpc : IntegerVPOps) |
| setOperationAction(VPOpc, VT, Custom); |
| |
| // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point |
| // type that can represent the value exactly. |
| if (VT.getVectorElementType() != MVT::i64) { |
| MVT FloatEltVT = |
| VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32; |
| EVT FloatVT = |
| MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); |
| if (isTypeLegal(FloatVT)) { |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); |
| } |
| } |
| } |
| |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { |
| if (!useRVVForFixedLengthVectorVT(VT)) |
| continue; |
| |
| // By default everything must be expanded. |
| for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
| setOperationAction(Op, VT, Expand); |
| for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setTruncStoreAction(VT, OtherVT, Expand); |
| } |
| |
| // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::VP_LOAD, VT, Custom); |
| setOperationAction(ISD::VP_STORE, VT, Custom); |
| setOperationAction(ISD::VP_GATHER, VT, Custom); |
| setOperationAction(ISD::VP_SCATTER, VT, Custom); |
| |
| setOperationAction(ISD::FADD, VT, Custom); |
| setOperationAction(ISD::FSUB, VT, Custom); |
| setOperationAction(ISD::FMUL, VT, Custom); |
| setOperationAction(ISD::FDIV, VT, Custom); |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| setOperationAction(ISD::FSQRT, VT, Custom); |
| setOperationAction(ISD::FMA, VT, Custom); |
| setOperationAction(ISD::FMINNUM, VT, Custom); |
| setOperationAction(ISD::FMAXNUM, VT, Custom); |
| |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| |
| for (auto CC : VFPCCToExpand) |
| setCondCodeAction(CC, VT, Expand); |
| |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| |
| for (unsigned VPOpc : FloatingPointVPOps) |
| setOperationAction(VPOpc, VT, Custom); |
| } |
| |
| // Custom-legalize bitcasts from fixed-length vectors to scalar types. |
| setOperationAction(ISD::BITCAST, MVT::i8, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f64, Custom); |
| } |
| } |
| |
| // Function alignments. |
| const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); |
| setMinFunctionAlignment(FunctionAlignment); |
| setPrefFunctionAlignment(FunctionAlignment); |
| |
| setMinimumJumpTableEntries(5); |
| |
| // Jumps are expensive compared to logic operations. |
| setJumpIsExpensive(); |
| |
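| // DAG nodes we want a chance to optimize in PerformDAGCombine. |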
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::AND); |
| setTargetDAGCombine(ISD::OR); |
| setTargetDAGCombine(ISD::XOR); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| if (Subtarget.hasVInstructions()) { |
| setTargetDAGCombine(ISD::FCOPYSIGN); |
| setTargetDAGCombine(ISD::MGATHER); |
| setTargetDAGCombine(ISD::MSCATTER); |
| setTargetDAGCombine(ISD::VP_GATHER); |
| setTargetDAGCombine(ISD::VP_SCATTER); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::STORE); |
| } |
| } |
| |
| EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, |
| LLVMContext &Context, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return getPointerTy(DL); |
| if (Subtarget.hasVInstructions() && |
| (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) |
| return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
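| // The explicit vector length (EVL) operand of VP operations is XLen wide, |
| // matching the AVL operand of vsetvli. |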
| MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { |
| return Subtarget.getXLenVT(); |
| } |
| |
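| // Describe the memory accessed by target intrinsics so SelectionDAG can |
| // attach the correct MachineMemOperand. |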
| bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
| const CallInst &I, |
| MachineFunction &MF, |
| unsigned Intrinsic) const { |
| auto &DL = I.getModule()->getDataLayout(); |
| switch (Intrinsic) { |
| default: |
| return false; |
| case Intrinsic::riscv_masked_atomicrmw_xchg_i32: |
| case Intrinsic::riscv_masked_atomicrmw_add_i32: |
| case Intrinsic::riscv_masked_atomicrmw_sub_i32: |
| case Intrinsic::riscv_masked_atomicrmw_nand_i32: |
| case Intrinsic::riscv_masked_atomicrmw_max_i32: |
| case Intrinsic::riscv_masked_atomicrmw_min_i32: |
| case Intrinsic::riscv_masked_atomicrmw_umax_i32: |
| case Intrinsic::riscv_masked_atomicrmw_umin_i32: |
| case Intrinsic::riscv_masked_cmpxchg_i32: { |
| PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); |
| Info.opc = ISD::INTRINSIC_W_CHAIN; |
| Info.memVT = MVT::getVT(PtrTy->getElementType()); |
| Info.ptrVal = I.getArgOperand(0); |
| Info.offset = 0; |
| Info.align = Align(4); |
| Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
| MachineMemOperand::MOVolatile; |
| return true; |
| } |
| case Intrinsic::riscv_masked_strided_load: |
| Info.opc = ISD::INTRINSIC_W_CHAIN; |
| Info.ptrVal = I.getArgOperand(1); |
| Info.memVT = getValueType(DL, I.getType()->getScalarType()); |
| Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8); |
| Info.size = MemoryLocation::UnknownSize; |
| Info.flags |= MachineMemOperand::MOLoad; |
| return true; |
| case Intrinsic::riscv_masked_strided_store: |
| Info.opc = ISD::INTRINSIC_VOID; |
| Info.ptrVal = I.getArgOperand(1); |
| Info.memVT = |
| getValueType(DL, I.getArgOperand(0)->getType()->getScalarType()); |
| Info.align = Align( |
| DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) / |
| 8); |
| Info.size = MemoryLocation::UnknownSize; |
| Info.flags |= MachineMemOperand::MOStore; |
| return true; |
| } |
| } |
| |
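| // RISC-V addressing is limited to a base register plus a 12-bit signed |
| // immediate. |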
| bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
| const AddrMode &AM, Type *Ty, |
| unsigned AS, |
| Instruction *I) const { |
| // No global is ever allowed as a base. |
| if (AM.BaseGV) |
| return false; |
| |
| // Require a 12-bit signed offset. |
| if (!isInt<12>(AM.BaseOffs)) |
| return false; |
| |
| switch (AM.Scale) { |
| case 0: // "r+i" or just "i", depending on HasBaseReg. |
| break; |
| case 1: |
| if (!AM.HasBaseReg) // allow "r+i". |
| break; |
| return false; // disallow "r+r" or "r+r+i". |
| default: |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
| return isInt<12>(Imm); |
| } |
| |
| bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
| return isInt<12>(Imm); |
| } |
| |
| // On RV32, 64-bit integers are split into their high and low parts and held |
| // in two different registers, so the trunc is free since the low register can |
| // just be used. |
| bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { |
| if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
| return false; |
| unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); |
| unsigned DestBits = DstTy->getPrimitiveSizeInBits(); |
| return (SrcBits == 64 && DestBits == 32); |
| } |
| |
| bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { |
| if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || |
| !SrcVT.isInteger() || !DstVT.isInteger()) |
| return false; |
| unsigned SrcBits = SrcVT.getSizeInBits(); |
| unsigned DestBits = DstVT.getSizeInBits(); |
| return (SrcBits == 64 && DestBits == 32); |
| } |
| |
| bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
| // Zexts are free if they can be combined with a load. |
| if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
| EVT MemVT = LD->getMemoryVT(); |
| if ((MemVT == MVT::i8 || MemVT == MVT::i16 || |
| (Subtarget.is64Bit() && MemVT == MVT::i32)) && |
| (LD->getExtensionType() == ISD::NON_EXTLOAD || |
| LD->getExtensionType() == ISD::ZEXTLOAD)) |
| return true; |
| } |
| |
| return TargetLowering::isZExtFree(Val, VT2); |
| } |
| |
| bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
| return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
| } |
| |
| bool RISCVTargetLowering::isCheapToSpeculateCttz() const { |
| return Subtarget.hasStdExtZbb(); |
| } |
| |
| bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { |
| return Subtarget.hasStdExtZbb(); |
| } |
| |
| bool RISCVTargetLowering::hasAndNot(SDValue Y) const { |
| EVT VT = Y.getValueType(); |
| |
| // FIXME: Support vectors once we have tests. |
| if (VT.isVector()) |
| return false; |
| |
| return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y); |
| } |
| |
| /// Check if sinking \p I's operands to I's basic block is profitable, because |
| /// the operands can be folded into a target instruction, e.g. |
| /// splats of scalars can fold into vector instructions. |
| bool RISCVTargetLowering::shouldSinkOperands( |
| Instruction *I, SmallVectorImpl<Use *> &Ops) const { |
| using namespace llvm::PatternMatch; |
| |
| if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
| return false; |
| |
| auto IsSinker = [&](Instruction *I, int Operand) { |
| switch (I->getOpcode()) { |
| case Instruction::Add: |
| case Instruction::Sub: |
| case Instruction::Mul: |
| case Instruction::And: |
| case Instruction::Or: |
| case Instruction::Xor: |
| case Instruction::FAdd: |
| case Instruction::FSub: |
| case Instruction::FMul: |
| case Instruction::FDiv: |
| case Instruction::ICmp: |
| case Instruction::FCmp: |
| return true; |
| case Instruction::Shl: |
| case Instruction::LShr: |
| case Instruction::AShr: |
| return Operand == 1; |
| case Instruction::Call: |
| if (auto *II = dyn_cast<IntrinsicInst>(I)) { |
| switch (II->getIntrinsicID()) { |
| case Intrinsic::fma: |
| return Operand == 0 || Operand == 1; |
| default: |
| return false; |
| } |
| } |
| return false; |
| default: |
| return false; |
| } |
| }; |
| |
| for (auto OpIdx : enumerate(I->operands())) { |
| if (!IsSinker(I, OpIdx.index())) |
| continue; |
| |
| Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); |
| // Make sure we are not already sinking this operand |
| if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) |
| continue; |
| |
| // We are looking for a splat that can be sunk. |
| if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), |
| m_Undef(), m_ZeroMask()))) |
| continue; |
| |
| // All uses of the shuffle should be sunk to avoid duplicating it across GPR |
| // and vector registers. |
| for (Use &U : Op->uses()) { |
| Instruction *Insn = cast<Instruction>(U.getUser()); |
| if (!IsSinker(Insn, U.getOperandNo())) |
| return false; |
| } |
| |
| Ops.push_back(&Op->getOperandUse(0)); |
| Ops.push_back(&OpIdx.value()); |
| } |
| return true; |
| } |
| |
| bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
| bool ForCodeSize) const { |
| if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin()) |
| return false; |
| if (VT == MVT::f32 && !Subtarget.hasStdExtF()) |
| return false; |
| if (VT == MVT::f64 && !Subtarget.hasStdExtD()) |
| return false; |
| if (Imm.isNegZero()) |
| return false; |
| return Imm.isZero(); |
| } |
| |
| bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { |
| return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || |
| (VT == MVT::f32 && Subtarget.hasStdExtF()) || |
| (VT == MVT::f64 && Subtarget.hasStdExtD()); |
| } |
| |
| MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. |
| // We might still end up using a GPR but that will be decided based on ABI. |
| if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) |
| return MVT::f32; |
| |
| return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
| } |
| |
| unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. |
| // We might still end up using a GPR but that will be decided based on ABI. |
| if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) |
| return 1; |
| |
| return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
| } |
| |
| // Changes the condition code and swaps operands if necessary, so the SetCC |
| // operation matches one of the comparisons supported directly by branches |
| // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare |
| // with 1/-1. |
| static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, |
| ISD::CondCode &CC, SelectionDAG &DAG) { |
| // Convert X > -1 to X >= 0. |
| if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { |
| RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
| CC = ISD::SETGE; |
| return; |
| } |
| // Convert X < 1 to 0 >= X. |
| if (CC == ISD::SETLT && isOneConstant(RHS)) { |
| RHS = LHS; |
| LHS = DAG.getConstant(0, DL, RHS.getValueType()); |
| CC = ISD::SETGE; |
| return; |
| } |
| |
| switch (CC) { |
| default: |
| break; |
| case ISD::SETGT: |
| case ISD::SETLE: |
| case ISD::SETUGT: |
| case ISD::SETULE: |
| CC = ISD::getSetCCSwappedOperands(CC); |
| std::swap(LHS, RHS); |
| break; |
| } |
| } |
| |
| RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { |
| assert(VT.isScalableVector() && "Expecting a scalable vector type"); |
| unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); |
| if (VT.getVectorElementType() == MVT::i1) |
| KnownSize *= 8; |
| |
| switch (KnownSize) { |
| default: |
| llvm_unreachable("Invalid LMUL."); |
| case 8: |
| return RISCVII::VLMUL::LMUL_F8; |
| case 16: |
| return RISCVII::VLMUL::LMUL_F4; |
| case 32: |
| return RISCVII::VLMUL::LMUL_F2; |
| case 64: |
| return RISCVII::VLMUL::LMUL_1; |
| case 128: |
| return RISCVII::VLMUL::LMUL_2; |
| case 256: |
| return RISCVII::VLMUL::LMUL_4; |
| case 512: |
| return RISCVII::VLMUL::LMUL_8; |
| } |
| } |
| |
| unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { |
| switch (LMul) { |
| default: |
| llvm_unreachable("Invalid LMUL."); |
| case RISCVII::VLMUL::LMUL_F8: |
| case RISCVII::VLMUL::LMUL_F4: |
| case RISCVII::VLMUL::LMUL_F2: |
| case RISCVII::VLMUL::LMUL_1: |
| return RISCV::VRRegClassID; |
| case RISCVII::VLMUL::LMUL_2: |
| return RISCV::VRM2RegClassID; |
| case RISCVII::VLMUL::LMUL_4: |
| return RISCV::VRM4RegClassID; |
| case RISCVII::VLMUL::LMUL_8: |
| return RISCV::VRM8RegClassID; |
| } |
| } |
| |
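| // Return the subregister index addressing the Index'th VT-sized slice of a |
| // larger vector register group. |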
| unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { |
| RISCVII::VLMUL LMUL = getLMUL(VT); |
| if (LMUL == RISCVII::VLMUL::LMUL_F8 || |
| LMUL == RISCVII::VLMUL::LMUL_F4 || |
| LMUL == RISCVII::VLMUL::LMUL_F2 || |
| LMUL == RISCVII::VLMUL::LMUL_1) { |
| static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm1_0 + Index; |
| } |
| if (LMUL == RISCVII::VLMUL::LMUL_2) { |
| static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm2_0 + Index; |
| } |
| if (LMUL == RISCVII::VLMUL::LMUL_4) { |
| static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm4_0 + Index; |
| } |
| llvm_unreachable("Invalid vector type."); |
| } |
| |
| unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { |
| if (VT.getVectorElementType() == MVT::i1) |
| return RISCV::VRRegClassID; |
| return getRegClassIDForLMUL(getLMUL(VT)); |
| } |
| |
| // Attempt to decompose a subvector insert/extract between VecVT and |
| // SubVecVT via subregister indices. Returns the subregister index that |
| // can perform the subvector insert/extract with the given element index, as |
| // well as the index corresponding to any leftover subvectors that must be |
| // further inserted/extracted within the register class for SubVecVT. |
| std::pair<unsigned, unsigned> |
| RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
| MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, |
| const RISCVRegisterInfo *TRI) { |
| static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && |
| RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && |
| RISCV::VRM2RegClassID > RISCV::VRRegClassID), |
| "Register classes not ordered"); |
| unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); |
| unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); |
| // Try to compose a subregister index that takes us from the incoming |
| // LMUL>1 register class down to the outgoing one. At each step we halve |
| // the LMUL: |
| // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 |
| // Note that this is not guaranteed to find a subregister index, such as |
| // when we are extracting from one VR type to another. |
| unsigned SubRegIdx = RISCV::NoSubRegister; |
| for (const unsigned RCID : |
| {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) |
| if (VecRegClassID > RCID && SubRegClassID <= RCID) { |
| VecVT = VecVT.getHalfNumVectorElementsVT(); |
| bool IsHi = |
| InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); |
| SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, |
| getSubregIndexByMVT(VecVT, IsHi)); |
| if (IsHi) |
| InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); |
| } |
| return {SubRegIdx, InsertExtractIdx}; |
| } |
| |
| // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar |
| // stores for those types. |
| bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
| return !Subtarget.useRVVForFixedLengthVectors() || |
| (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); |
| } |
| |
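| // Return true if scalars of this type can be used as RVV vector element |
| // types on the current subtarget. |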
| bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const { |
| if (ScalarTy->isPointerTy()) |
| return true; |
| |
| if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) || |
| ScalarTy->isIntegerTy(32)) |
| return true; |
| |
| if (ScalarTy->isIntegerTy(64)) |
| return Subtarget.hasVInstructionsI64(); |
| |
| if (ScalarTy->isHalfTy()) |
| return Subtarget.hasVInstructionsF16(); |
| if (ScalarTy->isFloatTy()) |
| return Subtarget.hasVInstructionsF32(); |
| if (ScalarTy->isDoubleTy()) |
| return Subtarget.hasVInstructionsF64(); |
| |
| return false; |
| } |
| |
| static bool useRVVForFixedLengthVectorVT(MVT VT, |
| const RISCVSubtarget &Subtarget) { |
| assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); |
| if (!Subtarget.useRVVForFixedLengthVectors()) |
| return false; |
| |
| // We only support a set of vector types with a consistent maximum fixed size |
| // across all supported vector element types to avoid legalization issues. |
| // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest |
| // fixed-length vector type we support is 1024 bytes. |
| if (VT.getFixedSizeInBits() > 1024 * 8) |
| return false; |
| |
| unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits(); |
| |
| MVT EltVT = VT.getVectorElementType(); |
| |
| // Don't use RVV for vectors we cannot scalarize if required. |
| switch (EltVT.SimpleTy) { |
| // i1 is supported but has different rules. |
| default: |
| return false; |
| case MVT::i1: |
| // Masks can only use a single register. |
| if (VT.getVectorNumElements() > MinVLen) |
| return false; |
| MinVLen /= 8; |
| break; |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| break; |
| case MVT::i64: |
| if (!Subtarget.hasVInstructionsI64()) |
| return false; |
| break; |
| case MVT::f16: |
| if (!Subtarget.hasVInstructionsF16()) |
| return false; |
| break; |
| case MVT::f32: |
| if (!Subtarget.hasVInstructionsF32()) |
| return false; |
| break; |
| case MVT::f64: |
| if (!Subtarget.hasVInstructionsF64()) |
| return false; |
| break; |
| } |
| |
| // Reject elements larger than ELEN. |
| if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors()) |
| return false; |
| |
| unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); |
| // Don't use RVV for types that don't fit. |
| if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) |
| return false; |
| |
| // TODO: Perhaps an artificial restriction, but worth having whilst getting |
| // the base fixed length RVV support in place. |
| if (!VT.isPow2VectorType()) |
| return false; |
| |
| return true; |
| } |
| |
| bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { |
| return ::useRVVForFixedLengthVectorVT(VT, Subtarget); |
| } |
| |
// Return the scalable container type used when lowering the fixed-length
// vector VT: it has VT's element type and is the narrowest supported type
// guaranteed to hold all of VT's elements given the minimum VLEN.
| static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, |
| const RISCVSubtarget &Subtarget) { |
// This may be called before legal types are set up.
| assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || |
| useRVVForFixedLengthVectorVT(VT, Subtarget)) && |
| "Expected legal fixed length vector!"); |
| |
| unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits(); |
| unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors(); |
| |
| MVT EltVT = VT.getVectorElementType(); |
| switch (EltVT.SimpleTy) { |
| default: |
| llvm_unreachable("unexpected element type for RVV container"); |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| case MVT::i64: |
| case MVT::f16: |
| case MVT::f32: |
| case MVT::f64: { |
| // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for |
| // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within |
| // each fractional LMUL we support SEW between 8 and LMUL*ELEN. |
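// For example (assuming MinVLen = 128 and MaxELen = 64 purely for
// illustration): v8i32 maps to nxv4i32, v4i32 to nxv2i32 (LMUL=1) and v2i32
// to nxv1i32 (LMUL=1/2); when VLEN equals the minimum, each container holds
// exactly the fixed-length vector's elements.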
| unsigned NumElts = |
| (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; |
| NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); |
| assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); |
| return MVT::getScalableVectorVT(EltVT, NumElts); |
| } |
| } |
| } |
| |
| static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, |
| const RISCVSubtarget &Subtarget) { |
| return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, |
| Subtarget); |
| } |
| |
| MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { |
| return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); |
| } |
| |
| // Grow V to consume an entire RVV register. |
| static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VT.isScalableVector() && |
| "Expected to convert into a scalable vector!"); |
| assert(V.getValueType().isFixedLengthVector() && |
| "Expected a fixed length vector operand!"); |
| SDLoc DL(V); |
| SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
| return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); |
| } |
| |
| // Shrink V so it's just big enough to maintain a VT's worth of data. |
| static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VT.isFixedLengthVector() && |
| "Expected to convert into a fixed length vector!"); |
| assert(V.getValueType().isScalableVector() && |
| "Expected a scalable vector operand!"); |
| SDLoc DL(V); |
| SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
| return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); |
| } |
| |
| // Gets the two common "VL" operands: an all-ones mask and the vector length. |
| // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is |
| // the vector type that it is contained in. |
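// For example, a fixed-length v4i32 operation carried out in an nxv2i32
// container yields VL = 4 and an all-ones nxv2i1 mask built with VMSET_VL;
// for a scalable type the VL is the VLMAX sentinel instead.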
| static std::pair<SDValue, SDValue> |
| getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| SDValue VL = VecVT.isFixedLengthVector() |
| ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT) |
| : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); |
| MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
| SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); |
| return {Mask, VL}; |
| } |
| |
| // As above but assuming the given type is a scalable vector type. |
| static std::pair<SDValue, SDValue> |
| getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VecVT.isScalableVector() && "Expecting a scalable vector"); |
| return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget); |
| } |
| |
| // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few |
// cases of either are (currently) supported. This can get us into an infinite loop
| // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR |
| // as a ..., etc. |
| // Until either (or both) of these can reliably lower any node, reporting that |
| // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks |
| // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, |
| // which is not desirable. |
| bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( |
| EVT VT, unsigned DefinedValues) const { |
| return false; |
| } |
| |
| bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { |
| // Only splats are currently supported. |
| if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) |
| return true; |
| |
| return false; |
| } |
| |
| static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) { |
| // RISCV FP-to-int conversions saturate to the destination register size, but |
| // don't produce 0 for nan. We can use a conversion instruction and fix the |
| // nan case with a compare and a select. |
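// A sketch of the intended lowering (illustrative, before any further
// combines): (fp_to_sint_sat f32 %x to i32)
//   -> (select_cc %x, %x, (i32 0), (FCVT_X_RTZ %x), setuo)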
| SDValue Src = Op.getOperand(0); |
| |
| EVT DstVT = Op.getValueType(); |
| EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
| |
| bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; |
| unsigned Opc; |
| if (SatVT == DstVT) |
| Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ; |
| else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
| Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; |
| else |
| return SDValue(); |
| // FIXME: Support other SatVTs by clamping before or after the conversion. |
| |
| SDLoc DL(Op); |
| SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src); |
| |
| SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); |
| return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); |
| } |
| |
| static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| MVT VT = Op.getSimpleValueType(); |
| assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
| |
| MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
| |
| SDLoc DL(Op); |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| unsigned Opc = |
| VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; |
| SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL); |
| return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
| } |
| |
| struct VIDSequence { |
| int64_t StepNumerator; |
| unsigned StepDenominator; |
| int64_t Addend; |
| }; |
| |
| // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] |
// to the (non-zero) step S and start value X. This can then be lowered as the
| // RVV sequence (VID * S) + X, for example. |
| // The step S is represented as an integer numerator divided by a positive |
| // denominator. Note that the implementation currently only identifies |
| // sequences in which either the numerator is +/- 1 or the denominator is 1. It |
| // cannot detect 2/3, for example. |
| // Note that this method will also match potentially unappealing index |
// sequences, like <i32 0, i32 50939494>; it is left to the caller to
| // determine whether this is worth generating code for. |
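// For example, <0,2,4,6> is matched with StepNumerator=2, StepDenominator=1,
// Addend=0, and <1,1,2,2,3,3> with StepNumerator=1, StepDenominator=2,
// Addend=1.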
| static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) { |
| unsigned NumElts = Op.getNumOperands(); |
| assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); |
| if (!Op.getValueType().isInteger()) |
| return None; |
| |
| Optional<unsigned> SeqStepDenom; |
| Optional<int64_t> SeqStepNum, SeqAddend; |
| Optional<std::pair<uint64_t, unsigned>> PrevElt; |
| unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); |
| for (unsigned Idx = 0; Idx < NumElts; Idx++) { |
| // Assume undef elements match the sequence; we just have to be careful |
| // when interpolating across them. |
| if (Op.getOperand(Idx).isUndef()) |
| continue; |
| // The BUILD_VECTOR must be all constants. |
| if (!isa<ConstantSDNode>(Op.getOperand(Idx))) |
| return None; |
| |
| uint64_t Val = Op.getConstantOperandVal(Idx) & |
| maskTrailingOnes<uint64_t>(EltSizeInBits); |
| |
| if (PrevElt) { |
| // Calculate the step since the last non-undef element, and ensure |
| // it's consistent across the entire sequence. |
| unsigned IdxDiff = Idx - PrevElt->second; |
| int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); |
| |
// A value difference of zero means that we're somewhere in the middle
| // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a |
| // step change before evaluating the sequence. |
| if (ValDiff != 0) { |
| int64_t Remainder = ValDiff % IdxDiff; |
| // Normalize the step if it's greater than 1. |
| if (Remainder != ValDiff) { |
| // The difference must cleanly divide the element span. |
| if (Remainder != 0) |
| return None; |
| ValDiff /= IdxDiff; |
| IdxDiff = 1; |
| } |
| |
| if (!SeqStepNum) |
| SeqStepNum = ValDiff; |
| else if (ValDiff != SeqStepNum) |
| return None; |
| |
| if (!SeqStepDenom) |
| SeqStepDenom = IdxDiff; |
| else if (IdxDiff != *SeqStepDenom) |
| return None; |
| } |
| } |
| |
| // Record and/or check any addend. |
| if (SeqStepNum && SeqStepDenom) { |
| uint64_t ExpectedVal = |
| (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; |
| int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); |
| if (!SeqAddend) |
| SeqAddend = Addend; |
| else if (SeqAddend != Addend) |
| return None; |
| } |
| |
| // Record this non-undef element for later. |
| if (!PrevElt || PrevElt->first != Val) |
| PrevElt = std::make_pair(Val, Idx); |
| } |
| // We need to have logged both a step and an addend for this to count as |
| // a legal index sequence. |
| if (!SeqStepNum || !SeqStepDenom || !SeqAddend) |
| return None; |
| |
| return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; |
| } |
| |
| static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| MVT VT = Op.getSimpleValueType(); |
| assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
| |
| MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
| |
| SDLoc DL(Op); |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| MVT XLenVT = Subtarget.getXLenVT(); |
| unsigned NumElts = Op.getNumOperands(); |
| |
| if (VT.getVectorElementType() == MVT::i1) { |
| if (ISD::isBuildVectorAllZeros(Op.getNode())) { |
| SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); |
| return convertFromScalableVector(VT, VMClr, DAG, Subtarget); |
| } |
| |
| if (ISD::isBuildVectorAllOnes(Op.getNode())) { |
| SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); |
| return convertFromScalableVector(VT, VMSet, DAG, Subtarget); |
| } |
| |
| // Lower constant mask BUILD_VECTORs via an integer vector type, in |
| // scalar integer chunks whose bit-width depends on the number of mask |
| // bits and XLEN. |
| // First, determine the most appropriate scalar integer type to use. This |
| // is at most XLenVT, but may be shrunk to a smaller vector element type |
| // according to the size of the final vector - use i8 chunks rather than |
| // XLenVT if we're producing a v8i1. This results in more consistent |
| // codegen across RV32 and RV64. |
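// As an illustration of the constant path below, the v8i1 constant
// <1,0,1,1,0,0,0,1> is materialized as the i8 value 0x8d (element 0 lands in
// bit 0), inserted into a v1i8 and then bitcast back to v8i1.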
| unsigned NumViaIntegerBits = |
| std::min(std::max(NumElts, 8u), Subtarget.getXLen()); |
| if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { |
| // If we have to use more than one INSERT_VECTOR_ELT then this |
// optimization is likely to increase code size; avoid performing it in
| // such a case. We can use a load from a constant pool in this case. |
| if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) |
| return SDValue(); |
| // Now we can create our integer vector type. Note that it may be larger |
| // than the resulting mask type: v4i1 would use v1i8 as its integer type. |
| MVT IntegerViaVecVT = |
| MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), |
| divideCeil(NumElts, NumViaIntegerBits)); |
| |
| uint64_t Bits = 0; |
| unsigned BitPos = 0, IntegerEltIdx = 0; |
| SDValue Vec = DAG.getUNDEF(IntegerViaVecVT); |
| |
| for (unsigned I = 0; I < NumElts; I++, BitPos++) { |
| // Once we accumulate enough bits to fill our scalar type, insert into |
| // our vector and clear our accumulated data. |
| if (I != 0 && I % NumViaIntegerBits == 0) { |
| if (NumViaIntegerBits <= 32) |
| Bits = SignExtend64(Bits, 32); |
| SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, |
| Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); |
| Bits = 0; |
| BitPos = 0; |
| IntegerEltIdx++; |
| } |
| SDValue V = Op.getOperand(I); |
| bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); |
| Bits |= ((uint64_t)BitValue << BitPos); |
| } |
| |
| // Insert the (remaining) scalar value into position in our integer |
| // vector type. |
| if (NumViaIntegerBits <= 32) |
| Bits = SignExtend64(Bits, 32); |
| SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, |
| DAG.getConstant(IntegerEltIdx, DL, XLenVT)); |
| |
| if (NumElts < NumViaIntegerBits) { |
| // If we're producing a smaller vector than our minimum legal integer |
| // type, bitcast to the equivalent (known-legal) mask type, and extract |
| // our final mask. |
| assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); |
| Vec = DAG.getBitcast(MVT::v8i1, Vec); |
| Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, |
| DAG.getConstant(0, DL, XLenVT)); |
| } else { |
| // Else we must have produced an integer type with the same size as the |
| // mask type; bitcast for the final result. |
| assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); |
| Vec = DAG.getBitcast(VT, Vec); |
| } |
| |
| return Vec; |
| } |
| |
| // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask |
| // vector type, we have a legal equivalently-sized i8 type, so we can use |
| // that. |
| MVT WideVecVT = VT.changeVectorElementType(MVT::i8); |
| SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); |
| |
| SDValue WideVec; |
| if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
| // For a splat, perform a scalar truncate before creating the wider |
| // vector. |
| assert(Splat.getValueType() == XLenVT && |
| "Unexpected type for i1 splat value"); |
| Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat, |
| DAG.getConstant(1, DL, XLenVT)); |
| WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); |
| } else { |
| SmallVector<SDValue, 8> Ops(Op->op_values()); |
| WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); |
| SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); |
| WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); |
| } |
| |
| return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); |
| } |
| |
| if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
| unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
| : RISCVISD::VMV_V_X_VL; |
| Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); |
| return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
| } |
| |
| // Try and match index sequences, which we can lower to the vid instruction |
| // with optional modifications. An all-undef vector is matched by |
| // getSplatValue, above. |
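// For example, <1,3,5,7> has StepNumerator=2, StepDenominator=1 and Addend=1,
// and is emitted (roughly) as (add splat(1), (shl (vid), splat(1))) on the
// fixed-length type.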
| if (auto SimpleVID = isSimpleVIDSequence(Op)) { |
| int64_t StepNumerator = SimpleVID->StepNumerator; |
| unsigned StepDenominator = SimpleVID->StepDenominator; |
| int64_t Addend = SimpleVID->Addend; |
// Only emit VIDs with suitably-small steps/addends. We use imm5 as the
| // threshold since it's the immediate value many RVV instructions accept. |
| if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) && |
| isInt<5>(Addend)) { |
| SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); |
| // Convert right out of the scalable type so we can use standard ISD |
| // nodes for the rest of the computation. If we used scalable types with |
| // these, we'd lose the fixed-length vector info and generate worse |
| // vsetvli code. |
| VID = convertFromScalableVector(VT, VID, DAG, Subtarget); |
| assert(StepNumerator != 0 && "Invalid step"); |
| bool Negate = false; |
| if (StepNumerator != 1) { |
| int64_t SplatStepVal = StepNumerator; |
| unsigned Opcode = ISD::MUL; |
| if (isPowerOf2_64(std::abs(StepNumerator))) { |
| Negate = StepNumerator < 0; |
| Opcode = ISD::SHL; |
| SplatStepVal = Log2_64(std::abs(StepNumerator)); |
| } |
| SDValue SplatStep = DAG.getSplatVector( |
| VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT)); |
| VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep); |
| } |
| if (StepDenominator != 1) { |
| SDValue SplatStep = DAG.getSplatVector( |
| VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT)); |
| VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep); |
| } |
| if (Addend != 0 || Negate) { |
| SDValue SplatAddend = |
| DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT)); |
| VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID); |
| } |
| return VID; |
| } |
| } |
| |
| // Attempt to detect "hidden" splats, which only reveal themselves as splats |
| // when re-interpreted as a vector with a larger element type. For example, |
| // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 |
// could instead be splatted as
| // v2i32 = build_vector i32 0x00010000, i32 0x00010000 |
| // TODO: This optimization could also work on non-constant splats, but it |
| // would require bit-manipulation instructions to construct the splat value. |
| SmallVector<SDValue> Sequence; |
| unsigned EltBitSize = VT.getScalarSizeInBits(); |
| const auto *BV = cast<BuildVectorSDNode>(Op); |
| if (VT.isInteger() && EltBitSize < 64 && |
| ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
| BV->getRepeatedSequence(Sequence) && |
| (Sequence.size() * EltBitSize) <= 64) { |
| unsigned SeqLen = Sequence.size(); |
| MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); |
| MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen); |
| assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || |
| ViaIntVT == MVT::i64) && |
| "Unexpected sequence type"); |
| |
| unsigned EltIdx = 0; |
| uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); |
| uint64_t SplatValue = 0; |
| // Construct the amalgamated value which can be splatted as this larger |
| // vector type. |
| for (const auto &SeqV : Sequence) { |
| if (!SeqV.isUndef()) |
| SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) |
| << (EltIdx * EltBitSize)); |
| EltIdx++; |
| } |
| |
| // On RV64, sign-extend from 32 to 64 bits where possible in order to |
// achieve better constant materialization.
| if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
| SplatValue = SignExtend64(SplatValue, 32); |
| |
| // Since we can't introduce illegal i64 types at this stage, we can only |
| // perform an i64 splat on RV32 if it is its own sign-extended value. That |
| // way we can use RVV instructions to splat. |
| assert((ViaIntVT.bitsLE(XLenVT) || |
| (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && |
| "Unexpected bitcast sequence"); |
| if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { |
| SDValue ViaVL = |
| DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); |
| MVT ViaContainerVT = |
| getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); |
| SDValue Splat = |
| DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, |
| DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); |
| Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); |
| return DAG.getBitcast(VT, Splat); |
| } |
| } |
| |
| // Try and optimize BUILD_VECTORs with "dominant values" - these are values |
| // which constitute a large proportion of the elements. In such cases we can |
| // splat a vector with the dominant element and make up the shortfall with |
| // INSERT_VECTOR_ELTs. |
| // Note that this includes vectors of 2 elements by association. The |
| // upper-most element is the "dominant" one, allowing us to use a splat to |
| // "insert" the upper element, and an insert of the lower element at position |
| // 0, which improves codegen. |
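// For example (when not optimizing for size), the v4i32 vector <a,b,a,a> is
// built by splatting a and inserting b at index 1.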
| SDValue DominantValue; |
| unsigned MostCommonCount = 0; |
| DenseMap<SDValue, unsigned> ValueCounts; |
| unsigned NumUndefElts = |
| count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); |
| |
| // Track the number of scalar loads we know we'd be inserting, estimated as |
| // any non-zero floating-point constant. Other kinds of element are either |
| // already in registers or are materialized on demand. The threshold at which |
// a vector load is more desirable than several scalar materialization and
| // vector-insertion instructions is not known. |
| unsigned NumScalarLoads = 0; |
| |
| for (SDValue V : Op->op_values()) { |
| if (V.isUndef()) |
| continue; |
| |
| ValueCounts.insert(std::make_pair(V, 0)); |
| unsigned &Count = ValueCounts[V]; |
| |
| if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) |
| NumScalarLoads += !CFP->isExactlyValue(+0.0); |
| |
| // Is this value dominant? In case of a tie, prefer the highest element as |
| // it's cheaper to insert near the beginning of a vector than it is at the |
| // end. |
| if (++Count >= MostCommonCount) { |
| DominantValue = V; |
| MostCommonCount = Count; |
| } |
| } |
| |
| assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); |
| unsigned NumDefElts = NumElts - NumUndefElts; |
| unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; |
| |
| // Don't perform this optimization when optimizing for size, since |
| // materializing elements and inserting them tends to cause code bloat. |
| if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && |
| ((MostCommonCount > DominantValueCountThreshold) || |
| (ValueCounts.size() <= Log2_32(NumDefElts)))) { |
| // Start by splatting the most common element. |
| SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); |
| |
| DenseSet<SDValue> Processed{DominantValue}; |
| MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); |
| for (const auto &OpIdx : enumerate(Op->ops())) { |
| const SDValue &V = OpIdx.value(); |
| if (V.isUndef() || !Processed.insert(V).second) |
| continue; |
| if (ValueCounts[V] == 1) { |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, |
| DAG.getConstant(OpIdx.index(), DL, XLenVT)); |
| } else { |
| // Blend in all instances of this value using a VSELECT, using a |
| // mask where each bit signals whether that element is the one |
| // we're after. |
| SmallVector<SDValue> Ops; |
| transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { |
| return DAG.getConstant(V == V1, DL, XLenVT); |
| }); |
| Vec = DAG.getNode(ISD::VSELECT, DL, VT, |
| DAG.getBuildVector(SelMaskTy, DL, Ops), |
| DAG.getSplatBuildVector(VT, DL, V), Vec); |
| } |
| } |
| |
| return Vec; |
| } |
| |
| return SDValue(); |
| } |
| |
| static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo, |
| SDValue Hi, SDValue VL, SelectionDAG &DAG) { |
| if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { |
| int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); |
| int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); |
// If the Hi constant is just Lo's sign bit replicated (i.e. Hi is the
// sign-extension of Lo), lower this as a custom node in order to try and
// match RVV vector/scalar instructions.
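// For example, splatting the i64 constant 0xffffffff80000000 on RV32 has
// Lo = 0x80000000 and Hi = 0xffffffff = Lo >> 31, so a single vmv.v.x of
// Lo (which sign-extends to SEW) suffices.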
| if ((LoC >> 31) == HiC) |
| return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); |
| } |
| |
| // Fall back to a stack store and stride x0 vector load. |
| return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL); |
| } |
| |
| // Called by type legalization to handle splat of i64 on RV32. |
| // FIXME: We can optimize this when the type has sign or zero bits in one |
| // of the halves. |
| static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, |
| SDValue VL, SelectionDAG &DAG) { |
| assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); |
| SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, |
| DAG.getConstant(0, DL, MVT::i32)); |
| SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, |
| DAG.getConstant(1, DL, MVT::i32)); |
| return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG); |
| } |
| |
// This function lowers a splat of the scalar operand Scalar with the vector
| // length VL. It ensures the final sequence is type legal, which is useful when |
| // lowering a splat after type legalization. |
| static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL, |
| SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| if (VT.isFloatingPoint()) |
| return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL); |
| |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| // Simplest case is that the operand needs to be promoted to XLenVT. |
| if (Scalar.getValueType().bitsLE(XLenVT)) { |
| // If the operand is a constant, sign extend to increase our chances |
| // of being able to use a .vi instruction. ANY_EXTEND would become a |
// zero extend and the simm5 check in isel would fail.
| // FIXME: Should we ignore the upper bits in isel instead? |
| unsigned ExtOpc = |
| isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
| Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); |
| return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL); |
| } |
| |
| assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && |
| "Unexpected scalar for splat lowering!"); |
| |
| // Otherwise use the more complicated splatting algorithm. |
| return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); |
| } |
| |
| static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| SDValue V1 = Op.getOperand(0); |
| SDValue V2 = Op.getOperand(1); |
| SDLoc DL(Op); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| MVT VT = Op.getSimpleValueType(); |
| unsigned NumElts = VT.getVectorNumElements(); |
| ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); |
| |
| MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
| |
| SDValue TrueMask, VL; |
| std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| if (SVN->isSplat()) { |
| const int Lane = SVN->getSplatIndex(); |
| if (Lane >= 0) { |
| MVT SVT = VT.getVectorElementType(); |
| |
| // Turn splatted vector load into a strided load with an X0 stride. |
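// For example (a sketch of the cases handled below), splatting lane 2 of a
// loaded v4f32 becomes a scalar load of the f32 at offset 8 followed by a
// vfmv.v.f splat, while splatting a lane of a loaded v2i64 on RV32 uses a
// stride-x0 vlse64 from the lane's address.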
| SDValue V = V1; |
| // Peek through CONCAT_VECTORS as VectorCombine can concat a vector |
| // with undef. |
| // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? |
| int Offset = Lane; |
| if (V.getOpcode() == ISD::CONCAT_VECTORS) { |
| int OpElements = |
| V.getOperand(0).getSimpleValueType().getVectorNumElements(); |
| V = V.getOperand(Offset / OpElements); |
| Offset %= OpElements; |
| } |
| |
| // We need to ensure the load isn't atomic or volatile. |
| if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { |
| auto *Ld = cast<LoadSDNode>(V); |
| Offset *= SVT.getStoreSize(); |
| SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), |
| TypeSize::Fixed(Offset), DL); |
| |
| // If this is SEW=64 on RV32, use a strided load with a stride of x0. |
| if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { |
| SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
| SDValue IntID = |
| DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); |
| SDValue Ops[] = {Ld->getChain(), IntID, NewAddr, |
| DAG.getRegister(RISCV::X0, XLenVT), VL}; |
| SDValue NewLoad = DAG.getMemIntrinsicNode( |
| ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, |
| DAG.getMachineFunction().getMachineMemOperand( |
| Ld->getMemOperand(), Offset, SVT.getStoreSize())); |
| DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); |
| return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); |
| } |
| |
| // Otherwise use a scalar load and splat. This will give the best |
| // opportunity to fold a splat into the operation. ISel can turn it into |
| // the x0 strided load if we aren't able to fold away the select. |
| if (SVT.isFloatingPoint()) |
| V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, |
| Ld->getPointerInfo().getWithOffset(Offset), |
| Ld->getOriginalAlign(), |
| Ld->getMemOperand()->getFlags()); |
| else |
| V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, |
| Ld->getPointerInfo().getWithOffset(Offset), SVT, |
| Ld->getOriginalAlign(), |
| Ld->getMemOperand()->getFlags()); |
| DAG.makeEquivalentMemoryOrdering(Ld, V); |
| |
| unsigned Opc = |
| VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; |
| SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL); |
| return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
| } |
| |
| V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); |
| assert(Lane < (int)NumElts && "Unexpected lane!"); |
| SDValue Gather = |
| DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1, |
| DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL); |
| return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
| } |
| } |
| |
| // Detect shuffles which can be re-expressed as vector selects; these are |
| // shuffles in which each element in the destination is taken from an element |
// at the corresponding index of either source vector.
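// For example, the v4i32 mask <0, 5, 2, 7> takes each result element from the
// matching position of one of the two sources, so (as long as the operands
// aren't swapped below) it becomes a vselect with mask <1, 0, 1, 0>.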
| bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) { |
| int MaskIndex = MaskIdx.value(); |
| return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; |
| }); |
| |
| assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); |
| |
| SmallVector<SDValue> MaskVals; |
| // As a backup, shuffles can be lowered via a vrgather instruction, possibly |
| // merged with a second vrgather. |
| SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; |
| |
| // By default we preserve the original operand order, and use a mask to |
| // select LHS as true and RHS as false. However, since RVV vector selects may |
| // feature splats but only on the LHS, we may choose to invert our mask and |
| // instead select between RHS and LHS. |
| bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); |
| bool InvertMask = IsSelect == SwapOps; |
| |
// Keep track of which non-undef indices are used by each LHS/RHS shuffle
| // half. |
| DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; |
| |
| // Now construct the mask that will be used by the vselect or blended |
| // vrgather operation. For vrgathers, construct the appropriate indices into |
| // each vector. |
| for (int MaskIndex : SVN->getMask()) { |
| bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; |
| MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); |
| if (!IsSelect) { |
| bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; |
| GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 |
| ? DAG.getConstant(MaskIndex, DL, XLenVT) |
| : DAG.getUNDEF(XLenVT)); |
| GatherIndicesRHS.push_back( |
| IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) |
| : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); |
| if (IsLHSOrUndefIndex && MaskIndex >= 0) |
| ++LHSIndexCounts[MaskIndex]; |
| if (!IsLHSOrUndefIndex) |
| ++RHSIndexCounts[MaskIndex - NumElts]; |
| } |
| } |
| |
| if (SwapOps) { |
| std::swap(V1, V2); |
| std::swap(GatherIndicesLHS, GatherIndicesRHS); |
| } |
| |
| assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); |
| MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
| SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); |
| |
| if (IsSelect) |
| return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); |
| |
| if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { |
| // On such a large vector we're unable to use i8 as the index type. |
| // FIXME: We could promote the index to i16 and use vrgatherei16, but that |
| // may involve vector splitting if we're already at LMUL=8, or our |
| // user-supplied maximum fixed-length LMUL. |
| return SDValue(); |
| } |
| |
| unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; |
| unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; |
| MVT IndexVT = VT.changeTypeToInteger(); |
| // Since we can't introduce illegal index types at this stage, use i16 and |
| // vrgatherei16 if the corresponding index type for plain vrgather is greater |
| // than XLenVT. |
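// For example, a v4i64 shuffle on RV32 would otherwise need illegal v4i64
// indices, so v4i16 indices and vrgatherei16 are used instead.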
| if (IndexVT.getScalarType().bitsGT(XLenVT)) { |
| GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
| IndexVT = IndexVT.changeVectorElementType(MVT::i16); |
| } |
| |
| MVT IndexContainerVT = |
| ContainerVT.changeVectorElementType(IndexVT.getScalarType()); |
| |
| SDValue Gather; |
| // TODO: This doesn't trigger for i64 vectors on RV32, since there we |
| // encounter a bitcasted BUILD_VECTOR with low/high i32 values. |
| if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { |
| Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget); |
| } else { |
| V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); |
| // If only one index is used, we can use a "splat" vrgather. |
| // TODO: We can splat the most-common index and fix-up any stragglers, if |
| // that's beneficial. |
| if (LHSIndexCounts.size() == 1) { |
| int SplatIndex = LHSIndexCounts.begin()->getFirst(); |
| Gather = |
| DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, |
| DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL); |
| } else { |
| SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); |
| LHSIndices = |
| convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); |
| |
| Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, |
| TrueMask, VL); |
| } |
| } |
| |
| // If a second vector operand is used by this shuffle, blend it in with an |
| // additional vrgather. |
| if (!V2.isUndef()) { |
| V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); |
| // If only one index is used, we can use a "splat" vrgather. |
| // TODO: We can splat the most-common index and fix-up any stragglers, if |
| // that's beneficial. |
| if (RHSIndexCounts.size() == 1) { |
| int SplatIndex = RHSIndexCounts.begin()->getFirst(); |
| V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, |
| DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL); |
| } else { |
| SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); |
| RHSIndices = |
| convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); |
| V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, |
| VL); |
| } |
| |
| MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); |
| SelectMask = |
| convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); |
| |
| Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2, |
| Gather, VL); |
| } |
| |
| return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
| } |
| |
| static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT, |
| SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| if (VT.isScalableVector()) |
| return DAG.getFPExtendOrRound(Op, DL, VT); |
| assert(VT.isFixedLengthVector() && |
| "Unexpected value type for RVV FP extend/round lowering"); |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType()) |
| ? RISCVISD::FP_EXTEND_VL |
| : RISCVISD::FP_ROUND_VL; |
| return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL); |
| } |
| |
| // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting |
| // the exponent. |
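// A worked example of the arithmetic below, for a 32-bit element with value 8
// (converted via f64): cttz uses 8 & -8 = 8, whose f64 representation has a
// biased exponent of 1026, giving 1026 - 1023 = 3 trailing zeros; ctlz
// computes (1023 + 31) - 1026 = 28 leading zeros.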
| static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { |
| MVT VT = Op.getSimpleValueType(); |
| unsigned EltSize = VT.getScalarSizeInBits(); |
| SDValue Src = Op.getOperand(0); |
| SDLoc DL(Op); |
| |
// We need an FP type that can represent the value.
| // TODO: Use f16 for i8 when possible? |
| MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32; |
| MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); |
| |
| // Legal types should have been checked in the RISCVTargetLowering |
| // constructor. |
| // TODO: Splitting may make sense in some cases. |
| assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && |
| "Expected legal float type!"); |
| |
| // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. |
| // The trailing zero count is equal to log2 of this single bit value. |
| if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
| SDValue Neg = |
| DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); |
| Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); |
| } |
| |
| // We have a legal FP type, convert to it. |
| SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); |
| // Bitcast to integer and shift the exponent to the LSB. |
| EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); |
| SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); |
| unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; |
| SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, |
| DAG.getConstant(ShiftAmt, DL, IntVT)); |
| // Truncate back to original type to allow vnsrl. |
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift); |
| // The exponent contains log2 of the value in biased form. |
| unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; |
| |
| // For trailing zeros, we just need to subtract the bias. |
| if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) |
| return DAG.getNode(ISD::SUB, DL, VT, Trunc, |
| DAG.getConstant(ExponentBias, DL, VT)); |
| |
| // For leading zeros, we need to remove the bias and convert from log2 to |
| // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). |
| unsigned Adjust = ExponentBias + (EltSize - 1); |
| return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc); |
| } |
| |
| // While RVV has alignment restrictions, we should always be able to load as a |
| // legal equivalently-sized byte-typed vector instead. This method is |
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
| // the load is already correctly-aligned, it returns SDValue(). |
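// For example, an underaligned load of v8i16 is instead performed as a v16i8
// load of the same address and the result is bitcast back to v8i16.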
| SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, |
| SelectionDAG &DAG) const { |
| auto *Load = cast<LoadSDNode>(Op); |
| assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); |
| |
| if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
| Load->getMemoryVT(), |
| *Load->getMemOperand())) |
| return SDValue(); |
| |
| SDLoc DL(Op); |
| MVT VT = Op.getSimpleValueType(); |
| unsigned EltSizeBits = VT.getScalarSizeInBits(); |
| assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
| "Unexpected unaligned RVV load type"); |
| MVT NewVT = |
| MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
| assert(NewVT.isValid() && |
| "Expecting equally-sized RVV vector types to be legal"); |
| SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), |
| Load->getPointerInfo(), Load->getOriginalAlign(), |
| Load->getMemOperand()->getFlags()); |
| return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); |
| } |
| |
| // While RVV has alignment restrictions, we should always be able to store as a |
| // legal equivalently-sized byte-typed vector instead. This method is |
// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
| // returns SDValue() if the store is already correctly aligned. |
| SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, |
| SelectionDAG &DAG) const { |
| auto *Store = cast<StoreSDNode>(Op); |
| assert(Store && Store->getValue().getValueType().isVector() && |
| "Expected vector store"); |
| |
| if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
| Store->getMemoryVT(), |
| *Store->getMemOperand())) |
| return SDValue(); |
| |
| SDLoc DL(Op); |
| SDValue StoredVal = Store->getValue(); |
| MVT VT = StoredVal.getSimpleValueType(); |
| unsigned EltSizeBits = VT.getScalarSizeInBits(); |
| assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
| "Unexpected unaligned RVV store type"); |
| MVT NewVT = |
| MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
| assert(NewVT.isValid() && |
| "Expecting equally-sized RVV vector types to be legal"); |
| StoredVal = DAG.getBitcast(NewVT, StoredVal); |
| return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), |
| Store->getPointerInfo(), Store->getOriginalAlign(), |
| Store->getMemOperand()->getFlags()); |
| } |
| |
| SDValue RISCVTargetLowering::LowerOperation(SDValue Op, |
| SelectionDAG &DAG) const { |
| switch (Op.getOpcode()) { |
| default: |
| report_fatal_error("unimplemented operand"); |
| case ISD::GlobalAddress: |
| return lowerGlobalAddress(Op, DAG); |
| case ISD::BlockAddress: |
| return lowerBlockAddress(Op, DAG); |
| case ISD::ConstantPool: |
| return lowerConstantPool(Op, DAG); |
| case ISD::JumpTable: |
| return lowerJumpTable(Op, DAG); |
| case ISD::GlobalTLSAddress: |
| return lowerGlobalTLSAddress(Op, DAG); |
| case ISD::SELECT: |
| return lowerSELECT(Op, DAG); |
| case ISD::BRCOND: |
| return lowerBRCOND(Op, DAG); |
| case ISD::VASTART: |
| return lowerVASTART(Op, DAG); |
| case ISD::FRAMEADDR: |
| return lowerFRAMEADDR(Op, DAG); |
| case ISD::RETURNADDR: |
| return lowerRETURNADDR(Op, DAG); |
| case ISD::SHL_PARTS: |
| return lowerShiftLeftParts(Op, DAG); |
| case ISD::SRA_PARTS: |
| return lowerShiftRightParts(Op, DAG, true); |
| case ISD::SRL_PARTS: |
| return lowerShiftRightParts(Op, DAG, false); |
| case ISD::BITCAST: { |
|