| //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the interfaces that RISCV uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "RISCVISelLowering.h" |
| #include "MCTargetDesc/RISCVMatInt.h" |
| #include "RISCV.h" |
| #include "RISCVMachineFunctionInfo.h" |
| #include "RISCVRegisterInfo.h" |
| #include "RISCVSubtarget.h" |
| #include "RISCVTargetMachine.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/DiagnosticPrinter.h" |
| #include "llvm/IR/IntrinsicsRISCV.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "riscv-lower" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| |
| RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, |
| const RISCVSubtarget &STI) |
| : TargetLowering(TM), Subtarget(STI) { |
| |
| if (Subtarget.isRV32E()) |
| report_fatal_error("Codegen not yet implemented for RV32E"); |
| |
| RISCVABI::ABI ABI = Subtarget.getTargetABI(); |
| assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); |
| |
| if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && |
| !Subtarget.hasStdExtF()) { |
| errs() << "Hard-float 'f' ABI can't be used for a target that " |
| "doesn't support the F instruction set extension (ignoring " |
| "target-abi)\n"; |
| ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
| } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && |
| !Subtarget.hasStdExtD()) { |
| errs() << "Hard-float 'd' ABI can't be used for a target that " |
| "doesn't support the D instruction set extension (ignoring " |
| "target-abi)\n"; |
| ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
| } |
| |
| switch (ABI) { |
| default: |
| report_fatal_error("Don't know how to lower this ABI"); |
| case RISCVABI::ABI_ILP32: |
| case RISCVABI::ABI_ILP32F: |
| case RISCVABI::ABI_ILP32D: |
| case RISCVABI::ABI_LP64: |
| case RISCVABI::ABI_LP64F: |
| case RISCVABI::ABI_LP64D: |
| break; |
| } |
| |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| // Set up the register classes. |
| addRegisterClass(XLenVT, &RISCV::GPRRegClass); |
| |
| if (Subtarget.hasStdExtZfh()) |
| addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); |
| if (Subtarget.hasStdExtF()) |
| addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); |
| if (Subtarget.hasStdExtD()) |
| addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); |
| |
| static const MVT::SimpleValueType BoolVecVTs[] = { |
| MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, |
| MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; |
| static const MVT::SimpleValueType IntVecVTs[] = { |
| MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, |
| MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, |
| MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, |
| MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, |
| MVT::nxv4i64, MVT::nxv8i64}; |
| static const MVT::SimpleValueType F16VecVTs[] = { |
| MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, |
| MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; |
| static const MVT::SimpleValueType F32VecVTs[] = { |
| MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; |
| static const MVT::SimpleValueType F64VecVTs[] = { |
| MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; |
| |
| if (Subtarget.hasStdExtV()) { |
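| // Each scalable vector type is mapped to the vector register class that |
| // matches its known minimum size. For example, nxv1i8 (8 bits) and nxv2i32 |
| // (64 bits) fit in a single VR, nxv4i32 (128 bits) needs VRM2, nxv8i32 |
| // (256 bits) needs VRM4, and nxv16i32 (512 bits) needs VRM8. |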
| auto addRegClassForRVV = [this](MVT VT) { |
| unsigned Size = VT.getSizeInBits().getKnownMinValue(); |
| assert(Size <= 512 && isPowerOf2_32(Size)); |
| const TargetRegisterClass *RC; |
| if (Size <= 64) |
| RC = &RISCV::VRRegClass; |
| else if (Size == 128) |
| RC = &RISCV::VRM2RegClass; |
| else if (Size == 256) |
| RC = &RISCV::VRM4RegClass; |
| else |
| RC = &RISCV::VRM8RegClass; |
| |
| addRegisterClass(VT, RC); |
| }; |
| |
| for (MVT VT : BoolVecVTs) |
| addRegClassForRVV(VT); |
| for (MVT VT : IntVecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.hasStdExtZfh()) |
| for (MVT VT : F16VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.hasStdExtF()) |
| for (MVT VT : F32VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.hasStdExtD()) |
| for (MVT VT : F64VecVTs) |
| addRegClassForRVV(VT); |
| |
| if (Subtarget.useRVVForFixedLengthVectors()) { |
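| // Fixed-length vectors reuse the scalable register classes: the class is |
| // chosen from the LMUL the subtarget assigns to the type, except that mask |
| // (i1-element) vectors always fit in a single VR. |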
| auto addRegClassForFixedVectors = [this](MVT VT) { |
| unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); |
| const TargetRegisterClass *RC; |
| if (LMul == 1 || VT.getVectorElementType() == MVT::i1) |
| RC = &RISCV::VRRegClass; |
| else if (LMul == 2) |
| RC = &RISCV::VRM2RegClass; |
| else if (LMul == 4) |
| RC = &RISCV::VRM4RegClass; |
| else if (LMul == 8) |
| RC = &RISCV::VRM8RegClass; |
| else |
| llvm_unreachable("Unexpected LMul!"); |
| |
| addRegisterClass(VT, RC); |
| }; |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
| if (useRVVForFixedLengthVectorVT(VT)) |
| addRegClassForFixedVectors(VT); |
| |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
| if (useRVVForFixedLengthVectorVT(VT)) |
| addRegClassForFixedVectors(VT); |
| } |
| } |
| |
| // Compute derived properties from the register classes. |
| computeRegisterProperties(STI.getRegisterInfo()); |
| |
| setStackPointerRegisterToSaveRestore(RISCV::X2); |
| |
| for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) |
| setLoadExtAction(N, XLenVT, MVT::i1, Promote); |
| |
| // TODO: add all necessary setOperationAction calls. |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); |
| |
| setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
| setOperationAction(ISD::BR_CC, XLenVT, Expand); |
| setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
| setOperationAction(ISD::SELECT_CC, XLenVT, Expand); |
| |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
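| // Without Zbb, an i8/i16 sign_extend_inreg expands to a shift pair; with |
| // Zbb it stays Legal since sext.b/sext.h can do it in one instruction. |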
| if (!Subtarget.hasStdExtZbb()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); |
| |
| if (Subtarget.is64Bit()) { |
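| // The 32-bit arithmetic and shift operations below are custom |
| // type-legalized on RV64 so that their results are kept sign-extended in |
| // 64-bit registers, matching the W-form instructions (addw, subw, sllw, |
| // srlw, sraw). |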
| setOperationAction(ISD::ADD, MVT::i32, Custom); |
| setOperationAction(ISD::SUB, MVT::i32, Custom); |
| setOperationAction(ISD::SHL, MVT::i32, Custom); |
| setOperationAction(ISD::SRA, MVT::i32, Custom); |
| setOperationAction(ISD::SRL, MVT::i32, Custom); |
| |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDSAT, MVT::i32, Custom); |
| setOperationAction(ISD::USUBSAT, MVT::i32, Custom); |
| } |
| |
| if (!Subtarget.hasStdExtM()) { |
| setOperationAction(ISD::MUL, XLenVT, Expand); |
| setOperationAction(ISD::MULHS, XLenVT, Expand); |
| setOperationAction(ISD::MULHU, XLenVT, Expand); |
| setOperationAction(ISD::SDIV, XLenVT, Expand); |
| setOperationAction(ISD::UDIV, XLenVT, Expand); |
| setOperationAction(ISD::SREM, XLenVT, Expand); |
| setOperationAction(ISD::UREM, XLenVT, Expand); |
| } else { |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::MUL, MVT::i32, Custom); |
| setOperationAction(ISD::MUL, MVT::i128, Custom); |
| |
| setOperationAction(ISD::SDIV, MVT::i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::i8, Custom); |
| setOperationAction(ISD::UREM, MVT::i8, Custom); |
| setOperationAction(ISD::SDIV, MVT::i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::i16, Custom); |
| setOperationAction(ISD::UREM, MVT::i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::i32, Custom); |
| setOperationAction(ISD::UREM, MVT::i32, Custom); |
| } else { |
| setOperationAction(ISD::MUL, MVT::i64, Custom); |
| } |
| } |
| |
| setOperationAction(ISD::SDIVREM, XLenVT, Expand); |
| setOperationAction(ISD::UDIVREM, XLenVT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); |
| |
| setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); |
| setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); |
| setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); |
| |
| if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::ROTL, MVT::i32, Custom); |
| setOperationAction(ISD::ROTR, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::ROTL, XLenVT, Expand); |
| setOperationAction(ISD::ROTR, XLenVT, Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbp()) { |
| // Custom lower bswap/bitreverse so we can convert them to GREVI to enable |
| // more combining. |
| setOperationAction(ISD::BITREVERSE, XLenVT, Custom); |
| setOperationAction(ISD::BSWAP, XLenVT, Custom); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); |
| setOperationAction(ISD::BSWAP, MVT::i32, Custom); |
| } |
| } else { |
| // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll |
| // pattern match it directly in isel. |
| setOperationAction(ISD::BSWAP, XLenVT, |
| Subtarget.hasStdExtZbb() ? Legal : Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbb()) { |
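| // Zbb provides native min/max/minu/maxu, so these stay Legal at XLenVT. |
| // On RV64 the i32 count-leading/trailing-zero forms are custom |
| // type-legalized so they can use the word-sized clzw/ctzw instructions. |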
| setOperationAction(ISD::SMIN, XLenVT, Legal); |
| setOperationAction(ISD::SMAX, XLenVT, Legal); |
| setOperationAction(ISD::UMIN, XLenVT, Legal); |
| setOperationAction(ISD::UMAX, XLenVT, Legal); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); |
| setOperationAction(ISD::CTLZ, MVT::i32, Custom); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::CTTZ, XLenVT, Expand); |
| setOperationAction(ISD::CTLZ, XLenVT, Expand); |
| setOperationAction(ISD::CTPOP, XLenVT, Expand); |
| } |
| |
| if (Subtarget.hasStdExtZbt()) { |
| setOperationAction(ISD::FSHL, XLenVT, Custom); |
| setOperationAction(ISD::FSHR, XLenVT, Custom); |
| setOperationAction(ISD::SELECT, XLenVT, Legal); |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::FSHL, MVT::i32, Custom); |
| setOperationAction(ISD::FSHR, MVT::i32, Custom); |
| } |
| } else { |
| setOperationAction(ISD::SELECT, XLenVT, Custom); |
| } |
| |
| ISD::CondCode FPCCToExpand[] = { |
| ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
| ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, |
| ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; |
| |
| ISD::NodeType FPOpToExpand[] = { |
| ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, |
| ISD::FP_TO_FP16}; |
| |
| if (Subtarget.hasStdExtZfh()) |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| |
| if (Subtarget.hasStdExtZfh()) { |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f16, Expand); |
| for (auto Op : FPOpToExpand) |
| setOperationAction(Op, MVT::f16, Expand); |
| } |
| |
| if (Subtarget.hasStdExtF()) { |
| setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
| for (auto Op : FPOpToExpand) |
| setOperationAction(Op, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| } |
| |
| if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) |
| setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
| |
| if (Subtarget.hasStdExtD()) { |
| setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
| for (auto CC : FPCCToExpand) |
| setCondCodeAction(CC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| for (auto Op : FPOpToExpand) |
| setOperationAction(Op, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| } |
| |
| if (Subtarget.is64Bit()) { |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| } |
| |
| setOperationAction(ISD::GlobalAddress, XLenVT, Custom); |
| setOperationAction(ISD::BlockAddress, XLenVT, Custom); |
| setOperationAction(ISD::ConstantPool, XLenVT, Custom); |
| setOperationAction(ISD::JumpTable, XLenVT, Custom); |
| |
| setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); |
| |
| // TODO: On M-mode only targets, the cycle[h] CSR may not be present. |
| // Unfortunately this can't be determined just from the ISA naming string. |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, |
| Subtarget.is64Bit() ? Legal : Custom); |
| |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| |
| if (Subtarget.hasStdExtA()) { |
| setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); |
| setMinCmpXchgSizeInBits(32); |
| } else { |
| setMaxAtomicSizeInBitsSupported(0); |
| } |
| |
| setBooleanContents(ZeroOrOneBooleanContent); |
| |
| if (Subtarget.hasStdExtV()) { |
| setBooleanVectorContents(ZeroOrOneBooleanContent); |
| |
| setOperationAction(ISD::VSCALE, XLenVT, Custom); |
| |
| // RVV intrinsics may have illegal operands. |
| // We also need to custom legalize vmv.x.s. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
| |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
| |
| if (!Subtarget.is64Bit()) { |
| // We must custom-lower certain vXi64 operations on RV32 due to the vector |
| // element type being illegal. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); |
| } |
| |
| for (MVT VT : BoolVecVTs) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
| |
| // Mask VTs are custom-expanded into a series of standard nodes |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| // Expand all extending loads to types larger than this, and truncating |
| // stores from types larger than this. |
| for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
| setTruncStoreAction(OtherVT, VT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| } |
| |
| for (MVT VT : IntVecVTs) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
| setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); |
| |
| setOperationAction(ISD::SMIN, VT, Legal); |
| setOperationAction(ISD::SMAX, VT, Legal); |
| setOperationAction(ISD::UMIN, VT, Legal); |
| setOperationAction(ISD::UMAX, VT, Legal); |
| |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| |
| // Custom-lower extensions and truncations from/to mask types. |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| |
| // RVV has native int->float & float->int conversions where the |
| // element type sizes are within one power-of-two of each other. Any |
| // wider distances between type sizes have to be lowered as sequences |
| // which progressively narrow the gap in stages. |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" |
| // nodes which truncate by one power of two at a time. |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| |
| // Custom-lower insert/extract operations to simplify patterns. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| // Custom-lower reduction operations to set up the corresponding custom |
| // nodes' operands. |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::STEP_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); |
| |
| for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
| setTruncStoreAction(VT, OtherVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| } |
| |
| // Expand various CCs to best match the RVV ISA, which natively supports UNE |
| // but no other unordered comparisons, and supports all ordered comparisons |
| // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization |
| // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), |
| // and we pattern-match those back to the "original", swapping operands once |
| // more. This way we catch both operations and both "vf" and "fv" forms with |
| // fewer patterns. |
| ISD::CondCode VFPCCToExpand[] = { |
| ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
| ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, |
| ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, |
| }; |
| |
| // Sets common operation actions on RVV floating-point vector types. |
| const auto SetCommonVFPActions = [&](MVT VT) { |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
| // RVV has native FP_ROUND & FP_EXTEND conversions where the element type |
| // sizes are within one power-of-two of each other. Therefore conversions |
| // between vXf16 and vXf64 must be lowered as sequences which convert via |
| // vXf32. |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| // Custom-lower insert/extract operations to simplify patterns. |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| // Expand various condition codes (explained above). |
| for (auto CC : VFPCCToExpand) |
| setCondCodeAction(CC, VT, Expand); |
| |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| setOperationAction(ISD::FCOPYSIGN, VT, Legal); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); |
| }; |
| |
| // Sets common extload/truncstore actions on RVV floating-point vector |
| // types. |
| const auto SetCommonVFPExtLoadTruncStoreActions = |
| [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { |
| for (auto SmallVT : SmallerVTs) { |
| setTruncStoreAction(VT, SmallVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); |
| } |
| }; |
| |
| if (Subtarget.hasStdExtZfh()) |
| for (MVT VT : F16VecVTs) |
| SetCommonVFPActions(VT); |
| |
| for (MVT VT : F32VecVTs) { |
| if (Subtarget.hasStdExtF()) |
| SetCommonVFPActions(VT); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
| } |
| |
| for (MVT VT : F64VecVTs) { |
| if (Subtarget.hasStdExtD()) |
| SetCommonVFPActions(VT); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
| SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); |
| } |
| |
| if (Subtarget.useRVVForFixedLengthVectors()) { |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
| if (!useRVVForFixedLengthVectorVT(VT)) |
| continue; |
| |
| // By default everything must be expanded. |
| for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
| setOperationAction(Op, VT, Expand); |
| for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, OtherVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); |
| } |
| |
| // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| |
| setOperationAction(ISD::SETCC, VT, Custom); |
| |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| // Operations below are different between masks and other vectors. |
| if (VT.getVectorElementType() == MVT::i1) { |
| setOperationAction(ISD::AND, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::XOR, VT, Custom); |
| continue; |
| } |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::ADD, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::SUB, VT, Custom); |
| setOperationAction(ISD::AND, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::XOR, VT, Custom); |
| setOperationAction(ISD::SDIV, VT, Custom); |
| setOperationAction(ISD::SREM, VT, Custom); |
| setOperationAction(ISD::UDIV, VT, Custom); |
| setOperationAction(ISD::UREM, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| |
| setOperationAction(ISD::SMIN, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Custom); |
| setOperationAction(ISD::UMIN, VT, Custom); |
| setOperationAction(ISD::UMAX, VT, Custom); |
| setOperationAction(ISD::ABS, VT, Custom); |
| |
| setOperationAction(ISD::MULHS, VT, Custom); |
| setOperationAction(ISD::MULHU, VT, Custom); |
| |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| |
| // Custom-lower reduction operations to set up the corresponding custom |
| // nodes' operands. |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| } |
| |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { |
| if (!useRVVForFixedLengthVectorVT(VT)) |
| continue; |
| |
| // By default everything must be expanded. |
| for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
| setOperationAction(Op, VT, Expand); |
| for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); |
| setTruncStoreAction(VT, OtherVT, Expand); |
| } |
| |
| // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::FADD, VT, Custom); |
| setOperationAction(ISD::FSUB, VT, Custom); |
| setOperationAction(ISD::FMUL, VT, Custom); |
| setOperationAction(ISD::FDIV, VT, Custom); |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| setOperationAction(ISD::FSQRT, VT, Custom); |
| setOperationAction(ISD::FMA, VT, Custom); |
| |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| |
| for (auto CC : VFPCCToExpand) |
| setCondCodeAction(CC, VT, Expand); |
| |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| } |
| |
| // Custom-legalize bitcasts from fixed-length vectors to scalar types. |
| setOperationAction(ISD::BITCAST, MVT::i8, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f32, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f64, Custom); |
| } |
| } |
| |
| // Function alignments. |
| const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); |
| setMinFunctionAlignment(FunctionAlignment); |
| setPrefFunctionAlignment(FunctionAlignment); |
| |
| setMinimumJumpTableEntries(5); |
| |
| // Jumps are expensive compared to logic operations. |
| setJumpIsExpensive(); |
| |
| // We can use any register for comparisons |
| setHasMultipleConditionRegisters(); |
| |
| if (Subtarget.hasStdExtZbp()) { |
| setTargetDAGCombine(ISD::OR); |
| } |
| if (Subtarget.hasStdExtV()) { |
| setTargetDAGCombine(ISD::FCOPYSIGN); |
| setTargetDAGCombine(ISD::MGATHER); |
| setTargetDAGCombine(ISD::MSCATTER); |
| } |
| } |
| |
| EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, |
| LLVMContext &Context, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return getPointerTy(DL); |
| if (Subtarget.hasStdExtV() && |
| (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) |
| return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
| const CallInst &I, |
| MachineFunction &MF, |
| unsigned Intrinsic) const { |
| switch (Intrinsic) { |
| default: |
| return false; |
| case Intrinsic::riscv_masked_atomicrmw_xchg_i32: |
| case Intrinsic::riscv_masked_atomicrmw_add_i32: |
| case Intrinsic::riscv_masked_atomicrmw_sub_i32: |
| case Intrinsic::riscv_masked_atomicrmw_nand_i32: |
| case Intrinsic::riscv_masked_atomicrmw_max_i32: |
| case Intrinsic::riscv_masked_atomicrmw_min_i32: |
| case Intrinsic::riscv_masked_atomicrmw_umax_i32: |
| case Intrinsic::riscv_masked_atomicrmw_umin_i32: |
| case Intrinsic::riscv_masked_cmpxchg_i32: |
| PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); |
| Info.opc = ISD::INTRINSIC_W_CHAIN; |
| Info.memVT = MVT::getVT(PtrTy->getElementType()); |
| Info.ptrVal = I.getArgOperand(0); |
| Info.offset = 0; |
| Info.align = Align(4); |
| Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
| MachineMemOperand::MOVolatile; |
| return true; |
| } |
| } |
| |
| bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
| const AddrMode &AM, Type *Ty, |
| unsigned AS, |
| Instruction *I) const { |
| // No global is ever allowed as a base. |
| if (AM.BaseGV) |
| return false; |
| |
| // Require a 12-bit signed offset. |
| if (!isInt<12>(AM.BaseOffs)) |
| return false; |
| |
| switch (AM.Scale) { |
| case 0: // "r+i" or just "i", depending on HasBaseReg. |
| break; |
| case 1: |
| if (!AM.HasBaseReg) // allow "r+i". |
| break; |
| return false; // disallow "r+r" or "r+r+i". |
| default: |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
| return isInt<12>(Imm); |
| } |
| |
| bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
| return isInt<12>(Imm); |
| } |
| |
| // On RV32, 64-bit integers are split into their high and low parts and held |
| // in two different registers, so the trunc is free since the low register can |
| // just be used. |
| bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { |
| if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
| return false; |
| unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); |
| unsigned DestBits = DstTy->getPrimitiveSizeInBits(); |
| return (SrcBits == 64 && DestBits == 32); |
| } |
| |
| bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { |
| if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || |
| !SrcVT.isInteger() || !DstVT.isInteger()) |
| return false; |
| unsigned SrcBits = SrcVT.getSizeInBits(); |
| unsigned DestBits = DstVT.getSizeInBits(); |
| return (SrcBits == 64 && DestBits == 32); |
| } |
| |
| bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
| // Zexts are free if they can be combined with a load. |
| if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
| EVT MemVT = LD->getMemoryVT(); |
| if ((MemVT == MVT::i8 || MemVT == MVT::i16 || |
| (Subtarget.is64Bit() && MemVT == MVT::i32)) && |
| (LD->getExtensionType() == ISD::NON_EXTLOAD || |
| LD->getExtensionType() == ISD::ZEXTLOAD)) |
| return true; |
| } |
| |
| return TargetLowering::isZExtFree(Val, VT2); |
| } |
| |
| bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
| return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
| } |
| |
| bool RISCVTargetLowering::isCheapToSpeculateCttz() const { |
| return Subtarget.hasStdExtZbb(); |
| } |
| |
| bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { |
| return Subtarget.hasStdExtZbb(); |
| } |
| |
| bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
| bool ForCodeSize) const { |
| if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) |
| return false; |
| if (VT == MVT::f32 && !Subtarget.hasStdExtF()) |
| return false; |
| if (VT == MVT::f64 && !Subtarget.hasStdExtD()) |
| return false; |
| if (Imm.isNegZero()) |
| return false; |
| return Imm.isZero(); |
| } |
| |
| bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { |
| return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || |
| (VT == MVT::f32 && Subtarget.hasStdExtF()) || |
| (VT == MVT::f64 && Subtarget.hasStdExtD()); |
| } |
| |
| MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still |
| // end up using a GPR but that will be decided based on ABI. |
| if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) |
| return MVT::f32; |
| |
| return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
| } |
| |
| unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still |
| // end up using a GPR but that will be decided based on ABI. |
| if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) |
| return 1; |
| |
| return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
| } |
| |
| // Changes the condition code and swaps operands if necessary, so the SetCC |
| // operation matches one of the comparisons supported directly by branches |
| // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare |
| // with 1/-1. |
| static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, |
| ISD::CondCode &CC, SelectionDAG &DAG) { |
| // Convert X > -1 to X >= 0. |
| if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { |
| RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
| CC = ISD::SETGE; |
| return; |
| } |
| // Convert X < 1 to 0 >= X. |
| if (CC == ISD::SETLT && isOneConstant(RHS)) { |
| RHS = LHS; |
| LHS = DAG.getConstant(0, DL, RHS.getValueType()); |
| CC = ISD::SETGE; |
| return; |
| } |
| |
| switch (CC) { |
| default: |
| break; |
| case ISD::SETGT: |
| case ISD::SETLE: |
| case ISD::SETUGT: |
| case ISD::SETULE: |
| CC = ISD::getSetCCSwappedOperands(CC); |
| std::swap(LHS, RHS); |
| break; |
| } |
| } |
| |
| // Return the RISC-V branch opcode that matches the given DAG integer |
| // condition code. The CondCode must be one of those supported by the RISC-V |
| // ISA (see translateSetCCForBranch). |
| static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { |
| switch (CC) { |
| default: |
| llvm_unreachable("Unsupported CondCode"); |
| case ISD::SETEQ: |
| return RISCV::BEQ; |
| case ISD::SETNE: |
| return RISCV::BNE; |
| case ISD::SETLT: |
| return RISCV::BLT; |
| case ISD::SETGE: |
| return RISCV::BGE; |
| case ISD::SETULT: |
| return RISCV::BLTU; |
| case ISD::SETUGE: |
| return RISCV::BGEU; |
| } |
| } |
| |
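| // Map a scalable vector type to its LMUL group based on its known minimum |
| // size, e.g. nxv1i8 (8 bits) -> LMUL_F8 (a fractional LMUL of 1/8), nxv2i32 |
| // (64 bits) -> LMUL_1, and nxv4i32 (128 bits) -> LMUL_2. Mask types are |
| // scaled up by 8, so e.g. nxv8i1 also maps to LMUL_1. |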
| RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) { |
| assert(VT.isScalableVector() && "Expecting a scalable vector type"); |
| unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); |
| if (VT.getVectorElementType() == MVT::i1) |
| KnownSize *= 8; |
| |
| switch (KnownSize) { |
| default: |
| llvm_unreachable("Invalid LMUL."); |
| case 8: |
| return RISCVVLMUL::LMUL_F8; |
| case 16: |
| return RISCVVLMUL::LMUL_F4; |
| case 32: |
| return RISCVVLMUL::LMUL_F2; |
| case 64: |
| return RISCVVLMUL::LMUL_1; |
| case 128: |
| return RISCVVLMUL::LMUL_2; |
| case 256: |
| return RISCVVLMUL::LMUL_4; |
| case 512: |
| return RISCVVLMUL::LMUL_8; |
| } |
| } |
| |
| unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) { |
| switch (LMul) { |
| default: |
| llvm_unreachable("Invalid LMUL."); |
| case RISCVVLMUL::LMUL_F8: |
| case RISCVVLMUL::LMUL_F4: |
| case RISCVVLMUL::LMUL_F2: |
| case RISCVVLMUL::LMUL_1: |
| return RISCV::VRRegClassID; |
| case RISCVVLMUL::LMUL_2: |
| return RISCV::VRM2RegClassID; |
| case RISCVVLMUL::LMUL_4: |
| return RISCV::VRM4RegClassID; |
| case RISCVVLMUL::LMUL_8: |
| return RISCV::VRM8RegClassID; |
| } |
| } |
| |
| unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { |
| RISCVVLMUL LMUL = getLMUL(VT); |
| if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 || |
| LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) { |
| static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm1_0 + Index; |
| } |
| if (LMUL == RISCVVLMUL::LMUL_2) { |
| static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm2_0 + Index; |
| } |
| if (LMUL == RISCVVLMUL::LMUL_4) { |
| static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
| "Unexpected subreg numbering"); |
| return RISCV::sub_vrm4_0 + Index; |
| } |
| llvm_unreachable("Invalid vector type."); |
| } |
| |
| unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { |
| if (VT.getVectorElementType() == MVT::i1) |
| return RISCV::VRRegClassID; |
| return getRegClassIDForLMUL(getLMUL(VT)); |
| } |
| |
| // Attempt to decompose a subvector insert/extract between VecVT and |
| // SubVecVT via subregister indices. Returns the subregister index that |
| // can perform the subvector insert/extract with the given element index, as |
| // well as the index corresponding to any leftover subvectors that must be |
| // further inserted/extracted within the register class for SubVecVT. |
| std::pair<unsigned, unsigned> |
| RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
| MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, |
| const RISCVRegisterInfo *TRI) { |
| static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && |
| RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && |
| RISCV::VRM2RegClassID > RISCV::VRRegClassID), |
| "Register classes not ordered"); |
| unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); |
| unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); |
| // Try to compose a subregister index that takes us from the incoming |
| // LMUL>1 register class down to the outgoing one. At each step we halve |
| // the LMUL: |
| // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 |
| // Note that this is not guaranteed to find a subregister index, such as |
| // when we are extracting from one VR type to another. |
| unsigned SubRegIdx = RISCV::NoSubRegister; |
| for (const unsigned RCID : |
| {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) |
| if (VecRegClassID > RCID && SubRegClassID <= RCID) { |
| VecVT = VecVT.getHalfNumVectorElementsVT(); |
| bool IsHi = |
| InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); |
| SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, |
| getSubregIndexByMVT(VecVT, IsHi)); |
| if (IsHi) |
| InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); |
| } |
| return {SubRegIdx, InsertExtractIdx}; |
| } |
| |
| // Return the largest legal scalable vector type that matches VT's element type. |
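| // With RVVBitsPerBlock (64) bits per vscale unit, an i32 vector at LMUL 1 |
| // maps to nxv2i32, an f64 vector at LMUL 2 maps to nxv2f64, and an i1 mask |
| // at LMUL 1 maps to nxv8i1 (masks are sized for 8-bit elements). |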
| MVT RISCVTargetLowering::getContainerForFixedLengthVector( |
| const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) { |
| assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) && |
| "Expected legal fixed length vector!"); |
| |
| unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT); |
| assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!"); |
| |
| MVT EltVT = VT.getVectorElementType(); |
| switch (EltVT.SimpleTy) { |
| default: |
| llvm_unreachable("unexpected element type for RVV container"); |
| case MVT::i1: { |
| // Masks are sized assuming 8-bit elements, since that is when a register |
| // group holds the most elements and therefore needs the most mask bits. |
| unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8; |
| return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock); |
| } |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| case MVT::i64: |
| case MVT::f16: |
| case MVT::f32: |
| case MVT::f64: { |
| unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits(); |
| return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock); |
| } |
| } |
| } |
| |
| MVT RISCVTargetLowering::getContainerForFixedLengthVector( |
| SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) { |
| return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, |
| Subtarget); |
| } |
| |
| MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { |
| return getContainerForFixedLengthVector(*this, VT, getSubtarget()); |
| } |
| |
| // Grow V to consume an entire RVV register. |
| static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VT.isScalableVector() && |
| "Expected to convert into a scalable vector!"); |
| assert(V.getValueType().isFixedLengthVector() && |
| "Expected a fixed length vector operand!"); |
| SDLoc DL(V); |
| SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
| return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); |
| } |
| |
| // Shrink V so it's just big enough to maintain a VT's worth of data. |
| static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VT.isFixedLengthVector() && |
| "Expected to convert into a fixed length vector!"); |
| assert(V.getValueType().isScalableVector() && |
| "Expected a scalable vector operand!"); |
| SDLoc DL(V); |
| SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
| return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); |
| } |
| |
| // Gets the two common "VL" operands: an all-ones mask and the vector length. |
| // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is |
| // the vector type that it is contained in. |
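| // For a fixed-length type the VL operand is the exact element count (e.g. 4 |
| // for v4i32); for scalable types it is the X0 register, which the backend |
| // interprets as VLMAX. |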
| static std::pair<SDValue, SDValue> |
| getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| SDValue VL = VecVT.isFixedLengthVector() |
| ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT) |
| : DAG.getRegister(RISCV::X0, XLenVT); |
| MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
| SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); |
| return {Mask, VL}; |
| } |
| |
| // As above but assuming the given type is a scalable vector type. |
| static std::pair<SDValue, SDValue> |
| getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| assert(VecVT.isScalableVector() && "Expecting a scalable vector"); |
| return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget); |
| } |
| |
| // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few |
| // of either are (currently) supported. This can get us into an infinite loop |
| // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR |
| // as a ..., etc. |
| // Until either (or both) of these can reliably lower any node, reporting that |
| // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks |
| // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, |
| // which is not desirable. |
| bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( |
| EVT VT, unsigned DefinedValues) const { |
| return false; |
| } |
| |
| bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { |
| // Only splats are currently supported. |
| if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) |
| return true; |
| |
| return false; |
| } |
| |
| static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| MVT VT = Op.getSimpleValueType(); |
| assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
| |
| MVT ContainerVT = |
| RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget); |
| |
| SDLoc DL(Op); |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| MVT XLenVT = Subtarget.getXLenVT(); |
| unsigned NumElts = Op.getNumOperands(); |
| |
| if (VT.getVectorElementType() == MVT::i1) { |
| if (ISD::isBuildVectorAllZeros(Op.getNode())) { |
| SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); |
| return convertFromScalableVector(VT, VMClr, DAG, Subtarget); |
| } |
| |
| if (ISD::isBuildVectorAllOnes(Op.getNode())) { |
| SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); |
| return convertFromScalableVector(VT, VMSet, DAG, Subtarget); |
| } |
| |
| // Lower constant mask BUILD_VECTORs via an integer vector type, in |
| // scalar integer chunks whose bit-width depends on the number of mask |
| // bits and XLEN. |
| // First, determine the most appropriate scalar integer type to use. This |
| // is at most XLenVT, but may be shrunk to a smaller vector element type |
| // according to the size of the final vector - use i8 chunks rather than |
| // XLenVT if we're producing a v8i1. This results in more consistent |
| // codegen across RV32 and RV64. |
| // If we have to use more than one INSERT_VECTOR_ELT then this optimization |
| // is likely to increase code size; avoid performing it in such a case. |
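| // For instance, a constant v8i1 mask <1,0,1,1,0,0,0,1> is packed LSB-first |
| // into the single i8 value 0b10001101, inserted into a v1i8 vector, and |
| // bitcast back to v8i1. |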
| unsigned NumViaIntegerBits = |
| std::min(std::max(NumElts, 8u), Subtarget.getXLen()); |
| if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
| (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) { |
| // Now we can create our integer vector type. Note that it may be larger |
| // than the resulting mask type: v4i1 would use v1i8 as its integer type. |
| MVT IntegerViaVecVT = |
| MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), |
| divideCeil(NumElts, NumViaIntegerBits)); |
| |
| uint64_t Bits = 0; |
| unsigned BitPos = 0, IntegerEltIdx = 0; |
| SDValue Vec = DAG.getUNDEF(IntegerViaVecVT); |
| |
| for (unsigned I = 0; I < NumElts; I++, BitPos++) { |
| // Once we accumulate enough bits to fill our scalar type, insert into |
| // our vector and clear our accumulated data. |
| if (I != 0 && I % NumViaIntegerBits == 0) { |
| if (NumViaIntegerBits <= 32) |
| Bits = SignExtend64(Bits, 32); |
| SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, |
| Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); |
| Bits = 0; |
| BitPos = 0; |
| IntegerEltIdx++; |
| } |
| SDValue V = Op.getOperand(I); |
| bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); |
| Bits |= ((uint64_t)BitValue << BitPos); |
| } |
| |
| // Insert the (remaining) scalar value into position in our integer |
| // vector type. |
| if (NumViaIntegerBits <= 32) |
| Bits = SignExtend64(Bits, 32); |
| SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, |
| DAG.getConstant(IntegerEltIdx, DL, XLenVT)); |
| |
| if (NumElts < NumViaIntegerBits) { |
| // If we're producing a smaller vector than our minimum legal integer |
| // type, bitcast to the equivalent (known-legal) mask type, and extract |
| // our final mask. |
| assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); |
| Vec = DAG.getBitcast(MVT::v8i1, Vec); |
| Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, |
| DAG.getConstant(0, DL, XLenVT)); |
| } else { |
| // Else we must have produced an integer type with the same size as the |
| // mask type; bitcast for the final result. |
| assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); |
| Vec = DAG.getBitcast(VT, Vec); |
| } |
| |
| return Vec; |
| } |
| |
| return SDValue(); |
| } |
| |
| if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
| unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
| : RISCVISD::VMV_V_X_VL; |
| Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); |
| return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
| } |
| |
| // Try and match an index sequence, which we can lower directly to the vid |
| // instruction. An all-undef vector is matched by getSplatValue, above. |
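| // For example, v4i16 <0, 1, 2, 3> (possibly with some elements undef) |
| // becomes a single VID_VL node, i.e. the vid.v instruction. |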
| if (VT.isInteger()) { |
| bool IsVID = true; |
| for (unsigned I = 0; I < NumElts && IsVID; I++) |
| IsVID &= Op.getOperand(I).isUndef() || |
| (isa<ConstantSDNode>(Op.getOperand(I)) && |
| Op.getConstantOperandVal(I) == I); |
| |
| if (IsVID) { |
| SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); |
| return convertFromScalableVector(VT, VID, DAG, Subtarget); |
| } |
| } |
| |
| // Attempt to detect "hidden" splats, which only reveal themselves as splats |
| // when re-interpreted as a vector with a larger element type. For example, |
| // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 |
| // could be instead splat as |
| // v2i32 = build_vector i32 0x00010000, i32 0x00010000 |
| // TODO: This optimization could also work on non-constant splats, but it |
| // would require bit-manipulation instructions to construct the splat value. |
| SmallVector<SDValue> Sequence; |
| unsigned EltBitSize = VT.getScalarSizeInBits(); |
| const auto *BV = cast<BuildVectorSDNode>(Op); |
| if (VT.isInteger() && EltBitSize < 64 && |
| ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
| BV->getRepeatedSequence(Sequence) && |
| (Sequence.size() * EltBitSize) <= 64) { |
| unsigned SeqLen = Sequence.size(); |
| MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); |
| MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen); |
| assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || |
| ViaIntVT == MVT::i64) && |
| "Unexpected sequence type"); |
| |
| unsigned EltIdx = 0; |
| uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); |
| uint64_t SplatValue = 0; |
| // Construct the amalgamated value which can be splatted as this larger |
| // vector type. |
| for (const auto &SeqV : Sequence) { |
| if (!SeqV.isUndef()) |
| SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) |
| << (EltIdx * EltBitSize)); |
| EltIdx++; |
| } |
| |
| // On RV64, sign-extend from 32 to 64 bits where possible in order to |
| // achieve better constant materialization. |
| if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
| SplatValue = SignExtend64(SplatValue, 32); |
| |
| // Since we can't introduce illegal i64 types at this stage, we can only |
| // perform an i64 splat on RV32 if it is its own sign-extended value. That |
| // way we can use RVV instructions to splat. |
| assert((ViaIntVT.bitsLE(XLenVT) || |
| (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && |
| "Unexpected bitcast sequence"); |
| if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { |
| SDValue ViaVL = |
| DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); |
| MVT ViaContainerVT = |
| RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT, |
| Subtarget); |
| SDValue Splat = |
| DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, |
| DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); |
| Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); |
| return DAG.getBitcast(VT, Splat); |
| } |
| } |
| |
| // Try and optimize BUILD_VECTORs with "dominant values" - these are values |
| // which constitute a large proportion of the elements. In such cases we can |
| // splat a vector with the dominant element and make up the shortfall with |
| // INSERT_VECTOR_ELTs. |
| // Note that this includes vectors of 2 elements by association. The |
| // upper-most element is the "dominant" one, allowing us to use a splat to |
| // "insert" the upper element, and an insert of the lower element at position |
| // 0, which improves codegen. |
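| // For example, v4i32 <a, b, a, a> can be lowered as a splat of a followed |
| // by a single INSERT_VECTOR_ELT of b at index 1. |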
| SDValue DominantValue; |
| unsigned MostCommonCount = 0; |
| DenseMap<SDValue, unsigned> ValueCounts; |
| unsigned NumUndefElts = |
| count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); |
| |
| for (SDValue V : Op->op_values()) { |
| if (V.isUndef()) |
| continue; |
| |
| ValueCounts.insert(std::make_pair(V, 0)); |
| unsigned &Count = ValueCounts[V]; |
| |
| // Is this value dominant? In case of a tie, prefer the highest element as |
| // it's cheaper to insert near the beginning of a vector than it is at the |
| // end. |
| if (++Count >= MostCommonCount) { |
| DominantValue = V; |
| MostCommonCount = Count; |
| } |
| } |
| |
| assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); |
| unsigned NumDefElts = NumElts - NumUndefElts; |
| unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; |
| |
| // Don't perform this optimization when optimizing for size, since |
| // materializing elements and inserting them tends to cause code bloat. |
| if (!DAG.shouldOptForSize() && |
| ((MostCommonCount > DominantValueCountThreshold) || |
| (ValueCounts.size() <= Log2_32(NumDefElts)))) { |
| // Start by splatting the most common element. |
| SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); |
| |
| DenseSet<SDValue> Processed{DominantValue}; |
| MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); |
| for (const auto &OpIdx : enumerate(Op->ops())) { |
| const SDValue &V = OpIdx.value(); |
| if (V.isUndef() || !Processed.insert(V).second) |
| continue; |
| if (ValueCounts[V] == 1) { |
| Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, |
| DAG.getConstant(OpIdx.index(), DL, XLenVT)); |
| } else { |
| // Blend in all instances of this value using a VSELECT, using a |
| // mask where each bit signals whether that element is the one |
| // we're after. |
| SmallVector<SDValue> Ops; |
| transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { |
| return DAG.getConstant(V == V1, DL, XLenVT); |
| }); |
| Vec = DAG.getNode(ISD::VSELECT, DL, VT, |
| DAG.getBuildVector(SelMaskTy, DL, Ops), |
| DAG.getSplatBuildVector(VT, DL, V), Vec); |
| } |
| } |
| |
| return Vec; |
| } |
| |
| return SDValue(); |
| } |
| |
| static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| SDValue V1 = Op.getOperand(0); |
| SDValue V2 = Op.getOperand(1); |
| SDLoc DL(Op); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| MVT VT = Op.getSimpleValueType(); |
| unsigned NumElts = VT.getVectorNumElements(); |
| ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); |
| |
| if (SVN->isSplat()) { |
| int Lane = SVN->getSplatIndex(); |
| if (Lane >= 0) { |
| MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector( |
| DAG, VT, Subtarget); |
| |
| V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); |
| assert(Lane < (int)NumElts && "Unexpected lane!"); |
| |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| SDValue Gather = |
| DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1, |
| DAG.getConstant(Lane, DL, XLenVT), Mask, VL); |
| return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
| } |
| } |
| |
| // Detect shuffles which can be re-expressed as vector selects. |
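// A shuffle is select-like when every defined mask element takes lane i from
// lane i of one of the two sources. E.g. (illustrative) with 4 elements,
// mask <0, 5, 2, 7> takes lanes 0 and 2 from V1 and lanes 1 and 3 from V2,
// with no cross-lane movement.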
| SmallVector<SDValue> MaskVals; |
| // By default we preserve the original operand order, and select LHS as true |
| // and RHS as false. However, since RVV vector selects may feature splats but |
| // only on the LHS, we may choose to invert our mask and instead select |
| // between RHS and LHS. |
| bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); |
| |
| bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) { |
| int MaskIndex = MaskIdx.value(); |
| bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; |
| MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); |
| return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; |
| }); |
| |
| if (IsSelect) { |
| assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); |
| MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
| SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); |
| return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1, |
| SwapOps ? V1 : V2); |
| } |
| |
| return SDValue(); |
| } |
| |
| static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT, |
| SDLoc DL, SelectionDAG &DAG, |
| const RISCVSubtarget &Subtarget) { |
| if (VT.isScalableVector()) |
| return DAG.getFPExtendOrRound(Op, DL, VT); |
| assert(VT.isFixedLengthVector() && |
| "Unexpected value type for RVV FP extend/round lowering"); |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType()) |
| ? RISCVISD::FP_EXTEND_VL |
| : RISCVISD::FP_ROUND_VL; |
| return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL); |
| } |
| |
| SDValue RISCVTargetLowering::LowerOperation(SDValue Op, |
| SelectionDAG &DAG) const { |
| switch (Op.getOpcode()) { |
| default: |
| report_fatal_error("unimplemented operand"); |
| case ISD::GlobalAddress: |
| return lowerGlobalAddress(Op, DAG); |
| case ISD::BlockAddress: |
| return lowerBlockAddress(Op, DAG); |
| case ISD::ConstantPool: |
| return lowerConstantPool(Op, DAG); |
| case ISD::JumpTable: |
| return lowerJumpTable(Op, DAG); |
| case ISD::GlobalTLSAddress: |
| return lowerGlobalTLSAddress(Op, DAG); |
| case ISD::SELECT: |
| return lowerSELECT(Op, DAG); |
| case ISD::BRCOND: |
| return lowerBRCOND(Op, DAG); |
| case ISD::VASTART: |
| return lowerVASTART(Op, DAG); |
| case ISD::FRAMEADDR: |
| return lowerFRAMEADDR(Op, DAG); |
| case ISD::RETURNADDR: |
| return lowerRETURNADDR(Op, DAG); |
| case ISD::SHL_PARTS: |
| return lowerShiftLeftParts(Op, DAG); |
| case ISD::SRA_PARTS: |
| return lowerShiftRightParts(Op, DAG, true); |
| case ISD::SRL_PARTS: |
| return lowerShiftRightParts(Op, DAG, false); |
| case ISD::BITCAST: { |
| SDLoc DL(Op); |
| EVT VT = Op.getValueType(); |
| SDValue Op0 = Op.getOperand(0); |
| EVT Op0VT = Op0.getValueType(); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| if (VT.isFixedLengthVector()) { |
| // We can handle fixed length vector bitcasts with a simple replacement |
| // in isel. |
| if (Op0VT.isFixedLengthVector()) |
| return Op; |
| // When bitcasting from scalar to fixed-length vector, insert the scalar |
| // into a one-element vector of the result type, and perform a vector |
| // bitcast. |
| if (!Op0VT.isVector()) { |
| auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); |
| return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, |
| DAG.getUNDEF(BVT), Op0, |
| DAG.getConstant(0, DL, XLenVT))); |
| } |
| return SDValue(); |
| } |
| // Custom-legalize bitcasts from fixed-length vector types to scalar types |
| // thus: bitcast the vector to a one-element vector type whose element type |
| // is the same as the result type, and extract the first element. |
| if (!VT.isVector() && Op0VT.isFixedLengthVector()) { |
| LLVMContext &Context = *DAG.getContext(); |
| SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0); |
| return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, |
| DAG.getConstant(0, DL, XLenVT)); |
| } |
| if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) { |
| SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); |
| SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); |
| return FPConv; |
| } |
| if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && |
| Subtarget.hasStdExtF()) { |
| SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
| SDValue FPConv = |
| DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); |
| return FPConv; |
| } |
| return SDValue(); |
| } |
| case ISD::INTRINSIC_WO_CHAIN: |
| return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
| case ISD::INTRINSIC_W_CHAIN: |
| return LowerINTRINSIC_W_CHAIN(Op, DAG); |
| case ISD::BSWAP: |
| case ISD::BITREVERSE: { |
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
| assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); |
| MVT VT = Op.getSimpleValueType(); |
| SDLoc DL(Op); |
| // Start with the maximum immediate value which is the bitwidth - 1. |
| unsigned Imm = VT.getSizeInBits() - 1; |
| // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. |
| if (Op.getOpcode() == ISD::BSWAP) |
| Imm &= ~0x7U; |
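// E.g. for i32, BITREVERSE uses GREVI with imm 31 (full bit reversal) and
// BSWAP uses imm 24 (byte swap); for i64 the immediates are 63 and 56.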
| return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), |
| DAG.getConstant(Imm, DL, VT)); |
| } |
| case ISD::FSHL: |
| case ISD::FSHR: { |
| MVT VT = Op.getSimpleValueType(); |
| assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization"); |
| SDLoc DL(Op); |
| if (Op.getOperand(2).getOpcode() == ISD::Constant) |
| return Op; |
| // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only |
// use log2(XLen) bits. Mask the shift amount accordingly.
| unsigned ShAmtWidth = Subtarget.getXLen() - 1; |
| SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2), |
| DAG.getConstant(ShAmtWidth, DL, VT)); |
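// E.g. on RV64 the shift amount is ANDed with 63, so only the low
// log2(64) = 6 bits defined by FSHL/FSHR reach FSL/FSR.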
| unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR; |
| return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt); |
| } |
| case ISD::TRUNCATE: { |
| SDLoc DL(Op); |
| MVT VT = Op.getSimpleValueType(); |
| // Only custom-lower vector truncates |
| if (!VT.isVector()) |
| return Op; |
| |
| // Truncates to mask types are handled differently |
| if (VT.getVectorElementType() == MVT::i1) |
| return lowerVectorMaskTrunc(Op, DAG); |
| |
| // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary |
| // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which |
| // truncate by one power of two at a time. |
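// E.g. (illustrative) a v4i64->v4i8 truncate becomes three
// TRUNCATE_VECTOR_VL steps: i64 -> i32 -> i16 -> i8.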
| MVT DstEltVT = VT.getVectorElementType(); |
| |
| SDValue Src = Op.getOperand(0); |
| MVT SrcVT = Src.getSimpleValueType(); |
| MVT SrcEltVT = SrcVT.getVectorElementType(); |
| |
| assert(DstEltVT.bitsLT(SrcEltVT) && |
| isPowerOf2_64(DstEltVT.getSizeInBits()) && |
| isPowerOf2_64(SrcEltVT.getSizeInBits()) && |
| "Unexpected vector truncate lowering"); |
| |
| MVT ContainerVT = SrcVT; |
| if (SrcVT.isFixedLengthVector()) { |
| ContainerVT = getContainerForFixedLengthVector(SrcVT); |
| Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
| } |
| |
| SDValue Result = Src; |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = |
| getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); |
| LLVMContext &Context = *DAG.getContext(); |
| const ElementCount Count = ContainerVT.getVectorElementCount(); |
| do { |
| SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); |
| EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); |
| Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, |
| Mask, VL); |
| } while (SrcEltVT != DstEltVT); |
| |
| if (SrcVT.isFixedLengthVector()) |
| Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
| |
| return Result; |
| } |
| case ISD::ANY_EXTEND: |
| case ISD::ZERO_EXTEND: |
| if (Op.getOperand(0).getValueType().isVector() && |
| Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
| return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); |
| return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); |
| case ISD::SIGN_EXTEND: |
| if (Op.getOperand(0).getValueType().isVector() && |
| Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
| return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); |
| return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); |
| case ISD::SPLAT_VECTOR_PARTS: |
| return lowerSPLAT_VECTOR_PARTS(Op, DAG); |
| case ISD::INSERT_VECTOR_ELT: |
| return lowerINSERT_VECTOR_ELT(Op, DAG); |
| case ISD::EXTRACT_VECTOR_ELT: |
| return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
| case ISD::VSCALE: { |
| MVT VT = Op.getSimpleValueType(); |
| SDLoc DL(Op); |
| SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); |
// We define our scalable vector types for lmul=1 to use a 64-bit known
// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
// vscale as VLENB / 8.
| assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!"); |
| SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, |
| DAG.getConstant(3, DL, VT)); |
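// E.g. with VLEN = 128, VLENB is 16 bytes and vscale = 16 >> 3 = 2.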
| return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); |
| } |
| case ISD::FP_EXTEND: { |
// RVV can only do fp_extend to types double the size of the source. We
| // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going |
| // via f32. |
| SDLoc DL(Op); |
| MVT VT = Op.getSimpleValueType(); |
| SDValue Src = Op.getOperand(0); |
| MVT SrcVT = Src.getSimpleValueType(); |
| |
| // Prepare any fixed-length vector operands. |
| MVT ContainerVT = VT; |
| if (SrcVT.isFixedLengthVector()) { |
| ContainerVT = getContainerForFixedLengthVector(VT); |
| MVT SrcContainerVT = |
| ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); |
| Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
| } |
| |
| if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || |
| SrcVT.getVectorElementType() != MVT::f16) { |
| // For scalable vectors, we only need to close the gap between |
| // vXf16->vXf64. |
| if (!VT.isFixedLengthVector()) |
| return Op; |
| // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version. |
| Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); |
| return convertFromScalableVector(VT, Src, DAG, Subtarget); |
| } |
| |
| MVT InterVT = VT.changeVectorElementType(MVT::f32); |
| MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32); |
| SDValue IntermediateExtend = getRVVFPExtendOrRound( |
| Src, InterVT, InterContainerVT, DL, DAG, Subtarget); |
| |
| SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT, |
| DL, DAG, Subtarget); |
| if (VT.isFixedLengthVector()) |
| return convertFromScalableVector(VT, Extend, DAG, Subtarget); |
| return Extend; |
| } |
| case ISD::FP_ROUND: { |
// RVV can only do fp_round to types half the size of the source. We
| // custom-lower f64->f16 rounds via RVV's round-to-odd float |
| // conversion instruction. |
| SDLoc DL(Op); |
| MVT VT = Op.getSimpleValueType(); |
| SDValue Src = Op.getOperand(0); |
| MVT SrcVT = Src.getSimpleValueType(); |
| |
| // Prepare any fixed-length vector operands. |
| MVT ContainerVT = VT; |
| if (VT.isFixedLengthVector()) { |
| MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); |
| ContainerVT = |
| SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); |
| Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
| } |
| |
| if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || |
| SrcVT.getVectorElementType() != MVT::f64) { |
| // For scalable vectors, we only need to close the gap between |
// vXf64->vXf16.
| if (!VT.isFixedLengthVector()) |
| return Op; |
| // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version. |
| Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); |
| return convertFromScalableVector(VT, Src, DAG, Subtarget); |
| } |
| |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); |
| SDValue IntermediateRound = |
| DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); |
| SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, |
| DL, DAG, Subtarget); |
| |
| if (VT.isFixedLengthVector()) |
| return convertFromScalableVector(VT, Round, DAG, Subtarget); |
| return Round; |
| } |
| case ISD::FP_TO_SINT: |
| case ISD::FP_TO_UINT: |
| case ISD::SINT_TO_FP: |
| case ISD::UINT_TO_FP: { |
// RVV can only do fp<->int conversions to types half/double the size of
| // the source. We custom-lower any conversions that do two hops into |
| // sequences. |
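// E.g. (illustrative) v4i8->v4f32 is lowered as a sign/zero extend to v4i32
// followed by the int-to-fp conversion, and v4f64->v4i8 as an fp-to-int
// conversion to v4i32 followed by an integer truncate.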
| MVT VT = Op.getSimpleValueType(); |
| if (!VT.isVector()) |
| return Op; |
| SDLoc DL(Op); |
| SDValue Src = Op.getOperand(0); |
| MVT EltVT = VT.getVectorElementType(); |
| MVT SrcVT = Src.getSimpleValueType(); |
| MVT SrcEltVT = SrcVT.getVectorElementType(); |
| unsigned EltSize = EltVT.getSizeInBits(); |
| unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
| assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && |
| "Unexpected vector element types"); |
| |
| bool IsInt2FP = SrcEltVT.isInteger(); |
| // Widening conversions |
| if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { |
| if (IsInt2FP) { |
| // Do a regular integer sign/zero extension then convert to float. |
| MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), |
| VT.getVectorElementCount()); |
| unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP |
| ? ISD::ZERO_EXTEND |
| : ISD::SIGN_EXTEND; |
| SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); |
| return DAG.getNode(Op.getOpcode(), DL, VT, Ext); |
| } |
| // FP2Int |
| assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); |
| // Do one doubling fp_extend then complete the operation by converting |
| // to int. |
| MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
| SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); |
| return DAG.getNode(Op.getOpcode(), DL, VT, FExt); |
| } |
| |
| // Narrowing conversions |
| if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { |
| if (IsInt2FP) { |
| // One narrowing int_to_fp, then an fp_round. |
| assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); |
| MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
| SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); |
| return DAG.getFPExtendOrRound(Int2FP, DL, VT); |
| } |
| // FP2Int |
| // One narrowing fp_to_int, then truncate the integer. If the float isn't |
| // representable by the integer, the result is poison. |
| MVT IVecVT = |
| MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), |
| VT.getVectorElementCount()); |
| SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); |
| return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); |
| } |
| |
| // Scalable vectors can exit here. Patterns will handle equally-sized |
// conversions and halving/doubling ones.
| if (!VT.isFixedLengthVector()) |
| return Op; |
| |
| // For fixed-length vectors we lower to a custom "VL" node. |
| unsigned RVVOpc = 0; |
| switch (Op.getOpcode()) { |
| default: |
| llvm_unreachable("Impossible opcode"); |
| case ISD::FP_TO_SINT: |
| RVVOpc = RISCVISD::FP_TO_SINT_VL; |
| break; |
| case ISD::FP_TO_UINT: |
| RVVOpc = RISCVISD::FP_TO_UINT_VL; |
| break; |
| case ISD::SINT_TO_FP: |
| RVVOpc = RISCVISD::SINT_TO_FP_VL; |
| break; |
| case ISD::UINT_TO_FP: |
| RVVOpc = RISCVISD::UINT_TO_FP_VL; |
| break; |
| } |
| |
| MVT ContainerVT, SrcContainerVT; |
| // Derive the reference container type from the larger vector type. |
| if (SrcEltSize > EltSize) { |
| SrcContainerVT = getContainerForFixedLengthVector(SrcVT); |
| ContainerVT = |
| SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); |
| } else { |
| ContainerVT = getContainerForFixedLengthVector(VT); |
| SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); |
| } |
| |
| SDValue Mask, VL; |
| std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
| |
| Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
| Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); |
| return convertFromScalableVector(VT, Src, DAG, Subtarget); |
| } |
| case ISD::VECREDUCE_ADD: |
| case ISD::VECREDUCE_UMAX: |
| case ISD::VECREDUCE_SMAX: |
| case ISD::VECREDUCE_UMIN: |
| case ISD::VECREDUCE_SMIN: |
| return lowerVECREDUCE(Op, DAG); |
| case ISD::VECREDUCE_AND: |
| case ISD::VECREDUCE_OR: |
| case ISD::VECREDUCE_XOR: |
| if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
| return lowerVectorMaskVECREDUCE(Op, DAG); |
| return lowerVECREDUCE(Op, DAG); |
| case ISD::VECREDUCE_FADD: |
| case ISD::VECREDUCE_SEQ_FADD: |
| return lowerFPVECREDUCE(Op, DAG); |
| case ISD::INSERT_SUBVECTOR: |
| return lowerINSERT_SUBVECTOR(Op, DAG); |
| case ISD::EXTRACT_SUBVECTOR: |
| return lowerEXTRACT_SUBVECTOR(Op, DAG); |
| case ISD::STEP_VECTOR: |
| return lowerSTEP_VECTOR(Op, DAG); |
| case ISD::VECTOR_REVERSE: |
| return lowerVECTOR_REVERSE(Op, DAG); |
| case ISD::BUILD_VECTOR: |
| return lowerBUILD_VECTOR(Op, DAG, Subtarget); |
| case ISD::VECTOR_SHUFFLE: |
| return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); |
| case ISD::CONCAT_VECTORS: { |
| // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is |
| // better than going through the stack, as the default expansion does. |
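// E.g. (illustrative) concatenating two v4i32 operands into a v8i32 result
// emits two INSERT_SUBVECTORs at element indices 0 and 4.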
| SDLoc DL(Op); |
| MVT VT = Op.getSimpleValueType(); |
| unsigned NumOpElts = |
| Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); |
| SDValue Vec = DAG.getUNDEF(VT); |
| for (const auto &OpIdx : enumerate(Op->ops())) |
| Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), |
| DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); |
| return Vec; |
| } |
| case ISD::LOAD: |
| return lowerFixedLengthVectorLoadToRVV(Op, DAG); |
| case ISD::STORE: |
| return lowerFixedLengthVectorStoreToRVV(Op, DAG); |
| case ISD::MLOAD: |
| return lowerMLOAD(Op, DAG); |
| case ISD::MSTORE: |
| return lowerMSTORE(Op, DAG); |
| case ISD::SETCC: |
| return lowerFixedLengthVectorSetccToRVV(Op, DAG); |
| case ISD::ADD: |
| return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); |
| case ISD::SUB: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); |
| case ISD::MUL: |
| return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); |
| case ISD::MULHS: |
| return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); |
| case ISD::MULHU: |
| return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); |
| case ISD::AND: |
| return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, |
| RISCVISD::AND_VL); |
| case ISD::OR: |
| return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, |
| RISCVISD::OR_VL); |
| case ISD::XOR: |
| return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, |
| RISCVISD::XOR_VL); |
| case ISD::SDIV: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); |
| case ISD::SREM: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); |
| case ISD::UDIV: |
| return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); |
| case ISD::UREM: |
| return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); |
| case ISD::SHL: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); |
| case ISD::SRA: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); |
| case ISD::SRL: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); |
| case ISD::FADD: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); |
| case ISD::FSUB: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); |
| case ISD::FMUL: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); |
| case ISD::FDIV: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); |
| case ISD::FNEG: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); |
| case ISD::FABS: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); |
| case ISD::FSQRT: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); |
| case ISD::FMA: |
| return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); |
| case ISD::SMIN: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); |
| case ISD::SMAX: |
| return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); |
| case ISD::UMIN: |
| return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); |
| case ISD::UMAX: |
| return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); |
| case ISD::ABS: |
| return lowerABS(Op, DAG); |
| case ISD::VSELECT: |
| return lowerFixedLengthVectorSelectToRVV(Op, DAG); |
| case ISD::FCOPYSIGN: |
| return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); |
| case ISD::MGATHER: |
| return lowerMGATHER(Op, DAG); |
| case ISD::MSCATTER: |
| return lowerMSCATTER(Op, DAG); |
| } |
| } |
| |
| static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
| SelectionDAG &DAG, unsigned Flags) { |
| return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); |
| } |
| |
| static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
| SelectionDAG &DAG, unsigned Flags) { |
| return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), |
| Flags); |
| } |
| |
| static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
| SelectionDAG &DAG, unsigned Flags) { |
| return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), |
| N->getOffset(), Flags); |
| } |
| |
| static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
| SelectionDAG &DAG, unsigned Flags) { |
| return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); |
| } |
| |
| template <class NodeTy> |
| SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
| bool IsLocal) const { |
| SDLoc DL(N); |
| EVT Ty = getPointerTy(DAG.getDataLayout()); |
| |
| if (isPositionIndependent()) { |
| SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
| if (IsLocal) |
| // Use PC-relative addressing to access the symbol. This generates the |
| // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) |
| // %pcrel_lo(auipc)). |
| return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); |
| |
| // Use PC-relative addressing to access the GOT for this symbol, then load |
| // the address from the GOT. This generates the pattern (PseudoLA sym), |
| // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
| return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); |
| } |
| |
| switch (getTargetMachine().getCodeModel()) { |
| default: |
| report_fatal_error("Unsupported code model for lowering"); |
| case CodeModel::Small: { |
| // Generate a sequence for accessing addresses within the first 2 GiB of |
| // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). |
| SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); |
| SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); |
| SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); |
| return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); |
| } |
| case CodeModel::Medium: { |
// Generate a sequence for accessing addresses within any 2 GiB range within
| // the address space. This generates the pattern (PseudoLLA sym), which |
| // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). |
| SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
| return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); |
| } |
| } |
| } |
| |
| SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| SDLoc DL(Op); |
| EVT Ty = Op.getValueType(); |
| GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); |
| int64_t Offset = N->getOffset(); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| const GlobalValue *GV = N->getGlobal(); |
| bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); |
| SDValue Addr = getAddr(N, DAG, IsLocal); |
| |
| // In order to maximise the opportunity for common subexpression elimination, |
| // emit a separate ADD node for the global address offset instead of folding |
// it into the global address node. Later peephole optimisations may choose to
| // fold it back in when profitable. |
| if (Offset != 0) |
| return DAG.getNode(ISD::ADD, DL, Ty, Addr, |
| DAG.getConstant(Offset, DL, XLenVT)); |
| return Addr; |
| } |
| |
| SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); |
| |
| return getAddr(N, DAG); |
| } |
| |
| SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, |
| SelectionDAG &DAG) const { |
| ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); |
| |
| return getAddr(N, DAG); |
| } |
| |
| SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, |
| SelectionDAG &DAG) const { |
| JumpTableSDNode *N = cast<JumpTableSDNode>(Op); |
| |
| return getAddr(N, DAG); |
| } |
| |
| SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
| SelectionDAG &DAG, |
| bool UseGOT) const { |
| SDLoc DL(N); |
| EVT Ty = getPointerTy(DAG.getDataLayout()); |
| const GlobalValue *GV = N->getGlobal(); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| if (UseGOT) { |
| // Use PC-relative addressing to access the GOT for this TLS symbol, then |
| // load the address from the GOT and add the thread pointer. This generates |
| // the pattern (PseudoLA_TLS_IE sym), which expands to |
| // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). |
| SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); |
| SDValue Load = |
| SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); |
| |
| // Add the thread pointer. |
| SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
| return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); |
| } |
| |
| // Generate a sequence for accessing the address relative to the thread |
| // pointer, with the appropriate adjustment for the thread pointer offset. |
| // This generates the pattern |
| // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) |
| SDValue AddrHi = |
| DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); |
| SDValue AddrAdd = |
| DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); |
| SDValue AddrLo = |
| DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); |
| |
| SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); |
| SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
| SDValue MNAdd = SDValue( |
| DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), |
| 0); |
| return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); |
| } |
| |
| SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
| SelectionDAG &DAG) const { |
| SDLoc DL(N); |
| EVT Ty = getPointerTy(DAG.getDataLayout()); |
| IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); |
| const GlobalValue *GV = N->getGlobal(); |
| |
| // Use a PC-relative addressing mode to access the global dynamic GOT address. |
| // This generates the pattern (PseudoLA_TLS_GD sym), which expands to |
| // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). |
| SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); |
| SDValue Load = |
| SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); |
| |
| // Prepare argument list to generate call. |
| ArgListTy Args; |
| ArgListEntry Entry; |
| Entry.Node = Load; |
| Entry.Ty = CallTy; |
| Args.push_back(Entry); |
| |
| // Setup call to __tls_get_addr. |
| TargetLowering::CallLoweringInfo CLI(DAG); |
| CLI.setDebugLoc(DL) |
| .setChain(DAG.getEntryNode()) |
| .setLibCallee(CallingConv::C, CallTy, |
| DAG.getExternalSymbol("__tls_get_addr", Ty), |
| std::move(Args)); |
| |
| return LowerCallTo(CLI).first; |
| } |
| |
| SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| SDLoc DL(Op); |
| EVT Ty = Op.getValueType(); |
| GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); |
| int64_t Offset = N->getOffset(); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); |
| |
| if (DAG.getMachineFunction().getFunction().getCallingConv() == |
| CallingConv::GHC) |
| report_fatal_error("In GHC calling convention TLS is not supported"); |
| |
| SDValue Addr; |
| switch (Model) { |
| case TLSModel::LocalExec: |
| Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); |
| break; |
| case TLSModel::InitialExec: |
| Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); |
| break; |
| case TLSModel::LocalDynamic: |
| case TLSModel::GeneralDynamic: |
| Addr = getDynamicTLSAddr(N, DAG); |
| break; |
| } |
| |
| // In order to maximise the opportunity for common subexpression elimination, |
| // emit a separate ADD node for the global address offset instead of folding |
// it into the global address node. Later peephole optimisations may choose to
| // fold it back in when profitable. |
| if (Offset != 0) |
| return DAG.getNode(ISD::ADD, DL, Ty, Addr, |
| DAG.getConstant(Offset, DL, XLenVT)); |
| return Addr; |
| } |
| |
| SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
| SDValue CondV = Op.getOperand(0); |
| SDValue TrueV = Op.getOperand(1); |
| SDValue FalseV = Op.getOperand(2); |
| SDLoc DL(Op); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| // If the result type is XLenVT and CondV is the output of a SETCC node |
| // which also operated on XLenVT inputs, then merge the SETCC node into the |
| // lowered RISCVISD::SELECT_CC to take advantage of the integer |
| // compare+branch instructions. i.e.: |
| // (select (setcc lhs, rhs, cc), truev, falsev) |
| // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) |
| if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && |
| CondV.getOperand(0).getSimpleValueType() == XLenVT) { |
| SDValue LHS = CondV.getOperand(0); |
| SDValue RHS = CondV.getOperand(1); |
| auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); |
| ISD::CondCode CCVal = CC->get(); |
| |
// Special case for a select of 2 constants that have a difference of 1.
| // Normally this is done by DAGCombine, but if the select is introduced by |
| // type legalization or op legalization, we miss it. Restricting to SETLT |
| // case for now because that is what signed saturating add/sub need. |
| // FIXME: We don't need the condition to be SETLT or even a SETCC, |
| // but we would probably want to swap the true/false values if the condition |
| // is SETGE/SETLE to avoid an XORI. |
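// E.g. (select (setlt x, y), 5, 4) becomes (add (setlt x, y), 4) and
// (select (setlt x, y), 4, 5) becomes (sub 5, (setlt x, y)), since the
// setcc produces 0 or 1.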
| if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && |
| CCVal == ISD::SETLT) { |
| const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); |
| const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); |
| if (TrueVal - 1 == FalseVal) |
| return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV); |
| if (TrueVal + 1 == FalseVal) |
| return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV); |
| } |
| |
| translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); |
| |
| SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); |
| SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; |
| return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); |
| } |
| |
| // Otherwise: |
| // (select condv, truev, falsev) |
| // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) |
| SDValue Zero = DAG.getConstant(0, DL, XLenVT); |
| SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); |
| |
| SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; |
| |
| return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); |
| } |
| |
| SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
| SDValue CondV = Op.getOperand(1); |
| SDLoc DL(Op); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| |
| if (CondV.getOpcode() == ISD::SETCC && |
| CondV.getOperand(0).getValueType() == XLenVT) { |
| SDValue LHS = CondV.getOperand(0); |
| SDValue RHS = CondV.getOperand(1); |
| ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); |
| |
| translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); |
| |
| SDValue TargetCC = DAG.getCondCode(CCVal); |
| return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), |
| LHS, RHS, TargetCC, Op.getOperand(2)); |
| } |
| |
| return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), |
| CondV, DAG.getConstant(0, DL, XLenVT), |
| DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); |
| } |
| |
| SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); |
| |
| SDLoc DL(Op); |
| SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), |
| getPointerTy(MF.getDataLayout())); |
| |
| // vastart just stores the address of the VarArgsFrameIndex slot into the |
| // memory location argument. |
| const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
| return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), |
| MachinePointerInfo(SV)); |
| } |
| |
| SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, |
| SelectionDAG &DAG) const { |
| const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineFrameInfo &MFI = MF.getFrameInfo(); |
| MFI.setFrameAddressIsTaken(true); |
| Register FrameReg = RI.getFrameRegister(MF); |
| int XLenInBytes = Subtarget.getXLen() / 8; |
| |
| EVT VT = Op.getValueType(); |
| SDLoc DL(Op); |
| SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); |
| unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
| while (Depth--) { |
| int Offset = -(XLenInBytes * 2); |
| SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, |
| DAG.getIntPtrConstant(Offset, DL)); |
| FrameAddr = |
| DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); |
| } |
| return FrameAddr; |
| } |
| |
| SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, |
| SelectionDAG &DAG) const { |
| const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineFrameInfo &MFI = MF.getFrameInfo(); |
| MFI.setReturnAddressIsTaken(true); |
| MVT XLenVT = Subtarget.getXLenVT(); |
| int XLenInBytes = Subtarget.getXLen() / 8; |
| |
| if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
| return SDValue(); |
| |
| EVT VT = Op.getValueType(); |
| SDLoc DL(Op); |
| unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
| if (Depth) { |
| int Off = -XLenInBytes; |
| SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); |
| SDValue Offset = DAG.getConstant(Off, DL, VT); |
| return DAG.getLoad(VT, DL, DAG.getEntryNode(), |
| DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), |
| MachinePointerInfo()); |
| } |
| |
| // Return the value of the return address register, marking it an implicit |
| // live-in. |
| Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); |
| return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); |
| } |
| |
| SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, |
| SelectionDAG &DAG) const { |
| SDLoc DL(Op); |
| SDValue Lo = Op.getOperand(0); |
| SDValue Hi = Op.getOperand(1); |
| SDValue Shamt = Op.getOperand(2); |
| EVT VT = Lo.getValueType(); |
| |
| // if Shamt-XLEN < 0: // Shamt < XLEN |
| // Lo = Lo << Shamt |
| // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) |
| // else: |
| // Lo = 0 |
| // Hi = Lo << (Shamt-XLEN) |
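// E.g. on RV32, a 64-bit shift left by 40 takes the else arm: Lo becomes 0
// and Hi becomes the original Lo shifted left by 40 - 32 = 8.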
| |
| SDValue Zero = DAG.getConstant(0, DL, VT); |
| SDValue One = DAG.getConstant(1, DL, VT); |
| SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); |
| SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); |
| SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); |
| SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); |
| |
| SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); |
| SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); |
| SDValue ShiftRightLo = |
| DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); |
| SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); |
| SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); |
| SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); |
| |
| SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); |
| |
| Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); |
| Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); |
| |
| SDValue Parts[2] = {Lo, Hi}; |
| return DAG.getMergeValues(Parts, DL); |
| } |
| |
| SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, |
| bool IsSRA) const { |
| SDLoc DL(Op); |
| SDValue Lo = Op.getOperand(0); |
| SDValue Hi = Op.getOperand(1); |
| SDValue Shamt = Op.getOperand(2); |
| EVT VT = Lo.getValueType(); |
| |
| // SRA expansion: |
| // if Shamt-XLEN < 0: // Shamt < XLEN |
| // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) |
| // Hi = Hi >>s Shamt |
| // else: |
| // Lo = Hi >>s (Shamt-XLEN); |
| // Hi = Hi >>s (XLEN-1) |
| // |
| // SRL expansion: |
| // if Shamt-XLEN < 0: // Shamt < XLEN |
| // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) |
| // Hi = Hi >>u Shamt |
| // else: |
| // Lo = Hi >>u (Shamt-XLEN); |
| // Hi = 0; |
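// E.g. on RV32, shifting a 64-bit value right by 40 takes the else arm: for
// SRL, Lo = Hi >>u 8 and Hi = 0; for SRA, Lo = Hi >>s 8 and Hi = Hi >>s 31
// (all sign bits).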
| |
| unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
| |
| SDValue Zero = DAG.getConstant(0, DL, VT); |
| SDValue One = DAG.getConstant(1, DL, VT); |
| SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); |
| SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); |
| SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); |
| SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); |
| |
| SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); |
| SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); |
| SDValue ShiftLeftHi = |
| DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); |
| SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); |
| SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); |
| SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); |
| SDValue HiFalse = |
| IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; |
| |
| SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); |
| |
| Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); |
| Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); |
| |
| SDValue Parts[2] = {Lo, Hi}; |
| return DAG.getMergeValues(Parts, DL); |
| } |
|