| //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the AArch64TargetLowering class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64ISelLowering.h" |
| #include "AArch64CallingConvention.h" |
| #include "AArch64ExpandImm.h" |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64PerfectShuffle.h" |
| #include "AArch64RegisterInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "Utils/AArch64BaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/ObjCARCUtil.h" |
| #include "llvm/Analysis/VectorUtils.h" |
| #include "llvm/CodeGen/Analysis.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetCallingConv.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GetElementPtrTypeIterator.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsAArch64.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/OperandTraits.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <bitset> |
| #include <cassert> |
| #include <cctype> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <iterator> |
| #include <limits> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| using namespace llvm::PatternMatch; |
| |
| #define DEBUG_TYPE "aarch64-lower" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumShiftInserts, "Number of vector shift inserts"); |
| STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); |
| |
| // FIXME: The necessary dtprel relocations don't seem to be supported |
| // well in the GNU bfd and gold linkers at the moment. Therefore, by |
| // default, for now, fall back to GeneralDynamic code generation. |
| cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( |
| "aarch64-elf-ldtls-generation", cl::Hidden, |
| cl::desc("Allow AArch64 Local Dynamic TLS code generation"), |
| cl::init(false)); |
| |
| static cl::opt<bool> |
| EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, |
| cl::desc("Enable AArch64 logical imm instruction " |
| "optimization"), |
| cl::init(true)); |
| |
| // Temporary option added to aid testing of the functionality added to |
| // DAGCombiner.cpp in D92230. It is expected that this can be removed in the |
| // future, once both implementations are based on MGATHER rather than the |
| // GLD1 nodes added for the SVE gather load intrinsics. |
| static cl::opt<bool> |
| EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, |
| cl::desc("Combine extends of AArch64 masked " |
| "gather intrinsics"), |
| cl::init(true)); |
| |
| /// Value type used for condition codes. |
| static const MVT MVT_CC = MVT::i32; |
| |
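| /// Returns the scalable vector type that packs a full 128-bit SVE block with |
| /// the given element type, e.g. MVT::i32 -> MVT::nxv4i32. |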
| static inline EVT getPackedSVEVectorVT(EVT VT) { |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: |
| llvm_unreachable("unexpected element type for vector"); |
| case MVT::i8: |
| return MVT::nxv16i8; |
| case MVT::i16: |
| return MVT::nxv8i16; |
| case MVT::i32: |
| return MVT::nxv4i32; |
| case MVT::i64: |
| return MVT::nxv2i64; |
| case MVT::f16: |
| return MVT::nxv8f16; |
| case MVT::f32: |
| return MVT::nxv4f32; |
| case MVT::f64: |
| return MVT::nxv2f64; |
| case MVT::bf16: |
| return MVT::nxv8bf16; |
| } |
| } |
| |
| // NOTE: Currently there's only a need to return integer vector types. If this |
| // changes then just add an extra "type" parameter. |
| static inline EVT getPackedSVEVectorVT(ElementCount EC) { |
| switch (EC.getKnownMinValue()) { |
| default: |
| llvm_unreachable("unexpected element count for vector"); |
| case 16: |
| return MVT::nxv16i8; |
| case 8: |
| return MVT::nxv8i16; |
| case 4: |
| return MVT::nxv4i32; |
| case 2: |
| return MVT::nxv2i64; |
| } |
| } |
| |
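| /// Promotes a scalable predicate type (nxvNi1) to the packed integer vector |
| /// type with the same element count, e.g. MVT::nxv4i1 -> MVT::nxv4i32. |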
| static inline EVT getPromotedVTForPredicate(EVT VT) { |
| assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && |
| "Expected scalable predicate vector type!"); |
| switch (VT.getVectorMinNumElements()) { |
| default: |
| llvm_unreachable("unexpected element count for vector"); |
| case 2: |
| return MVT::nxv2i64; |
| case 4: |
| return MVT::nxv4i32; |
| case 8: |
| return MVT::nxv8i16; |
| case 16: |
| return MVT::nxv16i8; |
| } |
| } |
| |
| /// Returns true if VT's elements occupy the lowest bit positions of its |
| /// associated register class without any intervening space. |
| /// |
| /// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the |
| /// same register class, but only nxv8f16 can be treated as a packed vector. |
| static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) { |
| assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) && |
| "Expected legal vector type!"); |
| return VT.isFixedLengthVector() || |
| VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock; |
| } |
| |
| // Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading |
| // predicate and end with a passthru value matching the result type. |
| static bool isMergePassthruOpcode(unsigned Opc) { |
| switch (Opc) { |
| default: |
| return false; |
| case AArch64ISD::BITREVERSE_MERGE_PASSTHRU: |
| case AArch64ISD::BSWAP_MERGE_PASSTHRU: |
| case AArch64ISD::CTLZ_MERGE_PASSTHRU: |
| case AArch64ISD::CTPOP_MERGE_PASSTHRU: |
| case AArch64ISD::DUP_MERGE_PASSTHRU: |
| case AArch64ISD::ABS_MERGE_PASSTHRU: |
| case AArch64ISD::NEG_MERGE_PASSTHRU: |
| case AArch64ISD::FNEG_MERGE_PASSTHRU: |
| case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: |
| case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: |
| case AArch64ISD::FCEIL_MERGE_PASSTHRU: |
| case AArch64ISD::FFLOOR_MERGE_PASSTHRU: |
| case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: |
| case AArch64ISD::FRINT_MERGE_PASSTHRU: |
| case AArch64ISD::FROUND_MERGE_PASSTHRU: |
| case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: |
| case AArch64ISD::FTRUNC_MERGE_PASSTHRU: |
| case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: |
| case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: |
| case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: |
| case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: |
| case AArch64ISD::FCVTZU_MERGE_PASSTHRU: |
| case AArch64ISD::FCVTZS_MERGE_PASSTHRU: |
| case AArch64ISD::FSQRT_MERGE_PASSTHRU: |
| case AArch64ISD::FRECPX_MERGE_PASSTHRU: |
| case AArch64ISD::FABS_MERGE_PASSTHRU: |
| return true; |
| } |
| } |
| |
| AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, |
| const AArch64Subtarget &STI) |
| : TargetLowering(TM), Subtarget(&STI) { |
| // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so |
| // we have to make something up. Arbitrarily, choose ZeroOrOne. |
| setBooleanContents(ZeroOrOneBooleanContent); |
| // When comparing vectors the result sets the different elements in the |
| // vector to all-one or all-zero. |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); |
| addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); |
| |
| if (Subtarget->hasLS64()) { |
| addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass); |
| setOperationAction(ISD::LOAD, MVT::i64x8, Custom); |
| setOperationAction(ISD::STORE, MVT::i64x8, Custom); |
| } |
| |
| if (Subtarget->hasFPARMv8()) { |
| addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); |
| addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass); |
| addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); |
| addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); |
| addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); |
| } |
| |
| if (Subtarget->hasNEON()) { |
| addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); |
| addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); |
| // Someone set us up the NEON. |
| addDRTypeForNEON(MVT::v2f32); |
| addDRTypeForNEON(MVT::v8i8); |
| addDRTypeForNEON(MVT::v4i16); |
| addDRTypeForNEON(MVT::v2i32); |
| addDRTypeForNEON(MVT::v1i64); |
| addDRTypeForNEON(MVT::v1f64); |
| addDRTypeForNEON(MVT::v4f16); |
| if (Subtarget->hasBF16()) |
| addDRTypeForNEON(MVT::v4bf16); |
| |
| addQRTypeForNEON(MVT::v4f32); |
| addQRTypeForNEON(MVT::v2f64); |
| addQRTypeForNEON(MVT::v16i8); |
| addQRTypeForNEON(MVT::v8i16); |
| addQRTypeForNEON(MVT::v4i32); |
| addQRTypeForNEON(MVT::v2i64); |
| addQRTypeForNEON(MVT::v8f16); |
| if (Subtarget->hasBF16()) |
| addQRTypeForNEON(MVT::v8bf16); |
| } |
| |
| if (Subtarget->hasSVE()) { |
| // Add legal SVE predicate types |
| addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); |
| |
| // Add legal SVE data types |
| addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); |
| |
| addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); |
| |
| if (Subtarget->hasBF16()) { |
| addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass); |
| } |
| |
| if (Subtarget->useSVEForFixedLengthVectors()) { |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
| if (useSVEForFixedLengthVectorVT(VT)) |
| addRegisterClass(VT, &AArch64::ZPRRegClass); |
| |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
| if (useSVEForFixedLengthVectorVT(VT)) |
| addRegisterClass(VT, &AArch64::ZPRRegClass); |
| } |
| |
| for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) { |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| } |
| |
| for (auto VT : |
| { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, |
| MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); |
| |
| for (auto VT : |
| { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, |
| MVT::nxv2f64 }) { |
| setCondCodeAction(ISD::SETO, VT, Expand); |
| setCondCodeAction(ISD::SETOLT, VT, Expand); |
| setCondCodeAction(ISD::SETLT, VT, Expand); |
| setCondCodeAction(ISD::SETOLE, VT, Expand); |
| setCondCodeAction(ISD::SETLE, VT, Expand); |
| setCondCodeAction(ISD::SETULT, VT, Expand); |
| setCondCodeAction(ISD::SETULE, VT, Expand); |
| setCondCodeAction(ISD::SETUGE, VT, Expand); |
| setCondCodeAction(ISD::SETUGT, VT, Expand); |
| setCondCodeAction(ISD::SETUEQ, VT, Expand); |
| setCondCodeAction(ISD::SETUNE, VT, Expand); |
| |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::FPOWI, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FSINCOS, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| } |
| } |
| |
| // Compute derived properties from the register classes |
| computeRegisterProperties(Subtarget->getRegisterInfo()); |
| |
| // Provide all sorts of operation actions |
| setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
| setOperationAction(ISD::SETCC, MVT::i32, Custom); |
| setOperationAction(ISD::SETCC, MVT::i64, Custom); |
| setOperationAction(ISD::SETCC, MVT::f16, Custom); |
| setOperationAction(ISD::SETCC, MVT::f32, Custom); |
| setOperationAction(ISD::SETCC, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
| setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
| setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::i64, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::i64, Custom); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
| |
| setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
| |
| setOperationAction(ISD::FREM, MVT::f32, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f80, Expand); |
| |
| setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
| |
| // Custom lowering hooks are needed for XOR |
| // to fold it into CSINC/CSINV. |
| setOperationAction(ISD::XOR, MVT::i32, Custom); |
| setOperationAction(ISD::XOR, MVT::i64, Custom); |
| |
| // Virtually no operation on f128 is legal, but LLVM can't expand them when |
| // there's a valid register class, so we need custom operations in most cases. |
| setOperationAction(ISD::FABS, MVT::f128, Expand); |
| setOperationAction(ISD::FADD, MVT::f128, LibCall); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
| setOperationAction(ISD::FCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
| setOperationAction(ISD::FMA, MVT::f128, Expand); |
| setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
| setOperationAction(ISD::FNEG, MVT::f128, Expand); |
| setOperationAction(ISD::FPOW, MVT::f128, Expand); |
| setOperationAction(ISD::FREM, MVT::f128, Expand); |
| setOperationAction(ISD::FRINT, MVT::f128, Expand); |
| setOperationAction(ISD::FSIN, MVT::f128, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
| setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
| setOperationAction(ISD::FTRUNC, MVT::f128, Expand); |
| setOperationAction(ISD::SETCC, MVT::f128, Custom); |
| setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); |
| setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f128, Custom); |
| setOperationAction(ISD::SELECT, MVT::f128, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
| |
| // Lowering for many of the conversions is actually specified by the non-f128 |
| // type. The LowerXXX function will be trivial when f128 isn't involved. |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
| |
| setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
| |
| // Variable arguments. |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Custom); |
| setOperationAction(ISD::VACOPY, MVT::Other, Custom); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| |
| // Variable-sized objects. |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| if (Subtarget->isTargetWindows()) |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); |
| else |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); |
| |
| // Constant pool entries |
| setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
| |
| // BlockAddress |
| setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
| |
| // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences. |
| setOperationAction(ISD::ADDC, MVT::i32, Custom); |
| setOperationAction(ISD::ADDE, MVT::i32, Custom); |
| setOperationAction(ISD::SUBC, MVT::i32, Custom); |
| setOperationAction(ISD::SUBE, MVT::i32, Custom); |
| setOperationAction(ISD::ADDC, MVT::i64, Custom); |
| setOperationAction(ISD::ADDE, MVT::i64, Custom); |
| setOperationAction(ISD::SUBC, MVT::i64, Custom); |
| setOperationAction(ISD::SUBE, MVT::i64, Custom); |
| |
| // AArch64 lacks both left-rotate and popcount instructions. |
| setOperationAction(ISD::ROTL, MVT::i32, Expand); |
| setOperationAction(ISD::ROTL, MVT::i64, Expand); |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| } |
| |
| // AArch64 doesn't have i32 MULH{S|U}. |
| setOperationAction(ISD::MULHU, MVT::i32, Expand); |
| setOperationAction(ISD::MULHS, MVT::i32, Expand); |
| |
| // AArch64 doesn't have {U|S}MUL_LOHI. |
| setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
| |
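| // Scalar CTPOP is custom-lowered by moving the value into a vector register |
| // and using the NEON CNT instruction followed by a horizontal add (see |
| // LowerCTPOP). |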
| setOperationAction(ISD::CTPOP, MVT::i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i64, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i128, Custom); |
| |
| setOperationAction(ISD::ABS, MVT::i32, Custom); |
| setOperationAction(ISD::ABS, MVT::i64, Custom); |
| |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| } |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::SREM, MVT::i64, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i64, Expand); |
| |
| // Custom lower Add/Sub/Mul with overflow. |
| setOperationAction(ISD::SADDO, MVT::i32, Custom); |
| setOperationAction(ISD::SADDO, MVT::i64, Custom); |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDO, MVT::i64, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i64, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i64, Custom); |
| setOperationAction(ISD::SMULO, MVT::i32, Custom); |
| setOperationAction(ISD::SMULO, MVT::i64, Custom); |
| setOperationAction(ISD::UMULO, MVT::i32, Custom); |
| setOperationAction(ISD::UMULO, MVT::i64, Custom); |
| |
| setOperationAction(ISD::FSIN, MVT::f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| if (Subtarget->hasFullFP16()) |
| setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); |
| else |
| setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); |
| |
| setOperationAction(ISD::FREM, MVT::f16, Promote); |
| setOperationAction(ISD::FREM, MVT::v4f16, Expand); |
| setOperationAction(ISD::FREM, MVT::v8f16, Expand); |
| setOperationAction(ISD::FPOW, MVT::f16, Promote); |
| setOperationAction(ISD::FPOW, MVT::v4f16, Expand); |
| setOperationAction(ISD::FPOW, MVT::v8f16, Expand); |
| setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
| setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); |
| setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FCOS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCOS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSIN, MVT::f16, Promote); |
| setOperationAction(ISD::FSIN, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSIN, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FEXP, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP, MVT::v4f16, Expand); |
| setOperationAction(ISD::FEXP, MVT::v8f16, Expand); |
| setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); |
| |
| if (!Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SELECT, MVT::f16, Promote); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); |
| setOperationAction(ISD::SETCC, MVT::f16, Promote); |
| setOperationAction(ISD::BR_CC, MVT::f16, Promote); |
| setOperationAction(ISD::FADD, MVT::f16, Promote); |
| setOperationAction(ISD::FSUB, MVT::f16, Promote); |
| setOperationAction(ISD::FMUL, MVT::f16, Promote); |
| setOperationAction(ISD::FDIV, MVT::f16, Promote); |
| setOperationAction(ISD::FMA, MVT::f16, Promote); |
| setOperationAction(ISD::FNEG, MVT::f16, Promote); |
| setOperationAction(ISD::FABS, MVT::f16, Promote); |
| setOperationAction(ISD::FCEIL, MVT::f16, Promote); |
| setOperationAction(ISD::FSQRT, MVT::f16, Promote); |
| setOperationAction(ISD::FFLOOR, MVT::f16, Promote); |
| setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); |
| setOperationAction(ISD::FRINT, MVT::f16, Promote); |
| setOperationAction(ISD::FROUND, MVT::f16, Promote); |
| setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); |
| setOperationAction(ISD::FTRUNC, MVT::f16, Promote); |
| setOperationAction(ISD::FMINNUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); |
| |
| // Promote v4f16 to v4f32 when that is known to be safe. |
| setOperationAction(ISD::FADD, MVT::v4f16, Promote); |
| setOperationAction(ISD::FSUB, MVT::v4f16, Promote); |
| setOperationAction(ISD::FMUL, MVT::v4f16, Promote); |
| setOperationAction(ISD::FDIV, MVT::v4f16, Promote); |
| AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); |
| |
| setOperationAction(ISD::FABS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FNEG, MVT::v4f16, Expand); |
| setOperationAction(ISD::FROUND, MVT::v4f16, Expand); |
| setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); |
| setOperationAction(ISD::FMA, MVT::v4f16, Expand); |
| setOperationAction(ISD::SETCC, MVT::v4f16, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::v4f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); |
| setOperationAction(ISD::FRINT, MVT::v4f16, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); |
| |
| setOperationAction(ISD::FABS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FADD, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); |
| setOperationAction(ISD::FDIV, MVT::v8f16, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); |
| setOperationAction(ISD::FMA, MVT::v8f16, Expand); |
| setOperationAction(ISD::FMUL, MVT::v8f16, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FNEG, MVT::v8f16, Expand); |
| setOperationAction(ISD::FROUND, MVT::v8f16, Expand); |
| setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); |
| setOperationAction(ISD::FRINT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSUB, MVT::v8f16, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); |
| setOperationAction(ISD::SETCC, MVT::v8f16, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::v8f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); |
| } |
| |
| // AArch64 has implementations of a lot of rounding-like FP operations. |
| for (MVT Ty : {MVT::f32, MVT::f64}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
| setOperationAction(ISD::FMINNUM, Ty, Legal); |
| setOperationAction(ISD::FMAXNUM, Ty, Legal); |
| setOperationAction(ISD::FMINIMUM, Ty, Legal); |
| setOperationAction(ISD::FMAXIMUM, Ty, Legal); |
| setOperationAction(ISD::LROUND, Ty, Legal); |
| setOperationAction(ISD::LLROUND, Ty, Legal); |
| setOperationAction(ISD::LRINT, Ty, Legal); |
| setOperationAction(ISD::LLRINT, Ty, Legal); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::f16, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f16, Legal); |
| setOperationAction(ISD::FRINT, MVT::f16, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f16, Legal); |
| setOperationAction(ISD::FROUND, MVT::f16, Legal); |
| setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
| } |
| |
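| // PREFETCH is custom-lowered so the read/write, locality and data/instruction |
| // operands can be encoded into a single PRFM hint immediate. |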
| setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
| |
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
| |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); |
| |
| // Generate outline atomics library calls only if LSE was not specified for |
| // the subtarget. |
| if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall); |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); |
| setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall); |
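| // Register the names of the __aarch64_* outline atomic helpers for every |
| // supported access size (1, 2, 4, 8 and, for CAS, 16 bytes) and memory |
| // ordering variant (_relax, _acq, _rel, _acq_rel). |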
| #define LCALLNAMES(A, B, N) \ |
| setLibcallName(A##N##_RELAX, #B #N "_relax"); \ |
| setLibcallName(A##N##_ACQ, #B #N "_acq"); \ |
| setLibcallName(A##N##_REL, #B #N "_rel"); \ |
| setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); |
| #define LCALLNAME4(A, B) \ |
| LCALLNAMES(A, B, 1) \ |
| LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) |
| #define LCALLNAME5(A, B) \ |
| LCALLNAMES(A, B, 1) \ |
| LCALLNAMES(A, B, 2) \ |
| LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16) |
| LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) |
| LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) |
| LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd) |
| LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset) |
| LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr) |
| LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor) |
| #undef LCALLNAMES |
| #undef LCALLNAME4 |
| #undef LCALLNAME5 |
| } |
| |
| // 128-bit loads and stores can be done without expanding. |
| setOperationAction(ISD::LOAD, MVT::i128, Custom); |
| setOperationAction(ISD::STORE, MVT::i128, Custom); |
| |
| // Aligned 128-bit loads and stores are single-copy atomic according to the |
| // v8.4a spec. |
| if (Subtarget->hasLSE2()) { |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); |
| } |
| |
| // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the |
| // custom lowering, as there are no un-paired non-temporal stores and |
| // legalization will break up 256 bit inputs. |
| setOperationAction(ISD::STORE, MVT::v32i8, Custom); |
| setOperationAction(ISD::STORE, MVT::v16i16, Custom); |
| setOperationAction(ISD::STORE, MVT::v16f16, Custom); |
| setOperationAction(ISD::STORE, MVT::v8i32, Custom); |
| setOperationAction(ISD::STORE, MVT::v8f32, Custom); |
| setOperationAction(ISD::STORE, MVT::v4f64, Custom); |
| setOperationAction(ISD::STORE, MVT::v4i64, Custom); |
| |
| // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. |
| // This requires the Performance Monitors extension. |
| if (Subtarget->hasPerfMon()) |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); |
| |
| if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| // Issue __sincos_stret if available. |
| setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| } else { |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| } |
| |
| if (Subtarget->getTargetTriple().isOSMSVCRT()) { |
| // MSVCRT doesn't have powi; fall back to pow |
| setLibcallName(RTLIB::POWI_F32, nullptr); |
| setLibcallName(RTLIB::POWI_F64, nullptr); |
| } |
| |
| // Make floating-point constants legal for the large code model, so they don't |
| // become loads from the constant pool. |
| if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
| setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
| setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
| } |
| |
| // AArch64 does not have floating-point extending loads, i1 sign-extending |
| // loads, floating-point truncating stores, or v2i32->v2i16 truncating stores. |
| for (MVT VT : MVT::fp_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); |
| } |
| for (MVT VT : MVT::integer_valuetypes()) |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); |
| |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
| |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
| |
| // Indexed loads and stores are supported. |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, MVT::i8, Legal); |
| setIndexedLoadAction(im, MVT::i16, Legal); |
| setIndexedLoadAction(im, MVT::i32, Legal); |
| setIndexedLoadAction(im, MVT::i64, Legal); |
| setIndexedLoadAction(im, MVT::f64, Legal); |
| setIndexedLoadAction(im, MVT::f32, Legal); |
| setIndexedLoadAction(im, MVT::f16, Legal); |
| setIndexedLoadAction(im, MVT::bf16, Legal); |
| setIndexedStoreAction(im, MVT::i8, Legal); |
| setIndexedStoreAction(im, MVT::i16, Legal); |
| setIndexedStoreAction(im, MVT::i32, Legal); |
| setIndexedStoreAction(im, MVT::i64, Legal); |
| setIndexedStoreAction(im, MVT::f64, Legal); |
| setIndexedStoreAction(im, MVT::f32, Legal); |
| setIndexedStoreAction(im, MVT::f16, Legal); |
| setIndexedStoreAction(im, MVT::bf16, Legal); |
| } |
| |
| // Trap. |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
| |
| // We combine OR nodes for bitfield operations. |
| setTargetDAGCombine(ISD::OR); |
| // Try to create BICs for vector ANDs. |
| setTargetDAGCombine(ISD::AND); |
| |
| // Vector add and sub nodes may conceal a high-half opportunity. |
| // Also, try to fold ADD into CSINC/CSINV. |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::ABS); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::XOR); |
| setTargetDAGCombine(ISD::SINT_TO_FP); |
| setTargetDAGCombine(ISD::UINT_TO_FP); |
| |
| setTargetDAGCombine(ISD::FP_TO_SINT); |
| setTargetDAGCombine(ISD::FP_TO_UINT); |
| setTargetDAGCombine(ISD::FP_TO_SINT_SAT); |
| setTargetDAGCombine(ISD::FP_TO_UINT_SAT); |
| setTargetDAGCombine(ISD::FDIV); |
| |
| // Try and combine setcc with csel |
| setTargetDAGCombine(ISD::SETCC); |
| |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::VECTOR_SPLICE); |
| setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
| setTargetDAGCombine(ISD::TRUNCATE); |
| setTargetDAGCombine(ISD::CONCAT_VECTORS); |
| setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
| setTargetDAGCombine(ISD::STORE); |
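| // When the top byte of addresses is ignored (TBI), also combine loads so |
| // that redundant masking of the address tag byte can be stripped (see |
| // performTBISimplification). |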
| if (Subtarget->supportsAddressTopByteIgnored()) |
| setTargetDAGCombine(ISD::LOAD); |
| |
| setTargetDAGCombine(ISD::MUL); |
| |
| setTargetDAGCombine(ISD::SELECT); |
| setTargetDAGCombine(ISD::VSELECT); |
| |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
| setTargetDAGCombine(ISD::VECREDUCE_ADD); |
| setTargetDAGCombine(ISD::STEP_VECTOR); |
| |
| setTargetDAGCombine(ISD::GlobalAddress); |
| |
| // In case of strict alignment, avoid an excessive number of byte wide stores. |
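| // The *OptSize limits below are the values used when a function is being |
| // optimized for size. |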
| MaxStoresPerMemsetOptSize = 8; |
| MaxStoresPerMemset = Subtarget->requiresStrictAlign() |
| ? MaxStoresPerMemsetOptSize : 32; |
| |
| MaxGluedStoresPerMemcpy = 4; |
| MaxStoresPerMemcpyOptSize = 4; |
| MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() |
| ? MaxStoresPerMemcpyOptSize : 16; |
| |
| MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; |
| |
| MaxLoadsPerMemcmpOptSize = 4; |
| MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() |
| ? MaxLoadsPerMemcmpOptSize : 8; |
| |
| setStackPointerRegisterToSaveRestore(AArch64::SP); |
| |
| setSchedulingPreference(Sched::Hybrid); |
| |
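| // Allow CodeGenPrepare to promote extensions so that ext(load) pairs can be |
| // folded into extending loads. |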
| EnableExtLdPromotion = true; |
| |
| // Set required alignment. |
| setMinFunctionAlignment(Align(4)); |
| // Set preferred alignments. |
| setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); |
| setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); |
| |
| // Only change the limit for entries in a jump table if it is specified by |
| // the subtarget and has not been set on the command line. |
| unsigned MaxJT = STI.getMaximumJumpTableSize(); |
| if (MaxJT && getMaximumJumpTableSize() == UINT_MAX) |
| setMaximumJumpTableSize(MaxJT); |
| |
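| // AArch64 has bitfield-extract instructions (UBFX/SBFX), so keep |
| // shift-and-mask sequences in a form the instruction selector can match. |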
| setHasExtractBitsInsn(true); |
| |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| |
| if (Subtarget->hasNEON()) { |
| // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to |
| // silliness like this: |
| setOperationAction(ISD::FABS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FADD, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FDIV, MVT::v1f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); |
| setOperationAction(ISD::FMA, MVT::v1f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::v1f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FNEG, MVT::v1f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::v1f64, Expand); |
| setOperationAction(ISD::FREM, MVT::v1f64, Expand); |
| setOperationAction(ISD::FROUND, MVT::v1f64, Expand); |
| setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::v1f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); |
| setOperationAction(ISD::SETCC, MVT::v1f64, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::v1f64, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); |
| setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); |
| |
| setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand); |
| setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand); |
| |
| setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
| |
| // AArch64 doesn't have direct vector->f32 conversion instructions for |
| // elements smaller than i32, so promote the input to i32 first. |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); |
| |
| // Similarly, there is no direct i32 -> f64 vector conversion instruction. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); |
| // Or, direct i32 -> f16 vector conversion. Set this to Custom so the |
| // conversion happens in two steps: v4i32 -> v4f32 -> v4f16. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
| } else { |
| // When AArch64 doesn't have full FP16 support, promote the input |
| // to i32 first. |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); |
| } |
| |
| setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
| setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom); |
| for (auto VT : {MVT::v1i64, MVT::v2i64}) { |
| setOperationAction(ISD::UMAX, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Custom); |
| setOperationAction(ISD::UMIN, VT, Custom); |
| setOperationAction(ISD::SMIN, VT, Custom); |
| } |
| |
| // AArch64 doesn't have MUL.2d: |
| setOperationAction(ISD::MUL, MVT::v2i64, Expand); |
| // Custom handling for some quad-vector types to detect MULL. |
| setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| |
| // Saturates |
| for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
| MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::SADDSAT, VT, Legal); |
| setOperationAction(ISD::UADDSAT, VT, Legal); |
| setOperationAction(ISD::SSUBSAT, VT, Legal); |
| setOperationAction(ISD::USUBSAT, VT, Legal); |
| } |
| |
| for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, |
| MVT::v4i32}) { |
| setOperationAction(ISD::ABDS, VT, Legal); |
| setOperationAction(ISD::ABDU, VT, Legal); |
| } |
| |
| // Vector reductions |
| for (MVT VT : { MVT::v4f16, MVT::v2f32, |
| MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { |
| if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); |
| } |
| } |
| for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
| MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| } |
| setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom); |
| |
| setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); |
| setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); |
| // Likewise, narrowing and extending vector loads/stores aren't handled |
| // directly. |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| |
| if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { |
| setOperationAction(ISD::MULHS, VT, Legal); |
| setOperationAction(ISD::MULHU, VT, Legal); |
| } else { |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| } |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| |
| for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| } |
| |
| // AArch64 has implementations of a lot of rounding-like FP operations. |
| for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
| } |
| } |
| |
| if (Subtarget->hasSVE()) |
| setOperationAction(ISD::VSCALE, MVT::i32, Custom); |
| |
| setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); |
| |
| setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
| setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
| setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
| setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
| setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
| setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
| } |
| |
| if (Subtarget->hasSVE()) { |
| for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { |
| setOperationAction(ISD::BITREVERSE, VT, Custom); |
| setOperationAction(ISD::BSWAP, VT, Custom); |
| setOperationAction(ISD::CTLZ, VT, Custom); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::CTTZ, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::MULHS, VT, Custom); |
| setOperationAction(ISD::MULHU, VT, Custom); |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SDIV, VT, Custom); |
| setOperationAction(ISD::UDIV, VT, Custom); |
| setOperationAction(ISD::SMIN, VT, Custom); |
| setOperationAction(ISD::UMIN, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Custom); |
| setOperationAction(ISD::UMAX, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::ABS, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| } |
| |
| // Illegal unpacked integer vector types. |
| for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| } |
| |
| // Legalize unpacked bitcasts to REINTERPRET_CAST. |
| for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16, |
| MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| |
| for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| |
| // There are no legal MVT::nxv16f## based types. |
| if (VT != MVT::nxv16i1) { |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| } |
| } |
| |
| // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does |
| for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64, |
| MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, |
| MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) { |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| } |
| |
| for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { |
| for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) { |
| // Avoid marking truncating FP stores as legal to prevent the |
| // DAGCombiner from creating unsupported truncating stores. |
| setTruncStoreAction(VT, InnerVT, Expand); |
| // SVE does not have floating-point extending loads. |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| } |
| |
| // SVE supports truncating stores of 64 and 128-bit vectors |
| setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom); |
| setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom); |
| setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom); |
| setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); |
| setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); |
| |
| for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, |
| MVT::nxv4f32, MVT::nxv2f64}) { |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::FADD, VT, Custom); |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| setOperationAction(ISD::FDIV, VT, Custom); |
| setOperationAction(ISD::FMA, VT, Custom); |
| setOperationAction(ISD::FMAXIMUM, VT, Custom); |
| setOperationAction(ISD::FMAXNUM, VT, Custom); |
| setOperationAction(ISD::FMINIMUM, VT, Custom); |
| setOperationAction(ISD::FMINNUM, VT, Custom); |
| setOperationAction(ISD::FMUL, VT, Custom); |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FSUB, VT, Custom); |
| setOperationAction(ISD::FCEIL, VT, Custom); |
| setOperationAction(ISD::FFLOOR, VT, Custom); |
| setOperationAction(ISD::FNEARBYINT, VT, Custom); |
| setOperationAction(ISD::FRINT, VT, Custom); |
| setOperationAction(ISD::FROUND, VT, Custom); |
| setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
| setOperationAction(ISD::FTRUNC, VT, Custom); |
| setOperationAction(ISD::FSQRT, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
| |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| } |
| |
| for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| } |
| |
| setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); |
| |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); |
| |
| // NOTE: Currently this has to happen after computeRegisterProperties rather |
| // than the preferred option of combining it with the addRegisterClass call. |
| if (Subtarget->useSVEForFixedLengthVectors()) { |
| for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
| if (useSVEForFixedLengthVectorVT(VT)) |
| addTypeForFixedLengthSVE(VT); |
| for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
| if (useSVEForFixedLengthVectorVT(VT)) |
| addTypeForFixedLengthSVE(VT); |
| |
| // A 64-bit result can come from an input wider than a NEON register. |
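| // (For example, truncating a 256-bit v8i32 held in an SVE register down to |
| // a 64-bit v8i8.) |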
| for (auto VT : {MVT::v8i8, MVT::v4i16}) |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom); |
| |
| // A 128-bit result implies an input wider than a NEON register. |
| for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| for (auto VT : {MVT::v8f16, MVT::v4f32}) |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| |
| // These operations are not supported on NEON but SVE can do them. |
| setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
| setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
| setOperationAction(ISD::MUL, MVT::v1i64, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| setOperationAction(ISD::MULHS, MVT::v1i64, Custom); |
| setOperationAction(ISD::MULHS, MVT::v2i64, Custom); |
| setOperationAction(ISD::MULHU, MVT::v1i64, Custom); |
| setOperationAction(ISD::MULHU, MVT::v2i64, Custom); |
| setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
| setOperationAction(ISD::SDIV, MVT::v16i8, Custom); |
| setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::v8i16, Custom); |
| setOperationAction(ISD::SDIV, MVT::v2i32, Custom); |
| setOperationAction(ISD::SDIV, MVT::v4i32, Custom); |
| setOperationAction(ISD::SDIV, MVT::v1i64, Custom); |
| setOperationAction(ISD::SDIV, MVT::v2i64, Custom); |
| setOperationAction(ISD::SMAX, MVT::v1i64, Custom); |
| setOperationAction(ISD::SMAX, MVT::v2i64, Custom); |
| setOperationAction(ISD::SMIN, MVT::v1i64, Custom); |
| setOperationAction(ISD::SMIN, MVT::v2i64, Custom); |
| setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::v16i8, Custom); |
| setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::v8i16, Custom); |
| setOperationAction(ISD::UDIV, MVT::v2i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::v4i32, Custom); |
| setOperationAction(ISD::UDIV, MVT::v1i64, Custom); |
| setOperationAction(ISD::UDIV, MVT::v2i64, Custom); |
| setOperationAction(ISD::UMAX, MVT::v1i64, Custom); |
| setOperationAction(ISD::UMAX, MVT::v2i64, Custom); |
| setOperationAction(ISD::UMIN, MVT::v1i64, Custom); |
| setOperationAction(ISD::UMIN, MVT::v2i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); |
| |
| // Int operations with no NEON support. |
| for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, |
| MVT::v2i32, MVT::v4i32, MVT::v2i64}) { |
| setOperationAction(ISD::BITREVERSE, VT, Custom); |
| setOperationAction(ISD::CTTZ, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| } |
| |
| // FP operations with no NEON support. |
| for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, |
| MVT::v1f64, MVT::v2f64}) |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| |
| // Use SVE for vectors with more than 2 elements. |
| for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| } |
| |
| setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64); |
| setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32); |
| setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16); |
| setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); |
| } |
| |
| PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); |
| } |
| |
| void AArch64TargetLowering::addTypeForNEON(MVT VT) { |
| assert(VT.isVector() && "VT should be a vector type"); |
| |
| if (VT.isFloatingPoint()) { |
| MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT(); |
| setOperationPromotedToType(ISD::LOAD, VT, PromoteTo); |
| setOperationPromotedToType(ISD::STORE, VT, PromoteTo); |
| } |
| |
| // Mark vector float intrinsics as expand. |
| if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| } |
| |
| // But we do support custom-lowering for FCOPYSIGN. |
| if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 || |
| ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16())) |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
| |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| for (MVT InnerVT : MVT::all_valuetypes()) |
| setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| |
| // CNT supports only B element sizes, then use UADDLP to widen. |
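| // (e.g. a v4i16 CTPOP is typically lowered as CNT over the v8i8 bytes |
| // followed by a UADDLP back to v4i16.) |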
| if (VT != MVT::v8i8 && VT != MVT::v16i8) |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
| |
| if (!VT.isFloatingPoint()) |
| setOperationAction(ISD::ABS, VT, Legal); |
| |
| // [SU][MIN|MAX] are available for all NEON types apart from i64. |
| if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) |
| for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
| setOperationAction(Opcode, VT, Legal); |
| |
| // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. |
| if (VT.isFloatingPoint() && |
| VT.getVectorElementType() != MVT::bf16 && |
| (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) |
| for (unsigned Opcode : |
| {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) |
| setOperationAction(Opcode, VT, Legal); |
| |
| if (Subtarget->isLittleEndian()) { |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, VT, Legal); |
| setIndexedStoreAction(im, VT, Legal); |
| } |
| } |
| } |
| |
| bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT, |
| EVT OpVT) const { |
| // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo). |
| if (!Subtarget->hasSVE()) |
| return true; |
| |
| // We can only support legal predicate result types. |
| if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 && |
| ResVT != MVT::nxv16i1) |
| return true; |
| |
| // The whilelo instruction only works with i32 or i64 scalar inputs. |
| if (OpVT != MVT::i32 && OpVT != MVT::i64) |
| return true; |
| |
| return false; |
| } |
| |
| void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { |
| assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
| |
| // By default everything must be expanded. |
| for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
| setOperationAction(Op, VT, Expand); |
| |
| // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one. |
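| // (The fixed-length value is read from the low lanes of the corresponding |
| // scalable container, e.g. nxv4i32 when VT is a fixed-length vector of i32.) |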
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| |
| if (VT.isFloatingPoint()) { |
| setCondCodeAction(ISD::SETO, VT, Expand); |
| setCondCodeAction(ISD::SETOLT, VT, Expand); |
| setCondCodeAction(ISD::SETLT, VT, Expand); |
| setCondCodeAction(ISD::SETOLE, VT, Expand); |
| setCondCodeAction(ISD::SETLE, VT, Expand); |
| setCondCodeAction(ISD::SETULT, VT, Expand); |
| setCondCodeAction(ISD::SETULE, VT, Expand); |
| setCondCodeAction(ISD::SETUGE, VT, Expand); |
| setCondCodeAction(ISD::SETUGT, VT, Expand); |
| setCondCodeAction(ISD::SETUEQ, VT, Expand); |
| setCondCodeAction(ISD::SETUNE, VT, Expand); |
| } |
| |
| // Mark integer truncating stores/extending loads as having custom lowering |
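| // (e.g. for VT == MVT::v8i32 this covers truncating stores to v8i8 and |
| // v8i16, plus the matching sign- and zero-extending loads.) |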
| if (VT.isInteger()) { |
| MVT InnerVT = VT.changeVectorElementType(MVT::i8); |
| while (InnerVT != VT) { |
| setTruncStoreAction(VT, InnerVT, Custom); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom); |
| InnerVT = InnerVT.changeVectorElementType( |
| MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits())); |
| } |
| } |
| |
| // Mark floating-point truncating stores/extending loads as having custom |
| // lowering |
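| // (e.g. for VT == MVT::v8f32 this covers a truncating store to v8f16 and |
| // the matching extending load.) |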
| if (VT.isFloatingPoint()) { |
| MVT InnerVT = VT.changeVectorElementType(MVT::f16); |
| while (InnerVT != VT) { |
| setTruncStoreAction(VT, InnerVT, Custom); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom); |
| InnerVT = InnerVT.changeVectorElementType( |
| MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits())); |
| } |
| } |
| |
| // Lower fixed length vector operations to scalable equivalents. |
| setOperationAction(ISD::ABS, VT, Custom); |
| setOperationAction(ISD::ADD, VT, Custom); |
| setOperationAction(ISD::AND, VT, Custom); |
| setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| setOperationAction(ISD::BITCAST, VT, Custom); |
| setOperationAction(ISD::BITREVERSE, VT, Custom); |
| setOperationAction(ISD::BSWAP, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| setOperationAction(ISD::CTLZ, VT, Custom); |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| setOperationAction(ISD::CTTZ, VT, Custom); |
| setOperationAction(ISD::FABS, VT, Custom); |
| setOperationAction(ISD::FADD, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::FCEIL, VT, Custom); |
| setOperationAction(ISD::FDIV, VT, Custom); |
| setOperationAction(ISD::FFLOOR, VT, Custom); |
| setOperationAction(ISD::FMA, VT, Custom); |
| setOperationAction(ISD::FMAXIMUM, VT, Custom); |
| setOperationAction(ISD::FMAXNUM, VT, Custom); |
| setOperationAction(ISD::FMINIMUM, VT, Custom); |
| setOperationAction(ISD::FMINNUM, VT, Custom); |
| setOperationAction(ISD::FMUL, VT, Custom); |
| setOperationAction(ISD::FNEARBYINT, VT, Custom); |
| setOperationAction(ISD::FNEG, VT, Custom); |
| setOperationAction(ISD::FP_EXTEND, VT, Custom); |
| setOperationAction(ISD::FP_ROUND, VT, Custom); |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| setOperationAction(ISD::FRINT, VT, Custom); |
| setOperationAction(ISD::FROUND, VT, Custom); |
| setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
| setOperationAction(ISD::FSQRT, VT, Custom); |
| setOperationAction(ISD::FSUB, VT, Custom); |
| setOperationAction(ISD::FTRUNC, VT, Custom); |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::MGATHER, VT, Custom); |
| setOperationAction(ISD::MLOAD, VT, Custom); |
| setOperationAction(ISD::MSCATTER, VT, Custom); |
| setOperationAction(ISD::MSTORE, VT, Custom); |
| setOperationAction(ISD::MUL, VT, Custom); |
| setOperationAction(ISD::MULHS, VT, Custom); |
| setOperationAction(ISD::MULHU, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::SDIV, VT, Custom); |
| setOperationAction(ISD::SELECT, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); |
| setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::SMAX, VT, Custom); |
| setOperationAction(ISD::SMIN, VT, Custom); |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| setOperationAction(ISD::SUB, VT, Custom); |
| setOperationAction(ISD::TRUNCATE, VT, Custom); |
| setOperationAction(ISD::UDIV, VT, Custom); |
| setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
| setOperationAction(ISD::UMAX, VT, Custom); |
| setOperationAction(ISD::UMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::VSELECT, VT, Custom); |
| setOperationAction(ISD::XOR, VT, Custom); |
| setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| } |
| |
| void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &AArch64::FPR64RegClass); |
| addTypeForNEON(VT); |
| } |
| |
| void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &AArch64::FPR128RegClass); |
| addTypeForNEON(VT); |
| } |
| |
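| /// getSetCCResultType - Scalar comparisons produce an i32 result. Scalable |
| /// vector comparisons produce a predicate vector (<vscale x N x i1>), while |
| /// fixed-width vector comparisons produce an integer vector with the same |
| /// element count and width as the operands. |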
| EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, |
| LLVMContext &C, EVT VT) const { |
| if (!VT.isVector()) |
| return MVT::i32; |
| if (VT.isScalableVector()) |
| return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount()); |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
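| /// Try to replace a logical immediate that cannot be encoded directly with |
| /// an equivalent one that can. Only the bits in \p Demanded must be |
| /// preserved; the remaining bits are chosen so that the value becomes |
| /// encodable as an AArch64 bitmask immediate for the \p NewOpc instruction |
| /// of width \p Size. For example, with Size == 32, Imm == 0x0000fff1 and |
| /// only the low 16 bits demanded, the undemanded upper half is filled with |
| /// copies of bit 15 (a 1), giving 0xfffffff1, which is a valid bitmask |
| /// immediate. Returns true if the node was replaced. |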
| static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, |
| const APInt &Demanded, |
| TargetLowering::TargetLoweringOpt &TLO, |
| unsigned NewOpc) { |
| uint64_t OldImm = Imm, NewImm, Enc; |
| uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; |
| |
| // Return if the immediate is already all zeros, all ones, a bimm32 or a |
| // bimm64. |
| if (Imm == 0 || Imm == Mask || |
| AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) |
| return false; |
| |
| unsigned EltSize = Size; |
| uint64_t DemandedBits = Demanded.getZExtValue(); |
| |
| // Clear bits that are not demanded. |
| Imm &= DemandedBits; |
| |
| while (true) { |
| // The goal here is to set the non-demanded bits in a way that minimizes |
| // the number of transitions between 0 and 1. To achieve this goal, |
| // we set the non-demanded bits to the value of the preceding demanded bits. |
| // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a |
| // non-demanded bit), we copy bit0 (1) to the least significant 'x', |
| // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. |
| // The final result is 0b11000011. |
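| // The copy is implemented without a loop: RotatedImm places a 1 at the |
| // bottom of each run of non-demanded bits whose preceding demanded bit is |
| // 0, so adding it to NonDemandedBits clears exactly those runs via the |
| // carries, while runs that follow a demanded 1 stay all-ones and become the |
| // bits OR'ed into Imm below. The Carry term re-injects a carry that was |
| // lost past the top bit, so a run that wraps around the element boundary is |
| // treated as a single run. |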
| uint64_t NonDemandedBits = ~DemandedBits; |
| uint64_t InvertedImm = ~Imm & DemandedBits; |
| uint64_t RotatedImm = |
| ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & |
| NonDemandedBits; |
| uint64_t Sum = RotatedImm + NonDemandedBits; |
| bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); |
| uint64_t Ones = (Sum + Carry) & NonDemandedBits; |
| NewImm = (Imm | Ones) & Mask; |
| |
| // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate |
| // or all-ones or all-zeros, in which case we can stop searching. Otherwise, |
| // we halve the element size and continue the search. |
| if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) |
| break; |
| |
| // We cannot shrink the element size any further if it is 2-bits. |
| if (EltSize == 2) |
| return false; |
| |
| EltSize /= 2; |
| Mask >>= EltSize; |
| uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; |
| |
| // Return if there is a mismatch in any of the demanded bits of Imm and Hi. |
| if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) |
| return false; |
| |
| // Merge the upper and lower halves of Imm and DemandedBits. |
| Imm |= Hi; |
| DemandedBits |= DemandedBitsHi; |
| } |
| |
| ++NumOptimizedImms; |
| |
| // Replicate the element across the register width. |
| while (EltSize < Size) { |
| NewImm |= NewImm << EltSize; |
| EltSize *= 2; |
| } |
| |
| (void)OldImm; |
| assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && |
| "demanded bits should never be altered"); |
| assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); |
| |
| // Create the new constant immediate node. |
| EVT VT = Op.getValueType(); |
| SDLoc DL(Op); |
| SDValue New; |
| |
| // If the new constant immediate is all-zeros or all-ones, let the target |
| // independent DAG combine optimize this node. |
| if (NewImm == 0 || NewImm == OrigMask) { |
| New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), |
| TLO.DAG.getConstant(NewImm, DL, VT)); |
| // Otherwise, create a machine node so that target independent DAG combine |
| // doesn't undo this optimization. |
| } else { |
| Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); |
| SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); |
| New = SDValue( |
| TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); |
| } |
| |
| return TLO.CombineTo(Op, New); |
| } |
| |
| bool AArch64TargetLowering::targetShrinkDemandedConstant( |
| SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
| TargetLoweringOpt &TLO) const { |
| // Delay this optimization until as late as possible. |
| if (!TLO.LegalOps) |
| return false; |
| |
| if (!EnableOptimizeLogicalImm) |
| return false; |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector()) |
| return false; |
| |
| unsigned Size = VT.getSizeInBits(); |
| assert((Size == 32 || Size == 64) && |
| "i32 or i64 is expected after legalization."); |
| |
| // Exit early if we demand all bits. |
| if (DemandedBits.countPopulation() == Size) |
| return false; |
| |
| unsigned NewOpc; |
| switch (Op.getOpcode()) { |
| default: |
| return false; |
| case ISD::AND: |
| NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; |
| break; |
| case ISD::OR: |
| NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; |
| break; |
| case ISD::XOR: |
| NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; |
| break; |
| } |
| ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
| if (!C) |
| return false; |
| uint64_t Imm = C->getZExtValue(); |
| return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc); |
| } |
| |
| /// computeKnownBitsForTargetNode - Determine which bits of Op are known to |
| /// be either zero or one and return them in Known. |
| void AArch64TargetLowering::computeKnownBitsForTargetNode( |
| const SDValue Op, KnownBits &Known, |
| const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { |
| switch (Op.getOpcode()) { |
| default: |
| break; |
| case AArch64ISD::CSEL: { |
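| // CSEL selects between its first two operands, so only the bits known in |
| // common to both operands are known in the result. |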
| KnownBits Known2; |
| Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); |
| Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); |
| Known = KnownBits::commonBits(Known, Known2); |
| break; |
| } |
| case AArch64ISD::LOADgot: |
| case AArch64ISD::ADDlow: { |
| if (!Subtarget->isTargetILP32()) |
| break; |
| // In ILP32 mode all valid pointers are in the low 4GB of the address-space. |
| Known.Zero = APInt::getHighBitsSet(64, 32); |
| break; |
| } |
| case AArch64ISD::ASSERT_ZEXT_BOOL: { |
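| // The value is asserted to be a boolean (0 or 1), so bits 1-7 are known |
| // to be zero. |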
| Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); |
| Known.Zero |= APInt(Known.getBitWidth(), 0xFE); |
| break; |
| } |
| case ISD::INTRINSIC_W_CHAIN: { |
| ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); |
| Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); |
| switch (IntID) { |
| default: return; |
| case Intrinsic::aarch64_ldaxr: |
| case Intrinsic::aarch64_ldxr: { |
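| // The exclusive load zero-extends the loaded value to the result width, |
| // so all bits above the memory width are known to be zero. |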
| unsigned BitWidth = Known.getBitWidth(); |
| EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); |
| unsigned MemBits = VT.getScalarSizeInBits(); |
| Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); |
| return; |
| } |
| } |
| break; |
| } |
| case ISD::INTRINSIC_WO_CHAIN: |
| case ISD::INTRINSIC_VOID: { |
| unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
| switch (IntNo) { |
| default: |
| break; |
| case Intrinsic::aarch64_neon_umaxv: |
| case Intrinsic::aarch64_neon_uminv: { |
| // Figure out the datatype of the vector operand. The UMINV/UMAXV |
| // instructions zero-extend their result, so we can mark as known zero all |
| // the bits above the element width. 32-bit or larger elements don't need |
| // this, as those are legal types and will be handled by isel directly. |
| MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); |
| unsigned BitWidth = Known.getBitWidth(); |
| if (VT == MVT::v8i8 || VT == MVT::v16i8) { |
| assert(BitWidth >= 8 && "Unexpected width!"); |
| APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); |
| Known.Zero |= Mask; |
| } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { |
| assert(BitWidth >= 16 && "Unexpected width!"); |
| APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); |
| Known.Zero |= Mask; |
| } |
| break; |
| } |
| } |
| } |
| } |
| } |
| |
| MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, |
| EVT) const { |
| return MVT::i64; |
| } |
| |
| bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
| EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
| bool *Fast) const { |
| if (Subtarget->requiresStrictAlign()) |
| return false; |
| |
| if (Fast) { |
| // Some CPUs are fine with unaligned stores except for 128-bit ones. |
| *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || |
| // See comments in performSTORECombine() for more details about |
| // these conditions. |
| |
| // Code that uses clang vector extensions can mark that it |
| // wants unaligned accesses to be treated as fast by |
| // underspecifying alignment to be 1 or 2. |
| Alignment <= 2 || |
| |
| // Disregard v2i64. Memcpy lowering produces those and splitting |
| // them regresses performance on micro-benchmarks and olden/bh. |
| VT == MVT::v2i64; |
| } |
| return true; |
| } |
| |
| // Same as above but handling LLTs instead. |
| bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
| LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
| bool *Fast) const { |
| if (Subtarget->requiresStrictAlign()) |
| return false; |
| |
| if (Fast) { |
| // Some CPUs are fine with unaligned stores except for 128-bit ones. |
| *Fast = !Subtarget->isMisaligned128StoreSlow() || |
| Ty.getSizeInBytes() != 16 || |
| // See comments in performSTORECombine() for more details about |
| // these conditions. |
| |
| // Code that uses clang vector extensions can mark that it |
| // wants unaligned accesses to be treated as fast by |
| // underspecifying alignment to be 1 or 2. |
| Alignment <= 2 || |
| |
| // Disregard v2i64. Memcpy lowering produces those and splitting |
| // them regresses performance on micro-benchmarks and olden/bh. |
| Ty == LLT::fixed_vector(2, 64); |
| } |
| return true; |
| } |
| |
| FastISel * |
| AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
| const TargetLibraryInfo *libInfo) const { |
| return AArch64::createFastISel(funcInfo, libInfo); |
| } |
| |
| const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { |
| #define MAKE_CASE(V) \ |
| case V: \ |
| return #V; |
| switch ((AArch64ISD::NodeType)Opcode) { |
| case AArch64ISD::FIRST_NUMBER: |
| break; |
| MAKE_CASE(AArch64ISD::CALL) |
| MAKE_CASE(AArch64ISD::ADRP) |
| MAKE_CASE(AArch64ISD::ADR) |
| MAKE_CASE(AArch64ISD::ADDlow) |
| MAKE_CASE(AArch64ISD::LOADgot) |
| MAKE_CASE(AArch64ISD::RET_FLAG) |
| MAKE_CASE(AArch64ISD::BRCOND) |
| MAKE_CASE(AArch64ISD::CSEL) |
| MAKE_CASE(AArch64ISD::CSINV) |
| MAKE_CASE(AArch64ISD::CSNEG) |
| MAKE_CASE(AArch64ISD::CSINC) |
| MAKE_CASE(AArch64ISD::THREAD_POINTER) |
| MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) |
| MAKE_CASE(AArch64ISD::ADD_PRED) |
| MAKE_CASE(AArch64ISD::MUL_PRED) |
| MAKE_CASE(AArch64ISD::MULHS_PRED) |
| MAKE_CASE(AArch64ISD::MULHU_PRED) |
| MAKE_CASE(AArch64ISD::SDIV_PRED) |
| MAKE_CASE(AArch64ISD::SHL_PRED) |
| MAKE_CASE(AArch64ISD::SMAX_PRED) |
| MAKE_CASE(AArch64ISD::SMIN_PRED) |
| MAKE_CASE(AArch64ISD::SRA_PRED) |
| MAKE_CASE(AArch64ISD::SRL_PRED) |
| MAKE_CASE(AArch64ISD::SUB_PRED) |
| MAKE_CASE(AArch64ISD::UDIV_PRED) |
| MAKE_CASE(AArch64ISD::UMAX_PRED) |
| MAKE_CASE(AArch64ISD::UMIN_PRED) |
| MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1) |
| MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::ADC) |
| MAKE_CASE(AArch64ISD::SBC) |
| MAKE_CASE(AArch64ISD::ADDS) |
| MAKE_CASE(AArch64ISD::SUBS) |
| MAKE_CASE(AArch64ISD::ADCS) |
| MAKE_CASE(AArch64ISD::SBCS) |
| MAKE_CASE(AArch64ISD::ANDS) |
| MAKE_CASE(AArch64ISD::CCMP) |
| MAKE_CASE(AArch64ISD::CCMN) |
| MAKE_CASE(AArch64ISD::FCCMP) |
| MAKE_CASE(AArch64ISD::FCMP) |
| MAKE_CASE(AArch64ISD::STRICT_FCMP) |
| MAKE_CASE(AArch64ISD::STRICT_FCMPE) |
| MAKE_CASE(AArch64ISD::DUP) |
| MAKE_CASE(AArch64ISD::DUPLANE8) |
| MAKE_CASE(AArch64ISD::DUPLANE16) |
| MAKE_CASE(AArch64ISD::DUPLANE32) |
| MAKE_CASE(AArch64ISD::DUPLANE64) |
| MAKE_CASE(AArch64ISD::MOVI) |
| MAKE_CASE(AArch64ISD::MOVIshift) |
| MAKE_CASE(AArch64ISD::MOVIedit) |
| MAKE_CASE(AArch64ISD::MOVImsl) |
| MAKE_CASE(AArch64ISD::FMOV) |
| MAKE_CASE(AArch64ISD::MVNIshift) |
| MAKE_CASE(AArch64ISD::MVNImsl) |
| MAKE_CASE(AArch64ISD::BICi) |
| MAKE_CASE(AArch64ISD::ORRi) |
| MAKE_CASE(AArch64ISD::BSP) |
| MAKE_CASE(AArch64ISD::EXTR) |
| MAKE_CASE(AArch64ISD::ZIP1) |
| MAKE_CASE(AArch64ISD::ZIP2) |
| MAKE_CASE(AArch64ISD::UZP1) |
| MAKE_CASE(AArch64ISD::UZP2) |
| MAKE_CASE(AArch64ISD::TRN1) |
| MAKE_CASE(AArch64ISD::TRN2) |
| MAKE_CASE(AArch64ISD::REV16) |
| MAKE_CASE(AArch64ISD::REV32) |
| MAKE_CASE(AArch64ISD::REV64) |
| MAKE_CASE(AArch64ISD::EXT) |
| MAKE_CASE(AArch64ISD::SPLICE) |
| MAKE_CASE(AArch64ISD::VSHL) |
| MAKE_CASE(AArch64ISD::VLSHR) |
| MAKE_CASE(AArch64ISD::VASHR) |
| MAKE_CASE(AArch64ISD::VSLI) |
| MAKE_CASE(AArch64ISD::VSRI) |
| MAKE_CASE(AArch64ISD::CMEQ) |
| MAKE_CASE(AArch64ISD::CMGE) |
| MAKE_CASE(AArch64ISD::CMGT) |
| MAKE_CASE(AArch64ISD::CMHI) |
| MAKE_CASE(AArch64ISD::CMHS) |
| MAKE_CASE(AArch64ISD::FCMEQ) |
| MAKE_CASE(AArch64ISD::FCMGE) |
| MAKE_CASE(AArch64ISD::FCMGT) |
| MAKE_CASE(AArch64ISD::CMEQz) |
| MAKE_CASE(AArch64ISD::CMGEz) |
| MAKE_CASE(AArch64ISD::CMGTz) |
| MAKE_CASE(AArch64ISD::CMLEz) |
| MAKE_CASE(AArch64ISD::CMLTz) |
| MAKE_CASE(AArch64ISD::FCMEQz) |
| MAKE_CASE(AArch64ISD::FCMGEz) |
| MAKE_CASE(AArch64ISD::FCMGTz) |
| MAKE_CASE(AArch64ISD::FCMLEz) |
| MAKE_CASE(AArch64ISD::FCMLTz) |
| MAKE_CASE(AArch64ISD::SADDV) |
| MAKE_CASE(AArch64ISD::UADDV) |
| MAKE_CASE(AArch64ISD::SRHADD) |
| MAKE_CASE(AArch64ISD::URHADD) |
| MAKE_CASE(AArch64ISD::SHADD) |
| MAKE_CASE(AArch64ISD::UHADD) |
| MAKE_CASE(AArch64ISD::SDOT) |
| MAKE_CASE(AArch64ISD::UDOT) |
| MAKE_CASE(AArch64ISD::SMINV) |
| MAKE_CASE(AArch64ISD::UMINV) |
| MAKE_CASE(AArch64ISD::SMAXV) |
| MAKE_CASE(AArch64ISD::UMAXV) |
| MAKE_CASE(AArch64ISD::SADDV_PRED) |
| MAKE_CASE(AArch64ISD::UADDV_PRED) |
| MAKE_CASE(AArch64ISD::SMAXV_PRED) |
| MAKE_CASE(AArch64ISD::UMAXV_PRED) |
| MAKE_CASE(AArch64ISD::SMINV_PRED) |
| MAKE_CASE(AArch64ISD::UMINV_PRED) |
| MAKE_CASE(AArch64ISD::ORV_PRED) |
| MAKE_CASE(AArch64ISD::EORV_PRED) |
| MAKE_CASE(AArch64ISD::ANDV_PRED) |
| MAKE_CASE(AArch64ISD::CLASTA_N) |
| MAKE_CASE(AArch64ISD::CLASTB_N) |
| MAKE_CASE(AArch64ISD::LASTA) |
| MAKE_CASE(AArch64ISD::LASTB) |
| MAKE_CASE(AArch64ISD::REINTERPRET_CAST) |
| MAKE_CASE(AArch64ISD::LS64_BUILD) |
| MAKE_CASE(AArch64ISD::LS64_EXTRACT) |
| MAKE_CASE(AArch64ISD::TBL) |
| MAKE_CASE(AArch64ISD::FADD_PRED) |
| MAKE_CASE(AArch64ISD::FADDA_PRED) |
| MAKE_CASE(AArch64ISD::FADDV_PRED) |
| MAKE_CASE(AArch64ISD::FDIV_PRED) |
| MAKE_CASE(AArch64ISD::FMA_PRED) |
| MAKE_CASE(AArch64ISD::FMAX_PRED) |
| MAKE_CASE(AArch64ISD::FMAXV_PRED) |
| MAKE_CASE(AArch64ISD::FMAXNM_PRED) |
| MAKE_CASE(AArch64ISD::FMAXNMV_PRED) |
| MAKE_CASE(AArch64ISD::FMIN_PRED) |
| MAKE_CASE(AArch64ISD::FMINV_PRED) |
| MAKE_CASE(AArch64ISD::FMINNM_PRED) |
| MAKE_CASE(AArch64ISD::FMINNMV_PRED) |
| MAKE_CASE(AArch64ISD::FMUL_PRED) |
| MAKE_CASE(AArch64ISD::FSUB_PRED) |
| MAKE_CASE(AArch64ISD::BIC) |
| MAKE_CASE(AArch64ISD::BIT) |
| MAKE_CASE(AArch64ISD::CBZ) |
| MAKE_CASE(AArch64ISD::CBNZ) |
| MAKE_CASE(AArch64ISD::TBZ) |
| MAKE_CASE(AArch64ISD::TBNZ) |
| MAKE_CASE(AArch64ISD::TC_RETURN) |
| MAKE_CASE(AArch64ISD::PREFETCH) |
| MAKE_CASE(AArch64ISD::SITOF) |
| MAKE_CASE(AArch64ISD::UITOF) |
| MAKE_CASE(AArch64ISD::NVCAST) |
| MAKE_CASE(AArch64ISD::MRS) |
| MAKE_CASE(AArch64ISD::SQSHL_I) |
| MAKE_CASE(AArch64ISD::UQSHL_I) |
| MAKE_CASE(AArch64ISD::SRSHR_I) |
| MAKE_CASE(AArch64ISD::URSHR_I) |
| MAKE_CASE(AArch64ISD::SQSHLU_I) |
| MAKE_CASE(AArch64ISD::WrapperLarge) |
| MAKE_CASE(AArch64ISD::LD2post) |
| MAKE_CASE(AArch64ISD::LD3post) |
| MAKE_CASE(AArch64ISD::LD4post) |
| MAKE_CASE(AArch64ISD::ST2post) |
| MAKE_CASE(AArch64ISD::ST3post) |
| MAKE_CASE(AArch64ISD::ST4post) |
| MAKE_CASE(AArch64ISD::LD1x2post) |
| MAKE_CASE(AArch64ISD::LD1x3post) |
| MAKE_CASE(AArch64ISD::LD1x4post) |
| MAKE_CASE(AArch64ISD::ST1x2post) |
| MAKE_CASE(AArch64ISD::ST1x3post) |
| MAKE_CASE(AArch64ISD::ST1x4post) |
| MAKE_CASE(AArch64ISD::LD1DUPpost) |
| MAKE_CASE(AArch64ISD::LD2DUPpost) |
| MAKE_CASE(AArch64ISD::LD3DUPpost) |
| MAKE_CASE(AArch64ISD::LD4DUPpost) |
| MAKE_CASE(AArch64ISD::LD1LANEpost) |
| MAKE_CASE(AArch64ISD::LD2LANEpost) |
| MAKE_CASE(AArch64ISD::LD3LANEpost) |
| MAKE_CASE(AArch64ISD::LD4LANEpost) |
| MAKE_CASE(AArch64ISD::ST2LANEpost) |
| MAKE_CASE(AArch64ISD::ST3LANEpost) |
| MAKE_CASE(AArch64ISD::ST4LANEpost) |
| MAKE_CASE(AArch64ISD::SMULL) |
| MAKE_CASE(AArch64ISD::UMULL) |
| MAKE_CASE(AArch64ISD::FRECPE) |
| MAKE_CASE(AArch64ISD::FRECPS) |
| MAKE_CASE(AArch64ISD::FRSQRTE) |
| MAKE_CASE(AArch64ISD::FRSQRTS) |
| MAKE_CASE(AArch64ISD::STG) |
| MAKE_CASE(AArch64ISD::STZG) |
| MAKE_CASE(AArch64ISD::ST2G) |
| MAKE_CASE(AArch64ISD::STZ2G) |
| MAKE_CASE(AArch64ISD::SUNPKHI) |
| MAKE_CASE(AArch64ISD::SUNPKLO) |
| MAKE_CASE(AArch64ISD::UUNPKHI) |
| MAKE_CASE(AArch64ISD::UUNPKLO) |
| MAKE_CASE(AArch64ISD::INSR) |
| MAKE_CASE(AArch64ISD::PTEST) |
| MAKE_CASE(AArch64ISD::PTRUE) |
| MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) |
| MAKE_CASE(AArch64ISD::ST1_PRED) |
| MAKE_CASE(AArch64ISD::SST1_PRED) |
| MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) |
| MAKE_CASE(AArch64ISD::SST1_SXTW_PRED) |
| MAKE_CASE(AArch64ISD::SST1_UXTW_PRED) |
| MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED) |
| MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED) |
| MAKE_CASE(AArch64ISD::SST1_IMM_PRED) |
| MAKE_CASE(AArch64ISD::SSTNT1_PRED) |
| MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) |
| MAKE_CASE(AArch64ISD::LDP) |
| MAKE_CASE(AArch64ISD::STP) |
| MAKE_CASE(AArch64ISD::STNP) |
| MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) |
| MAKE_CASE(AArch64ISD::INDEX_VECTOR) |
| MAKE_CASE(AArch64ISD::UADDLP) |
| MAKE_CASE(AArch64ISD::CALL_RVMARKER) |
| MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) |
| } |
| #undef MAKE_CASE |
| return nullptr; |
| } |
| |
| MachineBasicBlock * |
| AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, |
| MachineBasicBlock *MBB) const { |
| // We materialise the F128CSEL pseudo-instruction as some control flow and a |
| // phi node: |
| |
| // OrigBB: |
| // [... previous instrs leading to comparison ...] |
| // b.ne TrueBB |
| // b EndBB |
| // TrueBB: |
| // ; Fallthrough |
| // EndBB: |
| // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] |
| |
| MachineFunction *MF = MBB->getParent(); |
| const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
| const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
| DebugLoc DL = MI.getDebugLoc(); |
| MachineFunction::iterator It = ++MBB->getIterator(); |
| |
| Register DestReg = MI.getOperand(0).getReg(); |
| Register IfTrueReg = MI.getOperand(1).getReg(); |
| Register IfFalseReg = MI.getOperand(2).getReg(); |
| unsigned CondCode = MI.getOperand(3).getImm(); |
| bool NZCVKilled = MI.getOperand(4).isKill(); |
| |
| MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); |
| MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); |
| MF->insert(It, TrueBB); |
| MF->insert(It, EndBB); |
| |
| // Transfer the rest of the current basic block to EndBB. |
| EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), |
| MBB->end()); |
| EndBB->transferSuccessorsAndUpdatePHIs(MBB); |
| |
| BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); |
| BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); |
| MBB->addSuccessor(TrueBB); |
| MBB->addSuccessor(EndBB); |
| |
| // TrueBB falls through to the end. |
| TrueBB->addSuccessor(EndBB); |
| |
| if (!NZCVKilled) { |
| TrueBB->addLiveIn(AArch64::NZCV); |
| EndBB->addLiveIn(AArch64::NZCV); |
| } |
| |
| BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) |
| .addReg(IfTrueReg) |
| .addMBB(TrueBB) |
| .addReg(IfFalseReg) |
| .addMBB(MBB); |
| |
| MI.eraseFromParent(); |
| return EndBB; |
| } |
| |
| MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( |
| MachineInstr &MI, MachineBasicBlock *BB) const { |
| assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
| BB->getParent()->getFunction().getPersonalityFn())) && |
| "SEH does not use catchret!"); |
| return BB; |
| } |
| |
| MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( |
| MachineInstr &MI, MachineBasicBlock *BB) const { |
| switch (MI.getOpcode()) { |
| default: |
| #ifndef NDEBUG |
| MI.dump(); |
| #endif |
| llvm_unreachable("Unexpected instruction for custom inserter!"); |
| |
| case AArch64::F128CSEL: |
| return EmitF128CSEL(MI, BB); |
| |
| case TargetOpcode::STATEPOINT: |
| // STATEPOINT is a pseudo instruction which has no implicit defs/uses, |
| // whereas the BL call instruction (to which the statepoint is eventually |
| // lowered) has an implicit def of LR. Add that implicit dead def here as a |
| // workaround. |
| MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true, |
| true, false, true)); |
| LLVM_FALLTHROUGH; |
| case TargetOpcode::STACKMAP: |
| case TargetOpcode::PATCHPOINT: |
| return emitPatchPoint(MI, BB); |
| |
| case AArch64::CATCHRET: |
| return EmitLoweredCatchRet(MI, BB); |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // AArch64 Lowering private implementation. |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Lowering Code |
| //===----------------------------------------------------------------------===// |
| |
| // Forward declarations of SVE fixed length lowering helpers |
| static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT); |
| static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); |
| static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); |
| static SDValue convertFixedMaskToScalableVector(SDValue Mask, |
| SelectionDAG &DAG); |
| static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, |
| EVT VT); |
| |
| /// isZerosVector - Check whether SDNode N is a zero-filled vector. |
| static bool isZerosVector(const SDNode *N) { |
| // Look through a bit convert. |
| while (N->getOpcode() == ISD::BITCAST) |
| N = N->getOperand(0).getNode(); |
| |
| if (ISD::isConstantSplatVectorAllZeros(N)) |
| return true; |
| |
| if (N->getOpcode() != AArch64ISD::DUP) |
| return false; |
| |
| auto Opnd0 = N->getOperand(0); |
| auto *CINT = dyn_cast<ConstantSDNode>(Opnd0); |
| auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0); |
| return (CINT && CINT->isZero()) || (CFP && CFP->isZero()); |
| } |
| |
| /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 |
| /// CC. |
| static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { |
| switch (CC) { |
| default: |
| llvm_unreachable("Unknown condition code!"); |
| case ISD::SETNE: |
| return AArch64CC::NE; |
| case ISD::SETEQ: |
| return AArch64CC::EQ; |
| case ISD::SETGT: |
| return AArch64CC::GT; |
| case ISD::SETGE: |
| return AArch64CC::GE; |
| case ISD::SETLT: |
| return AArch64CC::LT; |
| case ISD::SETLE: |
| return AArch64CC::LE; |
| case ISD::SETUGT: |
| return AArch64CC::HI; |
| case ISD::SETUGE: |
| return AArch64CC::HS; |
| case ISD::SETULT: |
| return AArch64CC::LO; |
| case ISD::SETULE: |
| return AArch64CC::LS; |
| } |
| } |
| |
| /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. |
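| /// When the comparison cannot be expressed with a single AArch64 condition |
| /// (e.g. SETONE), a second condition is returned in CondCode2; otherwise |
| /// CondCode2 is set to AL. |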
| static void changeFPCCToAArch64CC(ISD::CondCode CC, |
| AArch64CC::CondCode &CondCode, |
| AArch64CC::CondCode &CondCode2) { |
| CondCode2 = AArch64CC::AL; |
| switch (CC) { |
| default: |
| llvm_unreachable("Unknown FP condition!"); |
| case ISD::SETEQ: |
| case ISD::SETOEQ: |
| CondCode = AArch64CC::EQ; |
| break; |
| case ISD::SETGT: |
| case ISD::SETOGT: |
| CondCode = AArch64CC::GT; |
| break; |
| case ISD::SETGE: |
| case ISD::SETOGE: |
| CondCode = AArch64CC::GE; |
| break; |
| case ISD::SETOLT: |
| CondCode = AArch64CC::MI; |
| break; |
| case ISD::SETOLE: |
| CondCode = AArch64CC::LS; |
| break; |
| case ISD::SETONE: |
| CondCode = AArch64CC::MI; |
| CondCode2 = AArch64CC::GT; |
| break; |
| case ISD::SETO: |
| CondCode = AArch64CC::VC; |
| break; |
| case ISD::SETUO: |
| CondCode = AArch64CC::VS; |
| break; |
|