| //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the AArch64TargetLowering class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64ExpandImm.h" |
| #include "AArch64ISelLowering.h" |
| #include "AArch64CallingConvention.h" |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64PerfectShuffle.h" |
| #include "AArch64RegisterInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "Utils/AArch64BaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/VectorUtils.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetCallingConv.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GetElementPtrTypeIterator.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/OperandTraits.h" |
| #include "llvm/IR/PatternMatch.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <bitset> |
| #include <cassert> |
| #include <cctype> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <iterator> |
| #include <limits> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| using namespace llvm::PatternMatch; |
| |
| #define DEBUG_TYPE "aarch64-lower" |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumShiftInserts, "Number of vector shift inserts"); |
| STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); |
| |
| static cl::opt<bool> |
| EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, |
| cl::desc("Allow AArch64 SLI/SRI formation"), |
| cl::init(false)); |
| |
| // FIXME: The necessary dtprel relocations don't seem to be supported |
| // well in the GNU bfd and gold linkers at the moment. Therefore, by |
| // default, for now, fall back to GeneralDynamic code generation. |
| cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( |
| "aarch64-elf-ldtls-generation", cl::Hidden, |
| cl::desc("Allow AArch64 Local Dynamic TLS code generation"), |
| cl::init(false)); |
| |
| static cl::opt<bool> |
| EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, |
| cl::desc("Enable AArch64 logical imm instruction " |
| "optimization"), |
| cl::init(true)); |
| |
| /// Value type used for condition codes. |
| static const MVT MVT_CC = MVT::i32; |
| |
| AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, |
| const AArch64Subtarget &STI) |
| : TargetLowering(TM), Subtarget(&STI) { |
| // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so |
| // we have to make something up. Arbitrarily, choose ZeroOrOne. |
| setBooleanContents(ZeroOrOneBooleanContent); |
|   // Vector comparisons set each element of the result to all-ones or |
|   // all-zeros. |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); |
| addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); |
| |
| if (Subtarget->hasFPARMv8()) { |
| addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); |
| addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); |
| addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); |
| addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); |
| } |
| |
| if (Subtarget->hasNEON()) { |
| addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); |
| addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); |
| // Someone set us up the NEON. |
| addDRTypeForNEON(MVT::v2f32); |
| addDRTypeForNEON(MVT::v8i8); |
| addDRTypeForNEON(MVT::v4i16); |
| addDRTypeForNEON(MVT::v2i32); |
| addDRTypeForNEON(MVT::v1i64); |
| addDRTypeForNEON(MVT::v1f64); |
| addDRTypeForNEON(MVT::v4f16); |
| |
| addQRTypeForNEON(MVT::v4f32); |
| addQRTypeForNEON(MVT::v2f64); |
| addQRTypeForNEON(MVT::v16i8); |
| addQRTypeForNEON(MVT::v8i16); |
| addQRTypeForNEON(MVT::v4i32); |
| addQRTypeForNEON(MVT::v2i64); |
| addQRTypeForNEON(MVT::v8f16); |
| } |
| |
| if (Subtarget->hasSVE()) { |
|     // Add legal SVE predicate types. |
| addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); |
| addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); |
| |
|     // Add legal SVE data types. |
| addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); |
| |
| addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass); |
| addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); |
| } |
| |
| // Compute derived properties from the register classes |
| computeRegisterProperties(Subtarget->getRegisterInfo()); |
| |
| // Provide all sorts of operation actions |
| setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
| setOperationAction(ISD::SETCC, MVT::i32, Custom); |
| setOperationAction(ISD::SETCC, MVT::i64, Custom); |
| setOperationAction(ISD::SETCC, MVT::f16, Custom); |
| setOperationAction(ISD::SETCC, MVT::f32, Custom); |
| setOperationAction(ISD::SETCC, MVT::f64, Custom); |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
| setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
| setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::i64, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT, MVT::i64, Custom); |
| setOperationAction(ISD::SELECT, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT, MVT::f64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
| |
| setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
| |
| setOperationAction(ISD::FREM, MVT::f32, Expand); |
| setOperationAction(ISD::FREM, MVT::f64, Expand); |
| setOperationAction(ISD::FREM, MVT::f80, Expand); |
| |
| setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
| |
| // Custom lowering hooks are needed for XOR |
| // to fold it into CSINC/CSINV. |
| setOperationAction(ISD::XOR, MVT::i32, Custom); |
| setOperationAction(ISD::XOR, MVT::i64, Custom); |
| |
| // Virtually no operation on f128 is legal, but LLVM can't expand them when |
| // there's a valid register class, so we need custom operations in most cases. |
| setOperationAction(ISD::FABS, MVT::f128, Expand); |
| setOperationAction(ISD::FADD, MVT::f128, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
| setOperationAction(ISD::FCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FDIV, MVT::f128, Custom); |
| setOperationAction(ISD::FMA, MVT::f128, Expand); |
| setOperationAction(ISD::FMUL, MVT::f128, Custom); |
| setOperationAction(ISD::FNEG, MVT::f128, Expand); |
| setOperationAction(ISD::FPOW, MVT::f128, Expand); |
| setOperationAction(ISD::FREM, MVT::f128, Expand); |
| setOperationAction(ISD::FRINT, MVT::f128, Expand); |
| setOperationAction(ISD::FSIN, MVT::f128, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
| setOperationAction(ISD::FSUB, MVT::f128, Custom); |
| setOperationAction(ISD::FTRUNC, MVT::f128, Expand); |
| setOperationAction(ISD::SETCC, MVT::f128, Custom); |
| setOperationAction(ISD::BR_CC, MVT::f128, Custom); |
| setOperationAction(ISD::SELECT, MVT::f128, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
| |
| // Lowering for many of the conversions is actually specified by the non-f128 |
| // type. The LowerXXX function will be trivial when f128 isn't involved. |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
| |
| // Variable arguments. |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::Other, Custom); |
| setOperationAction(ISD::VACOPY, MVT::Other, Custom); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| |
| // Variable-sized objects. |
| setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| |
| if (Subtarget->isTargetWindows()) |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); |
| else |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); |
| |
| // Constant pool entries |
| setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
| |
| // BlockAddress |
| setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
| |
|   // Add/Sub overflow ops with MVT::Glue are lowered to NZCV dependencies. |
| setOperationAction(ISD::ADDC, MVT::i32, Custom); |
| setOperationAction(ISD::ADDE, MVT::i32, Custom); |
| setOperationAction(ISD::SUBC, MVT::i32, Custom); |
| setOperationAction(ISD::SUBE, MVT::i32, Custom); |
| setOperationAction(ISD::ADDC, MVT::i64, Custom); |
| setOperationAction(ISD::ADDE, MVT::i64, Custom); |
| setOperationAction(ISD::SUBC, MVT::i64, Custom); |
| setOperationAction(ISD::SUBE, MVT::i64, Custom); |
| |
| // AArch64 lacks both left-rotate and popcount instructions. |
| setOperationAction(ISD::ROTL, MVT::i32, Expand); |
| setOperationAction(ISD::ROTL, MVT::i64, Expand); |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| } |
| |
| // AArch64 doesn't have {U|S}MUL_LOHI. |
| setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
| |
| setOperationAction(ISD::CTPOP, MVT::i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i64, Custom); |
| |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| } |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::SREM, MVT::i64, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i64, Expand); |
| |
| // Custom lower Add/Sub/Mul with overflow. |
| setOperationAction(ISD::SADDO, MVT::i32, Custom); |
| setOperationAction(ISD::SADDO, MVT::i64, Custom); |
| setOperationAction(ISD::UADDO, MVT::i32, Custom); |
| setOperationAction(ISD::UADDO, MVT::i64, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
| setOperationAction(ISD::SSUBO, MVT::i64, Custom); |
| setOperationAction(ISD::USUBO, MVT::i32, Custom); |
| setOperationAction(ISD::USUBO, MVT::i64, Custom); |
| setOperationAction(ISD::SMULO, MVT::i32, Custom); |
| setOperationAction(ISD::SMULO, MVT::i64, Custom); |
| setOperationAction(ISD::UMULO, MVT::i32, Custom); |
| setOperationAction(ISD::UMULO, MVT::i64, Custom); |
| |
| setOperationAction(ISD::FSIN, MVT::f32, Expand); |
| setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::f32, Expand); |
| setOperationAction(ISD::FPOW, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| if (Subtarget->hasFullFP16()) |
| setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); |
| else |
| setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); |
| |
| setOperationAction(ISD::FREM, MVT::f16, Promote); |
| setOperationAction(ISD::FREM, MVT::v4f16, Expand); |
| setOperationAction(ISD::FREM, MVT::v8f16, Expand); |
| setOperationAction(ISD::FPOW, MVT::f16, Promote); |
| setOperationAction(ISD::FPOW, MVT::v4f16, Expand); |
| setOperationAction(ISD::FPOW, MVT::v8f16, Expand); |
| setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
| setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); |
| setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FCOS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCOS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSIN, MVT::f16, Promote); |
| setOperationAction(ISD::FSIN, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSIN, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
| setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FEXP, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP, MVT::v4f16, Expand); |
| setOperationAction(ISD::FEXP, MVT::v8f16, Expand); |
| setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
| setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); |
| setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); |
| setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
| setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); |
| setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); |
| |
| if (!Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SELECT, MVT::f16, Promote); |
| setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); |
| setOperationAction(ISD::SETCC, MVT::f16, Promote); |
| setOperationAction(ISD::BR_CC, MVT::f16, Promote); |
| setOperationAction(ISD::FADD, MVT::f16, Promote); |
| setOperationAction(ISD::FSUB, MVT::f16, Promote); |
| setOperationAction(ISD::FMUL, MVT::f16, Promote); |
| setOperationAction(ISD::FDIV, MVT::f16, Promote); |
| setOperationAction(ISD::FMA, MVT::f16, Promote); |
| setOperationAction(ISD::FNEG, MVT::f16, Promote); |
| setOperationAction(ISD::FABS, MVT::f16, Promote); |
| setOperationAction(ISD::FCEIL, MVT::f16, Promote); |
| setOperationAction(ISD::FSQRT, MVT::f16, Promote); |
| setOperationAction(ISD::FFLOOR, MVT::f16, Promote); |
| setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); |
| setOperationAction(ISD::FRINT, MVT::f16, Promote); |
| setOperationAction(ISD::FROUND, MVT::f16, Promote); |
| setOperationAction(ISD::FTRUNC, MVT::f16, Promote); |
| setOperationAction(ISD::FMINNUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); |
| |
|     // Promote v4f16 to v4f32 when that is known to be safe. |
| setOperationAction(ISD::FADD, MVT::v4f16, Promote); |
| setOperationAction(ISD::FSUB, MVT::v4f16, Promote); |
| setOperationAction(ISD::FMUL, MVT::v4f16, Promote); |
| setOperationAction(ISD::FDIV, MVT::v4f16, Promote); |
| setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote); |
| setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote); |
| AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32); |
| AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32); |
| |
| setOperationAction(ISD::FABS, MVT::v4f16, Expand); |
| setOperationAction(ISD::FNEG, MVT::v4f16, Expand); |
| setOperationAction(ISD::FROUND, MVT::v4f16, Expand); |
| setOperationAction(ISD::FMA, MVT::v4f16, Expand); |
| setOperationAction(ISD::SETCC, MVT::v4f16, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::v4f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); |
| setOperationAction(ISD::FRINT, MVT::v4f16, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); |
| |
| setOperationAction(ISD::FABS, MVT::v8f16, Expand); |
| setOperationAction(ISD::FADD, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); |
| setOperationAction(ISD::FDIV, MVT::v8f16, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); |
| setOperationAction(ISD::FMA, MVT::v8f16, Expand); |
| setOperationAction(ISD::FMUL, MVT::v8f16, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FNEG, MVT::v8f16, Expand); |
| setOperationAction(ISD::FROUND, MVT::v8f16, Expand); |
| setOperationAction(ISD::FRINT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); |
| setOperationAction(ISD::FSUB, MVT::v8f16, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); |
| setOperationAction(ISD::SETCC, MVT::v8f16, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); |
| setOperationAction(ISD::SELECT, MVT::v8f16, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); |
| } |
| |
| // AArch64 has implementations of a lot of rounding-like FP operations. |
| for (MVT Ty : {MVT::f32, MVT::f64}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| setOperationAction(ISD::FMINNUM, Ty, Legal); |
| setOperationAction(ISD::FMAXNUM, Ty, Legal); |
| setOperationAction(ISD::FMINIMUM, Ty, Legal); |
| setOperationAction(ISD::FMAXIMUM, Ty, Legal); |
| setOperationAction(ISD::LROUND, Ty, Legal); |
| setOperationAction(ISD::LLROUND, Ty, Legal); |
| setOperationAction(ISD::LRINT, Ty, Legal); |
| setOperationAction(ISD::LLRINT, Ty, Legal); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::f16, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f16, Legal); |
| setOperationAction(ISD::FRINT, MVT::f16, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f16, Legal); |
| setOperationAction(ISD::FROUND, MVT::f16, Legal); |
| setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
| setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
| } |
| |
| setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
| |
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); |
| setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); |
| |
| // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. |
| // This requires the Performance Monitors extension. |
| if (Subtarget->hasPerfMon()) |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); |
| |
| if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| // Issue __sincos_stret if available. |
| setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| } else { |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| } |
| |
| // Make floating-point constants legal for the large code model, so they don't |
| // become loads from the constant pool. |
| if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
| setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
| setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
| } |
| |
| // AArch64 does not have floating-point extending loads, i1 sign-extending |
|   // loads, floating-point truncating stores, or v2i32->v2i16 truncating stores. |
| for (MVT VT : MVT::fp_valuetypes()) { |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); |
| } |
| for (MVT VT : MVT::integer_valuetypes()) |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); |
| |
| setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
| |
| setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
| setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
| |
| // Indexed loads and stores are supported. |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, MVT::i8, Legal); |
| setIndexedLoadAction(im, MVT::i16, Legal); |
| setIndexedLoadAction(im, MVT::i32, Legal); |
| setIndexedLoadAction(im, MVT::i64, Legal); |
| setIndexedLoadAction(im, MVT::f64, Legal); |
| setIndexedLoadAction(im, MVT::f32, Legal); |
| setIndexedLoadAction(im, MVT::f16, Legal); |
| setIndexedStoreAction(im, MVT::i8, Legal); |
| setIndexedStoreAction(im, MVT::i16, Legal); |
| setIndexedStoreAction(im, MVT::i32, Legal); |
| setIndexedStoreAction(im, MVT::i64, Legal); |
| setIndexedStoreAction(im, MVT::f64, Legal); |
| setIndexedStoreAction(im, MVT::f32, Legal); |
| setIndexedStoreAction(im, MVT::f16, Legal); |
| } |
| |
| // Trap. |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| if (Subtarget->isTargetWindows()) |
| setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| |
| // We combine OR nodes for bitfield operations. |
| setTargetDAGCombine(ISD::OR); |
| // Try to create BICs for vector ANDs. |
| setTargetDAGCombine(ISD::AND); |
| |
| // Vector add and sub nodes may conceal a high-half opportunity. |
|   // Also, try to fold ADD into CSINC/CSINV. |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::SUB); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::XOR); |
| setTargetDAGCombine(ISD::SINT_TO_FP); |
| setTargetDAGCombine(ISD::UINT_TO_FP); |
| |
| setTargetDAGCombine(ISD::FP_TO_SINT); |
| setTargetDAGCombine(ISD::FP_TO_UINT); |
| setTargetDAGCombine(ISD::FDIV); |
| |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::BITCAST); |
| setTargetDAGCombine(ISD::CONCAT_VECTORS); |
| setTargetDAGCombine(ISD::STORE); |
| if (Subtarget->supportsAddressTopByteIgnored()) |
| setTargetDAGCombine(ISD::LOAD); |
| |
| setTargetDAGCombine(ISD::MUL); |
| |
| setTargetDAGCombine(ISD::SELECT); |
| setTargetDAGCombine(ISD::VSELECT); |
| |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
| |
| setTargetDAGCombine(ISD::GlobalAddress); |
| |
|   // In case of strict alignment, avoid an excessive number of byte-wide stores. |
| MaxStoresPerMemsetOptSize = 8; |
| MaxStoresPerMemset = Subtarget->requiresStrictAlign() |
| ? MaxStoresPerMemsetOptSize : 32; |
| |
| MaxGluedStoresPerMemcpy = 4; |
| MaxStoresPerMemcpyOptSize = 4; |
| MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() |
| ? MaxStoresPerMemcpyOptSize : 16; |
| |
| MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; |
| |
| MaxLoadsPerMemcmpOptSize = 4; |
| MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() |
| ? MaxLoadsPerMemcmpOptSize : 8; |
| |
| setStackPointerRegisterToSaveRestore(AArch64::SP); |
| |
| setSchedulingPreference(Sched::Hybrid); |
| |
| EnableExtLdPromotion = true; |
| |
| // Set required alignment. |
| setMinFunctionAlignment(Align(4)); |
| // Set preferred alignments. |
| setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); |
| setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); |
| |
| // Only change the limit for entries in a jump table if specified by |
|   // the subtarget, but not at the command line. |
| unsigned MaxJT = STI.getMaximumJumpTableSize(); |
| if (MaxJT && getMaximumJumpTableSize() == UINT_MAX) |
| setMaximumJumpTableSize(MaxJT); |
| |
| setHasExtractBitsInsn(true); |
| |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| |
| if (Subtarget->hasNEON()) { |
| // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to |
| // silliness like this: |
| setOperationAction(ISD::FABS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FADD, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); |
| setOperationAction(ISD::FCOS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FDIV, MVT::v1f64, Expand); |
| setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); |
| setOperationAction(ISD::FMA, MVT::v1f64, Expand); |
| setOperationAction(ISD::FMUL, MVT::v1f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FNEG, MVT::v1f64, Expand); |
| setOperationAction(ISD::FPOW, MVT::v1f64, Expand); |
| setOperationAction(ISD::FREM, MVT::v1f64, Expand); |
| setOperationAction(ISD::FROUND, MVT::v1f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSIN, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); |
| setOperationAction(ISD::FSUB, MVT::v1f64, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); |
| setOperationAction(ISD::SETCC, MVT::v1f64, Expand); |
| setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); |
| setOperationAction(ISD::SELECT, MVT::v1f64, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); |
| setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); |
| |
| setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
| |
|     // AArch64 doesn't have direct vector->f32 conversion instructions for |
| // elements smaller than i32, so promote the input to i32 first. |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); |
|     // i8 vector elements also need promotion to i32 for v8i8. |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); |
| // Similarly, there is no direct i32 -> f64 vector conversion instruction. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); |
|     // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the |
|     // conversion happens in two steps: v4i32 -> v4f32 -> v4f16. |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
| |
| if (Subtarget->hasFullFP16()) { |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
| } else { |
|       // When AArch64 doesn't have full FP16 support, promote the input |
| // to i32 first. |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); |
| setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); |
| setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); |
| } |
| |
| setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
| setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
| |
| // AArch64 doesn't have MUL.2d: |
| setOperationAction(ISD::MUL, MVT::v2i64, Expand); |
| // Custom handling for some quad-vector types to detect MULL. |
| setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| |
| // Vector reductions |
| for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
| MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
| } |
| for (MVT VT : { MVT::v4f16, MVT::v2f32, |
| MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { |
| setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
| setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
| } |
| |
| setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); |
| setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); |
| // Likewise, narrowing and extending vector loads/stores aren't handled |
| // directly. |
| for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| |
| if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { |
| setOperationAction(ISD::MULHS, VT, Legal); |
| setOperationAction(ISD::MULHU, VT, Legal); |
| } else { |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| } |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| |
| for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| } |
| |
| // AArch64 has implementations of a lot of rounding-like FP operations. |
| for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| } |
| |
| if (Subtarget->hasFullFP16()) { |
| for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { |
| setOperationAction(ISD::FFLOOR, Ty, Legal); |
| setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
| setOperationAction(ISD::FCEIL, Ty, Legal); |
| setOperationAction(ISD::FRINT, Ty, Legal); |
| setOperationAction(ISD::FTRUNC, Ty, Legal); |
| setOperationAction(ISD::FROUND, Ty, Legal); |
| } |
| } |
| |
| setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); |
| } |
| |
| if (Subtarget->hasSVE()) { |
| for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { |
| if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1) |
| setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
| } |
| } |
| |
| PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); |
| } |
| |
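| // Configure the operation actions, extending-load/truncating-store legality, |
| // and indexed addressing support shared by every NEON vector type that gets |
| // registered; floating-point vector loads and stores are promoted to the |
| // equivalent integer vector type. |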
| void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { |
| assert(VT.isVector() && "VT should be a vector type"); |
| |
| if (VT.isFloatingPoint()) { |
| MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT(); |
| setOperationPromotedToType(ISD::LOAD, VT, PromoteTo); |
| setOperationPromotedToType(ISD::STORE, VT, PromoteTo); |
| } |
| |
| // Mark vector float intrinsics as expand. |
| if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| |
| // But we do support custom-lowering for FCOPYSIGN. |
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| } |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| setOperationAction(ISD::SRA, VT, Custom); |
| setOperationAction(ISD::SRL, VT, Custom); |
| setOperationAction(ISD::SHL, VT, Custom); |
| setOperationAction(ISD::OR, VT, Custom); |
| setOperationAction(ISD::SETCC, VT, Custom); |
| setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
| |
| setOperationAction(ISD::SELECT, VT, Expand); |
| setOperationAction(ISD::SELECT_CC, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| for (MVT InnerVT : MVT::all_valuetypes()) |
| setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| |
|   // CNT supports only B (byte) element sizes, so CTPOP on wider elements is |
|   // custom-lowered, widening the byte-wise CNT result with UADDLP. |
| if (VT != MVT::v8i8 && VT != MVT::v16i8) |
| setOperationAction(ISD::CTPOP, VT, Custom); |
| |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| |
| setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| |
| if (!VT.isFloatingPoint()) |
| setOperationAction(ISD::ABS, VT, Legal); |
| |
| // [SU][MIN|MAX] are available for all NEON types apart from i64. |
| if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) |
| for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
| setOperationAction(Opcode, VT, Legal); |
| |
|   // FMINNUM/FMAXNUM and FMINIMUM/FMAXIMUM are available for all FP NEON types. |
| if (VT.isFloatingPoint() && |
| (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) |
| for (unsigned Opcode : |
| {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) |
| setOperationAction(Opcode, VT, Legal); |
| |
| if (Subtarget->isLittleEndian()) { |
| for (unsigned im = (unsigned)ISD::PRE_INC; |
| im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
| setIndexedLoadAction(im, VT, Legal); |
| setIndexedStoreAction(im, VT, Legal); |
| } |
| } |
| } |
| |
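| // Register a 64-bit (D-register) NEON vector type and apply the common NEON |
| // lowering configuration. |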
| void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &AArch64::FPR64RegClass); |
| addTypeForNEON(VT, MVT::v2i32); |
| } |
| |
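| // Register a 128-bit (Q-register) NEON vector type and apply the common NEON |
| // lowering configuration. |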
| void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { |
| addRegisterClass(VT, &AArch64::FPR128RegClass); |
| addTypeForNEON(VT, MVT::v4i32); |
| } |
| |
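| // Scalar comparisons produce an i32 result; vector comparisons produce a |
| // vector of integer elements with the same shape as the operands. |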
| EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return MVT::i32; |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
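| // Try to rewrite the constant of a logical operation so that it becomes |
| // encodable as an AArch64 bitmask immediate, exploiting the freedom to choose |
| // any value for the bits that are not demanded (see the in-loop comment below |
| // for a worked bit pattern). Returns true and commits the rewritten node to |
| // TLO on success. |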
| static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, |
| const APInt &Demanded, |
| TargetLowering::TargetLoweringOpt &TLO, |
| unsigned NewOpc) { |
| uint64_t OldImm = Imm, NewImm, Enc; |
| uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; |
| |
| // Return if the immediate is already all zeros, all ones, a bimm32 or a |
| // bimm64. |
| if (Imm == 0 || Imm == Mask || |
| AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) |
| return false; |
| |
| unsigned EltSize = Size; |
| uint64_t DemandedBits = Demanded.getZExtValue(); |
| |
| // Clear bits that are not demanded. |
| Imm &= DemandedBits; |
| |
| while (true) { |
| // The goal here is to set the non-demanded bits in a way that minimizes |
|     // the number of transitions between 0 and 1. In order to achieve this goal, |
| // we set the non-demanded bits to the value of the preceding demanded bits. |
| // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a |
| // non-demanded bit), we copy bit0 (1) to the least significant 'x', |
| // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. |
| // The final result is 0b11000011. |
| uint64_t NonDemandedBits = ~DemandedBits; |
| uint64_t InvertedImm = ~Imm & DemandedBits; |
| uint64_t RotatedImm = |
| ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & |
| NonDemandedBits; |
| uint64_t Sum = RotatedImm + NonDemandedBits; |
| bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); |
| uint64_t Ones = (Sum + Carry) & NonDemandedBits; |
| NewImm = (Imm | Ones) & Mask; |
| |
| // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate |
| // or all-ones or all-zeros, in which case we can stop searching. Otherwise, |
| // we halve the element size and continue the search. |
| if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) |
| break; |
| |
|     // We cannot shrink the element size any further if it is 2 bits. |
| if (EltSize == 2) |
| return false; |
| |
| EltSize /= 2; |
| Mask >>= EltSize; |
| uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; |
| |
|     // Return if there is a mismatch in any of the demanded bits of Imm and Hi. |
| if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) |
| return false; |
| |
| // Merge the upper and lower halves of Imm and DemandedBits. |
| Imm |= Hi; |
| DemandedBits |= DemandedBitsHi; |
| } |
| |
| ++NumOptimizedImms; |
| |
| // Replicate the element across the register width. |
| while (EltSize < Size) { |
| NewImm |= NewImm << EltSize; |
| EltSize *= 2; |
| } |
| |
| (void)OldImm; |
| assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && |
| "demanded bits should never be altered"); |
| assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); |
| |
| // Create the new constant immediate node. |
| EVT VT = Op.getValueType(); |
| SDLoc DL(Op); |
| SDValue New; |
| |
| // If the new constant immediate is all-zeros or all-ones, let the target |
| // independent DAG combine optimize this node. |
| if (NewImm == 0 || NewImm == OrigMask) { |
| New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), |
| TLO.DAG.getConstant(NewImm, DL, VT)); |
| // Otherwise, create a machine node so that target independent DAG combine |
| // doesn't undo this optimization. |
| } else { |
| Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); |
| SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); |
| New = SDValue( |
| TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); |
| } |
| |
| return TLO.CombineTo(Op, New); |
| } |
| |
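| // Target hook used when shrinking demanded constants: if only some bits of an |
| // AND/OR/XOR constant are demanded, try to replace it with a constant that the |
| // matching logical-immediate instruction can encode directly. |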
| bool AArch64TargetLowering::targetShrinkDemandedConstant( |
| SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { |
|   // Delay this optimization until as late as possible. |
| if (!TLO.LegalOps) |
| return false; |
| |
| if (!EnableOptimizeLogicalImm) |
| return false; |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector()) |
| return false; |
| |
| unsigned Size = VT.getSizeInBits(); |
| assert((Size == 32 || Size == 64) && |
| "i32 or i64 is expected after legalization."); |
| |
| // Exit early if we demand all bits. |
| if (Demanded.countPopulation() == Size) |
| return false; |
| |
| unsigned NewOpc; |
| switch (Op.getOpcode()) { |
| default: |
| return false; |
| case ISD::AND: |
| NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; |
| break; |
| case ISD::OR: |
| NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; |
| break; |
| case ISD::XOR: |
| NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; |
| break; |
| } |
| ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
| if (!C) |
| return false; |
| uint64_t Imm = C->getZExtValue(); |
| return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); |
| } |
| |
| /// computeKnownBitsForTargetNode - Determine which of the bits specified in |
| /// Mask are known to be either zero or one and return them in Known. |
| void AArch64TargetLowering::computeKnownBitsForTargetNode( |
| const SDValue Op, KnownBits &Known, |
| const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { |
| switch (Op.getOpcode()) { |
| default: |
| break; |
| case AArch64ISD::CSEL: { |
| KnownBits Known2; |
| Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); |
| Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); |
| Known.Zero &= Known2.Zero; |
| Known.One &= Known2.One; |
| break; |
| } |
| case AArch64ISD::LOADgot: |
| case AArch64ISD::ADDlow: { |
| if (!Subtarget->isTargetILP32()) |
| break; |
|     // In ILP32 mode all valid pointers are in the low 4GB of the address space. |
| Known.Zero = APInt::getHighBitsSet(64, 32); |
| break; |
| } |
| case ISD::INTRINSIC_W_CHAIN: { |
| ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); |
| Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); |
| switch (IntID) { |
| default: return; |
| case Intrinsic::aarch64_ldaxr: |
| case Intrinsic::aarch64_ldxr: { |
| unsigned BitWidth = Known.getBitWidth(); |
| EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); |
| unsigned MemBits = VT.getScalarSizeInBits(); |
| Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); |
| return; |
| } |
| } |
| break; |
| } |
| case ISD::INTRINSIC_WO_CHAIN: |
| case ISD::INTRINSIC_VOID: { |
| unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
| switch (IntNo) { |
| default: |
| break; |
| case Intrinsic::aarch64_neon_umaxv: |
| case Intrinsic::aarch64_neon_uminv: { |
| // Figure out the datatype of the vector operand. The UMINV instruction |
| // will zero extend the result, so we can mark as known zero all the |
|       // bits larger than the element datatype. 32-bit or larger doesn't need |
| // this as those are legal types and will be handled by isel directly. |
| MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); |
| unsigned BitWidth = Known.getBitWidth(); |
| if (VT == MVT::v8i8 || VT == MVT::v16i8) { |
| assert(BitWidth >= 8 && "Unexpected width!"); |
| APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); |
| Known.Zero |= Mask; |
| } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { |
| assert(BitWidth >= 16 && "Unexpected width!"); |
| APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); |
| Known.Zero |= Mask; |
| } |
| break; |
|     } |
| } |
| } |
| } |
| } |
| |
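| // Shift amounts are always legalized to i64, regardless of the shifted type. |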
| MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, |
| EVT) const { |
| return MVT::i64; |
| } |
| |
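| // Unaligned accesses are allowed unless the subtarget requires strict |
| // alignment; when Fast is non-null it additionally reports whether the access |
| // is expected to be cheap on this CPU. |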
| bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
| EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, |
| bool *Fast) const { |
| if (Subtarget->requiresStrictAlign()) |
| return false; |
| |
| if (Fast) { |
| // Some CPUs are fine with unaligned stores except for 128-bit ones. |
| *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || |
| // See comments in performSTORECombine() for more details about |
| // these conditions. |
| |
| // Code that uses clang vector extensions can mark that it |
| // wants unaligned accesses to be treated as fast by |
| // underspecifying alignment to be 1 or 2. |
| Align <= 2 || |
| |
| // Disregard v2i64. Memcpy lowering produces those and splitting |
| // them regresses performance on micro-benchmarks and olden/bh. |
| VT == MVT::v2i64; |
| } |
| return true; |
| } |
| |
| // Same as above but handling LLTs instead. |
| bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
| LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, |
| bool *Fast) const { |
| if (Subtarget->requiresStrictAlign()) |
| return false; |
| |
| if (Fast) { |
| // Some CPUs are fine with unaligned stores except for 128-bit ones. |
| *Fast = !Subtarget->isMisaligned128StoreSlow() || |
| Ty.getSizeInBytes() != 16 || |
| // See comments in performSTORECombine() for more details about |
| // these conditions. |
| |
| // Code that uses clang vector extensions can mark that it |
| // wants unaligned accesses to be treated as fast by |
| // underspecifying alignment to be 1 or 2. |
| Align <= 2 || |
| |
| // Disregard v2i64. Memcpy lowering produces those and splitting |
| // them regresses performance on micro-benchmarks and olden/bh. |
| Ty == LLT::vector(2, 64); |
| } |
| return true; |
| } |
| |
| FastISel * |
| AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
| const TargetLibraryInfo *libInfo) const { |
| return AArch64::createFastISel(funcInfo, libInfo); |
| } |
| |
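| // Names for the AArch64-specific SelectionDAG nodes, used when dumping DAGs. |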
| const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { |
| switch ((AArch64ISD::NodeType)Opcode) { |
| case AArch64ISD::FIRST_NUMBER: break; |
| case AArch64ISD::CALL: return "AArch64ISD::CALL"; |
| case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; |
| case AArch64ISD::ADR: return "AArch64ISD::ADR"; |
| case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; |
| case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; |
| case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; |
| case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; |
| case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; |
| case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; |
| case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; |
| case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; |
| case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; |
| case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; |
| case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; |
| case AArch64ISD::ADC: return "AArch64ISD::ADC"; |
| case AArch64ISD::SBC: return "AArch64ISD::SBC"; |
| case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; |
| case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; |
| case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; |
| case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; |
| case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; |
| case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; |
| case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; |
| case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; |
| case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; |
| case AArch64ISD::DUP: return "AArch64ISD::DUP"; |
| case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; |
| case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; |
| case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; |
| case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; |
| case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; |
| case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; |
| case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; |
| case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; |
| case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; |
| case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; |
| case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; |
| case AArch64ISD::BICi: return "AArch64ISD::BICi"; |
| case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; |
| case AArch64ISD::BSL: return "AArch64ISD::BSL"; |
| case AArch64ISD::NEG: return "AArch64ISD::NEG"; |
| case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; |
| case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; |
| case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; |
| case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; |
| case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; |
| case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; |
| case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; |
| case AArch64ISD::REV16: return "AArch64ISD::REV16"; |
| case AArch64ISD::REV32: return "AArch64ISD::REV32"; |
| case AArch64ISD::REV64: return "AArch64ISD::REV64"; |
| case AArch64ISD::EXT: return "AArch64ISD::EXT"; |
| case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; |
| case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; |
| case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; |
| case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; |
| case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; |
| case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; |
| case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; |
| case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; |
| case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; |
| case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; |
| case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; |
| case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; |
| case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz"; |
| case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; |
| case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; |
| case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; |
| case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; |
| case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz"; |
| case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; |
| case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; |
| case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; |
| case AArch64ISD::SADDV: return "AArch64ISD::SADDV"; |
| case AArch64ISD::UADDV: return "AArch64ISD::UADDV"; |
| case AArch64ISD::SMINV: return "AArch64ISD::SMINV"; |
| case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; |
| case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; |
| case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; |
| case AArch64ISD::NOT: return "AArch64ISD::NOT"; |
| case AArch64ISD::BIT: return "AArch64ISD::BIT"; |
| case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; |
| case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; |
| case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; |
| case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; |
| case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; |
| case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH"; |
| case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; |
| case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; |
| case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST"; |
| case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I"; |
| case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; |
| case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; |
| case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; |
| case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; |
| case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; |
| case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; |
| case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; |
| case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; |
| case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; |
| case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; |
| case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; |
| case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; |
| case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; |
| case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; |
| case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; |
| case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; |
| case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; |
| case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; |
| case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost"; |
| case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; |
| case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; |
| case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; |
| case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; |
| case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; |
| case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; |
| case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; |
| case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; |
| case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; |
| case AArch64ISD::SMULL: return "AArch64ISD::SMULL"; |
| case AArch64ISD::UMULL: return "AArch64ISD::UMULL"; |
| case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE"; |
| case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS"; |
| case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE"; |
| case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS"; |
| case AArch64ISD::STG: return "AArch64ISD::STG"; |
| case AArch64ISD::STZG: return "AArch64ISD::STZG"; |
| case AArch64ISD::ST2G: return "AArch64ISD::ST2G"; |
| case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G"; |
| case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI"; |
| case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; |
| case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; |
| case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; |
| } |
| return nullptr; |
| } |
| |
| MachineBasicBlock * |
| AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, |
| MachineBasicBlock *MBB) const { |
| // We materialise the F128CSEL pseudo-instruction as some control flow and a |
| // phi node: |
| |
| // OrigBB: |
| // [... previous instrs leading to comparison ...] |
| // b.ne TrueBB |
| // b EndBB |
| // TrueBB: |
| // ; Fallthrough |
| // EndBB: |
| // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] |
| |
| MachineFunction *MF = MBB->getParent(); |
| const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
| const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
| DebugLoc DL = MI.getDebugLoc(); |
| MachineFunction::iterator It = ++MBB->getIterator(); |
| |
| Register DestReg = MI.getOperand(0).getReg(); |
| Register IfTrueReg = MI.getOperand(1).getReg(); |
| Register IfFalseReg = MI.getOperand(2).getReg(); |
| unsigned CondCode = MI.getOperand(3).getImm(); |
| bool NZCVKilled = MI.getOperand(4).isKill(); |
| |
| MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); |
| MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); |
| MF->insert(It, TrueBB); |
| MF->insert(It, EndBB); |
| |
| // Transfer rest of current basic-block to EndBB |
| EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), |
| MBB->end()); |
| EndBB->transferSuccessorsAndUpdatePHIs(MBB); |
| |
| BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); |
| BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); |
| MBB->addSuccessor(TrueBB); |
| MBB->addSuccessor(EndBB); |
| |
| // TrueBB falls through to the end. |
| TrueBB->addSuccessor(EndBB); |
| |
| if (!NZCVKilled) { |
| TrueBB->addLiveIn(AArch64::NZCV); |
| EndBB->addLiveIn(AArch64::NZCV); |
| } |
| |
| BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) |
| .addReg(IfTrueReg) |
| .addMBB(TrueBB) |
| .addReg(IfFalseReg) |
| .addMBB(MBB); |
| |
| MI.eraseFromParent(); |
| return EndBB; |
| } |
| |
| MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( |
| MachineInstr &MI, MachineBasicBlock *BB) const { |
| assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
| BB->getParent()->getFunction().getPersonalityFn())) && |
| "SEH does not use catchret!"); |
| return BB; |
| } |
| |
| MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad( |
| MachineInstr &MI, MachineBasicBlock *BB) const { |
| MI.eraseFromParent(); |
| return BB; |
| } |
| |
| MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( |
| MachineInstr &MI, MachineBasicBlock *BB) const { |
| switch (MI.getOpcode()) { |
| default: |
| #ifndef NDEBUG |
| MI.dump(); |
| #endif |
| llvm_unreachable("Unexpected instruction for custom inserter!"); |
| |
| case AArch64::F128CSEL: |
| return EmitF128CSEL(MI, BB); |
| |
| case TargetOpcode::STACKMAP: |
| case TargetOpcode::PATCHPOINT: |
| return emitPatchPoint(MI, BB); |
| |
| case AArch64::CATCHRET: |
| return EmitLoweredCatchRet(MI, BB); |
| case AArch64::CATCHPAD: |
| return EmitLoweredCatchPad(MI, BB); |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // AArch64 Lowering private implementation. |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Lowering Code |
| //===----------------------------------------------------------------------===// |
| |
| /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 |
| /// CC |
| static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { |
| switch (CC) { |
| default: |
| llvm_unreachable("Unknown condition code!"); |
| case ISD::SETNE: |
| return AArch64CC::NE; |
| case ISD::SETEQ: |
| return AArch64CC::EQ; |
| case ISD::SETGT: |
| return AArch64CC::GT; |
| case ISD::SETGE: |
| return AArch64CC::GE; |
| case ISD::SETLT: |
| return AArch64CC::LT; |
| case ISD::SETLE: |
| return AArch64CC::LE; |
| case ISD::SETUGT: |
| return AArch64CC::HI; |
| case ISD::SETUGE: |
| return AArch64CC::HS; |
| case ISD::SETULT: |
| return AArch64CC::LO; |
| case ISD::SETULE: |
| return AArch64CC::LS; |
| } |
| } |
| |
| /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. |
| static void changeFPCCToAArch64CC(ISD::CondCode CC, |
| AArch64CC::CondCode &CondCode, |
| AArch64CC::CondCode &CondCode2) { |
| CondCode2 = AArch64CC::AL; |
| switch (CC) { |
| default: |
| llvm_unreachable("Unknown FP condition!"); |
| case ISD::SETEQ: |
| case ISD::SETOEQ: |
| CondCode = AArch64CC::EQ; |
| break; |
| case ISD::SETGT: |
| case ISD::SETOGT: |
| CondCode = AArch64CC::GT; |
| break; |
| case ISD::SETGE: |
| case ISD::SETOGE: |
| CondCode = AArch64CC::GE; |
| break; |
| case ISD::SETOLT: |
| CondCode = AArch64CC::MI; |
| break; |
| case ISD::SETOLE: |
| CondCode = AArch64CC::LS; |
| break; |
| case ISD::SETONE: |
| CondCode = AArch64CC::MI; |
| CondCode2 = AArch64CC::GT; |
| break; |
| case ISD::SETO: |
| CondCode = AArch64CC::VC; |
| break; |
| case ISD::SETUO: |
| CondCode = AArch64CC::VS; |
| break; |
| case ISD::SETUEQ: |
| CondCode = AArch64CC::EQ; |
| CondCode2 = AArch64CC::VS; |
| break; |
| case ISD::SETUGT: |
| CondCode = AArch64CC::HI; |
| break; |
| case ISD::SETUGE: |
| CondCode = AArch64CC::PL; |
| break; |
| case ISD::SETLT: |
| case ISD::SETULT: |
| CondCode = AArch64CC::LT; |
| break; |
| case ISD::SETLE: |
| case ISD::SETULE: |
| CondCode = AArch64CC::LE; |
| break; |
| case ISD::SETNE: |
| case ISD::SETUNE: |
| CondCode = AArch64CC::NE; |
| break; |
| } |
| } |
| |
| /// Convert a DAG fp condition code to an AArch64 CC. |
| /// This differs from changeFPCCToAArch64CC in that it returns cond codes that |
| /// should be AND'ed instead of OR'ed. |
| static void changeFPCCToANDAArch64CC(ISD::CondCode CC, |
| AArch64CC::CondCode &CondCode, |
| AArch64CC::CondCode &CondCode2) { |
| CondCode2 = AArch64CC::AL; |
| switch (CC) { |
| default: |
| changeFPCCToAArch64CC(CC, CondCode, CondCode2); |
| assert(CondCode2 == AArch64CC::AL); |
| break; |
| case ISD::SETONE: |
| // (a one b) |
| // == ((a olt b) || (a ogt b)) |
| // == ((a ord b) && (a une b)) |
| CondCode = AArch64CC::VC; |
| CondCode2 = AArch64CC::NE; |
| break; |
| case ISD::SETUEQ: |
| // (a ueq b) |
| // == ((a uno b) || (a oeq b)) |
| // == ((a ule b) && (a uge b)) |
| CondCode = AArch64CC::PL; |
| CondCode2 = AArch64CC::LE; |
| break; |
| } |
| } |
| |
| /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 |
| /// CC usable with the vector instructions. Fewer operations are available |
| /// without a real NZCV register, so we have to use less efficient combinations |
| /// to get the same effect. |
| static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, |
| AArch64CC::CondCode &CondCode, |
| AArch64CC::CondCode &CondCode2, |
| bool &Invert) { |
| Invert = false; |
| switch (CC) { |
| default: |
| // Mostly the scalar mappings work fine. |
| changeFPCCToAArch64CC(CC, CondCode, CondCode2); |
| break; |
| case ISD::SETUO: |
| Invert = true; |
| LLVM_FALLTHROUGH; |
| case ISD::SETO: |
| CondCode = AArch64CC::MI; |
| CondCode2 = AArch64CC::GE; |
| break; |
| case ISD::SETUEQ: |
| case ISD::SETULT: |
| case ISD::SETULE: |
| case ISD::SETUGT: |
| case ISD::SETUGE: |
| // All of the compare-mask comparisons are ordered, but we can switch |
| // between the two by a double inversion. E.g. ULE == !OGT. |
| Invert = true; |
| changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2); |
| break; |
| } |
| } |
| |
| static bool isLegalArithImmed(uint64_t C) { |
| // Matches AArch64DAGToDAGISel::SelectArithImmed(). |
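| // A few illustrative values, derived from the check below: 0xFFF and 0x1000 |
| // (a 12-bit value, optionally shifted left by 12) are legal, while 0x1001 |
| // and 0x1000000 are not. |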
| bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); |
| LLVM_DEBUG(dbgs() << "Is imm " << C |
| << " legal: " << (IsLegal ? "yes\n" : "no\n")); |
| return IsLegal; |
| } |
| |
| // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on |
| // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V |
| // flags can be set differently by this operation. It comes down to whether |
| // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then |
| // everything is fine. If not then the optimization is wrong. Thus general |
| // comparisons are only valid if op2 != 0. |
| // |
| // So, finally, the only LLVM-native comparisons that don't mention C and V |
| // are SETEQ and SETNE. They're the only ones we can safely use CMN for in |
| // the absence of information about op2. |
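| // |
| // For example (illustrative): for "a == -b" the DAG is |
| // (setcc eq a, (sub 0, b)) |
| // and emitComparison() below can then use ADDS, so the final code can be |
| // "cmn w0, w1" rather than "neg w2, w1; cmp w0, w2". |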
| static bool isCMN(SDValue Op, ISD::CondCode CC) { |
| return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && |
| (CC == ISD::SETEQ || CC == ISD::SETNE); |
| } |
| |
| static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, |
| const SDLoc &dl, SelectionDAG &DAG) { |
| EVT VT = LHS.getValueType(); |
| const bool FullFP16 = |
| static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); |
| |
| if (VT.isFloatingPoint()) { |
| assert(VT != MVT::f128); |
| if (VT == MVT::f16 && !FullFP16) { |
| LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); |
| RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); |
| VT = MVT::f32; |
| } |
| return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); |
| } |
| |
| // The CMP instruction is just an alias for SUBS, and representing it as |
| // SUBS means that it's possible to get CSE with subtract operations. |
| // A later phase can perform the optimization of setting the destination |
| // register to WZR/XZR if it ends up being unused. |
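| // (For instance, "cmp w0, w1" is the alias of "subs wzr, w0, w1".) |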
| unsigned Opcode = AArch64ISD::SUBS; |
| |
| if (isCMN(RHS, CC)) { |
| // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction? |
| Opcode = AArch64ISD::ADDS; |
| RHS = RHS.getOperand(1); |
| } else if (isCMN(LHS, CC)) { |
| // As we are looking for EQ/NE compares, the operands can be commuted; can |
| // we combine a (CMP (sub 0, op1), op2) into a CMN instruction? |
| Opcode = AArch64ISD::ADDS; |
| LHS = LHS.getOperand(1); |
| } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && |
| !isUnsignedIntSetCC(CC)) { |
| // Similarly, (CMP (and X, Y), 0) can be implemented with a TST |
| // (a.k.a. ANDS) except that the flags are only guaranteed to work for one |
| // of the signed comparisons. |
| Opcode = AArch64ISD::ANDS; |
| RHS = LHS.getOperand(1); |
| LHS = LHS.getOperand(0); |
| } |
| |
| return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) |
| .getValue(1); |
| } |
| |
| /// \defgroup AArch64CCMP CMP;CCMP matching |
| /// |
| /// These functions deal with the formation of CMP;CCMP;... sequences. |
| /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of |
| /// a comparison. They set the NZCV flags to a predefined value if their |
| /// predicate is false. This lets us express arbitrary conjunctions, for |
| /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))" |
| /// expressed as: |
| /// cmp A |
| /// ccmp B, inv(CB), CA |
| /// check for CB flags |
| /// |
| /// This naturally lets us implement chains of AND operations with SETCC |
| /// operands. And we can even implement some other situations by transforming |
| /// them: |
| /// - We can implement (NEG SETCC) i.e. negating a single comparison by |
| /// negating the flags used in the CCMP/FCCMP operation. |
| /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations |
| /// by negating the flags we test for afterwards. i.e. |
| /// NEG (CMP CCMP CCMP ...) can be implemented. |
| /// - Note that we can only ever negate all previously processed results. |
| /// What we cannot implement by flipping the flags to test is a negation |
| /// of two sub-trees (because the negation affects all sub-trees emitted so |
| /// far, so the 2nd sub-tree we emit would also affect the first). |
| /// With those tools we can implement some OR operations: |
| /// - (OR (SETCC A) (SETCC B)) can be implemented via: |
| /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B))) |
| /// - After transforming OR to NEG/AND combinations we may be able to use NEG |
| /// elimination rules from earlier to implement the whole thing as a |
| /// CCMP/FCCMP chain. |
| /// |
| /// As complete example: |
| /// or (or (setCA (cmp A)) (setCB (cmp B))) |
| /// (and (setCC (cmp C)) (setCD (cmp D))) |
| /// can be reassociated to: |
| /// or (and (setCC (cmp C)) (setCD (cmp D))) |
| /// (or (setCA (cmp A)) (setCB (cmp B))) |
| /// can be transformed to: |
| /// not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) |
| /// (and (not (setCA (cmp A))) (not (setCB (cmp B))))) |
| /// which can be implemented as: |
| /// cmp C |
| /// ccmp D, inv(CD), CC |
| /// ccmp A, CA, inv(CD) |
| /// ccmp B, CB, inv(CA) |
| /// check for CB flags |
| /// |
| /// A counterexample is "or (and A B) (and C D)" which translates to |
| /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we |
| /// can only implement one of the inner (not) operations, not both! |
| /// @{ |
| |
| /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. |
| static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, |
| ISD::CondCode CC, SDValue CCOp, |
| AArch64CC::CondCode Predicate, |
| AArch64CC::CondCode OutCC, |
| const SDLoc &DL, SelectionDAG &DAG) { |
| unsigned Opcode = 0; |
| const bool FullFP16 = |
| static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); |
| |
| if (LHS.getValueType().isFloatingPoint()) { |
| assert(LHS.getValueType() != MVT::f128); |
| if (LHS.getValueType() == MVT::f16 && !FullFP16) { |
| LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); |
| RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); |
| } |
| Opcode = AArch64ISD::FCCMP; |
| } else if (RHS.getOpcode() == ISD::SUB) { |
| SDValue SubOp0 = RHS.getOperand(0); |
| if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
| // See emitComparison() on why we can only do this for SETEQ and SETNE. |
| Opcode = AArch64ISD::CCMN; |
| RHS = RHS.getOperand(1); |
| } |
| } |
| if (Opcode == 0) |
| Opcode = AArch64ISD::CCMP; |
| |
| SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); |
| AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); |
| unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); |
| SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); |
| return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); |
| } |
| |
| /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be |
| /// expressed as a conjunction. See \ref AArch64CCMP. |
| /// \param CanNegate Set to true if we can negate the whole sub-tree just by |
| /// changing the conditions on the SETCC tests. |
| /// (this means we can call emitConjunctionRec() with |
| /// Negate==true on this sub-tree) |
| /// \param MustBeFirst Set to true if this subtree needs to be negated and we |
| /// cannot do the negation naturally. We are required to |
| /// emit the subtree first in this case. |
| /// \param WillNegate Is true if we are called when the result of this |
| /// subexpression must be negated. This happens when the |
| /// outer expression is an OR. We can use this fact to know |
| /// that we have a double negation (or (or ...) ...) that |
| /// can be implemented for free. |
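| /// |
| /// An illustrative example of the rules below: a bare (setcc ...) leaf gets |
| /// CanNegate=true and MustBeFirst=false; an (or (setcc A) (setcc B)) that is |
| /// not itself under an OR (WillNegate=false) gets CanNegate=false and |
| /// MustBeFirst=true, because that OR can only be emitted as a negated AND. |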
| static bool canEmitConjunction(const SDValue Val, bool &CanNegate, |
| bool &MustBeFirst, bool WillNegate, |
| unsigned Depth = 0) { |
| if (!Val.hasOneUse()) |
| return false; |
| unsigned Opcode = Val->getOpcode(); |
| if (Opcode == ISD::SETCC) { |
| if (Val->getOperand(0).getValueType() == MVT::f128) |
| return false; |
| CanNegate = true; |
| MustBeFirst = false; |
| return true; |
| } |
| // Protect against exponential runtime and stack overflow. |
| if (Depth > 6) |
| return false; |
| if (Opcode == ISD::AND || Opcode == ISD::OR) { |
| bool IsOR = Opcode == ISD::OR; |
| SDValue O0 = Val->getOperand(0); |
| SDValue O1 = Val->getOperand(1); |
| bool CanNegateL; |
| bool MustBeFirstL; |
| if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) |
| return false; |
| bool CanNegateR; |
| bool MustBeFirstR; |
| if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) |
| return false; |
| |
| if (MustBeFirstL && MustBeFirstR) |
| return false; |
| |
| if (IsOR) { |
| // For an OR expression we need to be able to naturally negate at least |
| // one side or we cannot do the transformation at all. |
| if (!CanNegateL && !CanNegateR) |
| return false; |
| // If the result of the OR will be negated and we can naturally negate |
| // the leaves, then this sub-tree as a whole negates naturally. |
| CanNegate = WillNegate && CanNegateL && CanNegateR; |
| // If we cannot naturally negate the whole sub-tree, then this must be |
| // emitted first. |
| MustBeFirst = !CanNegate; |
| } else { |
| assert(Opcode == ISD::AND && "Must be OR or AND"); |
| // We cannot naturally negate an AND operation. |
| CanNegate = false; |
| MustBeFirst = MustBeFirstL || MustBeFirstR; |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| /// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a |
| /// chain of CCMP/FCCMP ops. See @ref AArch64CCMP. |
| /// Tries to transform the given i1-producing node @p Val into a series of |
| /// compare and conditional compare operations. @returns an NZCV-flags |
| /// producing node and sets @p OutCC to the flags that should be tested, or |
| /// returns SDValue() if the transformation was not possible. |
| /// \p Negate is true if we want this sub-tree to be negated just by changing |
| /// SETCC conditions. |
| static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, |
| AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, |
| AArch64CC::CondCode Predicate) { |
| // We're at a tree leaf, produce a conditional comparison operation. |
| unsigned Opcode = Val->getOpcode(); |
| if (Opcode == ISD::SETCC) { |
| SDValue LHS = Val->getOperand(0); |
| SDValue RHS = Val->getOperand(1); |
| ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); |
| bool isInteger = LHS.getValueType().isInteger(); |
| if (Negate) |
| CC = getSetCCInverse(CC, isInteger); |
| SDLoc DL(Val); |
| // Determine OutCC and handle FP special case. |
| if (isInteger) { |
| OutCC = changeIntCCToAArch64CC(CC); |
| } else { |
| assert(LHS.getValueType().isFloatingPoint()); |
| AArch64CC::CondCode ExtraCC; |
| changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); |
| // Some floating point conditions can't be tested with a single condition |
| // code. Construct an additional comparison in this case. |
| if (ExtraCC != AArch64CC::AL) { |
| SDValue ExtraCmp; |
| if (!CCOp.getNode()) |
| ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); |
| else |
| ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, |
| ExtraCC, DL, DAG); |
| CCOp = ExtraCmp; |
| Predicate = ExtraCC; |
| } |
| } |
| |
| // Produce a normal comparison if we are first in the chain |
| if (!CCOp) |
| return emitComparison(LHS, RHS, CC, DL, DAG); |
| // Otherwise produce a ccmp. |
| return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, |
| DAG); |
| } |
| assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); |
| |
| bool IsOR = Opcode == ISD::OR; |
| |
| SDValue LHS = Val->getOperand(0); |
| bool CanNegateL; |
| bool MustBeFirstL; |
| bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); |
| assert(ValidL && "Valid conjunction/disjunction tree"); |
| (void)ValidL; |
| |
| SDValue RHS = Val->getOperand(1); |
| bool CanNegateR; |
| bool MustBeFirstR; |
| bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); |
| assert(ValidR && "Valid conjunction/disjunction tree"); |
| (void)ValidR; |
| |
| // Swap sub-tree that must come first to the right side. |
| if (MustBeFirstL) { |
| assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); |
| std::swap(LHS, RHS); |
| std::swap(CanNegateL, CanNegateR); |
| std::swap(MustBeFirstL, MustBeFirstR); |
| } |
| |
| bool NegateR; |
| bool NegateAfterR; |
| bool NegateL; |
| bool NegateAfterAll; |
| if (Opcode == ISD::OR) { |
| // Swap the sub-tree that we can negate naturally to the left. |
| if (!CanNegateL) { |
| assert(CanNegateR && "at least one side must be negatable"); |
| assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); |
| assert(!Negate); |
| std::swap(LHS, RHS); |
| NegateR = false; |
| NegateAfterR = true; |
| } else { |
| // Negate the left sub-tree if possible, otherwise negate the result. |
| NegateR = CanNegateR; |
| NegateAfterR = !CanNegateR; |
| } |
| NegateL = true; |
| NegateAfterAll = !Negate; |
| } else { |
| assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree"); |
| assert(!Negate && "Valid conjunction/disjunction tree"); |
| |
| NegateL = false; |
| NegateR = false; |
| NegateAfterR = false; |
| NegateAfterAll = false; |
| } |
| |
| // Emit sub-trees. |
| AArch64CC::CondCode RHSCC; |
| SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate); |
| if (NegateAfterR) |
| RHSCC = AArch64CC::getInvertedCondCode(RHSCC); |
| SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC); |
| if (NegateAfterAll) |
| OutCC = AArch64CC::getInvertedCondCode(OutCC); |
| return CmpL; |
| } |
| |
| /// Emit an expression as a conjunction (a series of CCMP/FCCMP ops). |
| /// In some cases this is even possible with OR operations in the expression. |
| /// See \ref AArch64CCMP. |
| /// \see emitConjunctionRec(). |
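| /// (In this file it is used e.g. by getAArch64Cmp() when a boolean |
| /// expression is compared against the constants 0 or 1.) |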
| static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, |
| AArch64CC::CondCode &OutCC) { |
| bool DummyCanNegate; |
| bool DummyMustBeFirst; |
| if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) |
| return SDValue(); |
| |
| return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); |
| } |
| |
| /// @} |
| |
| /// Returns how profitable it is to fold a comparison's operand's shift and/or |
| /// extension operations. |
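| /// |
| /// For example (illustrative): (shl (and x, 0xff), 2) scores 2 because it |
| /// can fold into the extended-register form "cmp wN, wM, uxtb #2", whereas a |
| /// bare (shl x, 2) only scores 1 (both assuming a single use). |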
| static unsigned getCmpOperandFoldingProfit(SDValue Op) { |
| auto isSupportedExtend = [&](SDValue V) { |
| if (V.getOpcode() == ISD::SIGN_EXTEND_INREG) |
| return true; |
| |
| if (V.getOpcode() == ISD::AND) |
| if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { |
| uint64_t Mask = MaskCst->getZExtValue(); |
| return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); |
| } |
| |
| return false; |
| }; |
| |
| if (!Op.hasOneUse()) |
| return 0; |
| |
| if (isSupportedExtend(Op)) |
| return 1; |
| |
| unsigned Opc = Op.getOpcode(); |
| if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) |
| if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { |
| uint64_t Shift = ShiftCst->getZExtValue(); |
| if (isSupportedExtend(Op.getOperand(0))) |
| return (Shift <= 4) ? 2 : 1; |
| EVT VT = Op.getValueType(); |
| if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63)) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, |
| SDValue &AArch64cc, SelectionDAG &DAG, |
| const SDLoc &dl) { |
| if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { |
| EVT VT = RHS.getValueType(); |
| uint64_t C = RHSC->getZExtValue(); |
| if (!isLegalArithImmed(C)) { |
| // The constant does not fit; try adjusting it by one. |
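| // For example (illustrative): "x < 0x1001" (SETLT) cannot be encoded, but |
| // the equivalent "x <= 0x1000" (SETLE) can, since 0x1000 is a legal |
| // arithmetic immediate. |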
| switch (CC) { |
| default: |
| break; |
| case ISD::SETLT: |
| case ISD::SETGE: |
| if ((VT == MVT::i32 && C != 0x80000000 && |
| isLegalArithImmed((uint32_t)(C - 1))) || |
| (VT == MVT::i64 && C != 0x80000000ULL && |
| isLegalArithImmed(C - 1ULL))) { |
| CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; |
| C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; |
| RHS = DAG.getConstant(C, dl, VT); |
| } |
| break; |
| case ISD::SETULT: |
| case ISD::SETUGE: |
| if ((VT == MVT::i32 && C != 0 && |
| isLegalArithImmed((uint32_t)(C - 1))) || |
| (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { |
| CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; |
| C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; |
| RHS = DAG.getConstant(C, dl, VT); |
| } |
| break; |
| case ISD::SETLE: |
| case ISD::SETGT: |
| if ((VT == MVT::i32 && C != INT32_MAX && |
| isLegalArithImmed((uint32_t)(C + 1))) || |
| (VT == MVT::i64 && C != INT64_MAX && |
| isLegalArithImmed(C + 1ULL))) { |
| CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; |
| C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; |
| RHS = DAG.getConstant(C, dl, VT); |
| } |
| break; |
| case ISD::SETULE: |
| case ISD::SETUGT: |
| if ((VT == MVT::i32 && C != UINT32_MAX && |
| isLegalArithImmed((uint32_t)(C + 1))) || |
| (VT == MVT::i64 && C != UINT64_MAX && |
| isLegalArithImmed(C + 1ULL))) { |
| CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; |
| C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; |
| RHS = DAG.getConstant(C, dl, VT); |
| } |
| break; |
| } |
| } |
| } |
| |
| // Comparisons are canonicalized so that the RHS operand is simpler than the |
| // LHS one, the extreme case being when RHS is an immediate. However, AArch64 |
| // can fold some shift+extend operations on the RHS operand, so swap the |
| // operands if that can be done. |
| // |
| // For example: |
| // lsl w13, w11, #1 |
| // cmp w13, w12 |
| // can be turned into: |
| // cmp w12, w11, lsl #1 |
| if (!isa<ConstantSDNode>(RHS) || |
| !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) { |
| SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS; |
| |
| if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) { |
| std::swap(LHS, RHS); |
| CC = ISD::getSetCCSwappedOperands(CC); |
| } |
| } |
| |
| SDValue Cmp; |
| AArch64CC::CondCode AArch64CC; |
| if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { |
| const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); |
| |
| // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. |
| // For the i8 operand, the largest immediate is 255, so this can be easily |
| // encoded in the compare instruction. For the i16 operand, however, the |
| // largest immediate cannot be encoded in the compare. |
| // Therefore, use a sign extending load and cmn to avoid materializing the |
| // -1 constant. For example, |
| // movz w1, #65535 |
| // ldrh w0, [x0, #0] |
| // cmp w0, w1 |
| // > |
| // ldrsh w0, [x0, #0] |
| // cmn w0, #1 |
| // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS) |
| // if and only if (sext LHS) == (sext RHS). The checks are in place to |
| // ensure both the LHS and RHS are truly zero extended and to make sure the |
| // transformation is profitable. |
| if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) && |
| cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && |
| cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && |
| LHS.getNode()->hasNUsesOfValue(1, 0)) { |
| int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); |
| if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { |
| SDValue SExt = |
| DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, |
| DAG.getValueType(MVT::i16)); |
| Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, |
| RHS.getValueType()), |
| CC, dl, DAG); |
| AArch64CC = changeIntCCToAArch64CC(CC); |
| } |
| } |
| |
| if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { |
| if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) { |
| if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) |
| AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); |
| } |
| } |
| } |
| |
| if (!Cmp) { |
| Cmp = emitComparison(LHS, RHS, CC, dl, DAG); |
| AArch64CC = changeIntCCToAArch64CC(CC); |
| } |
| AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); |
| return Cmp; |
| } |
| |
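| // Lower an overflow-checking arithmetic node ([SU]ADDO, [SU]SUBO, [SU]MULO) |
| // to the corresponding AArch64 flag-setting operation. Returns the result |
| // value together with the NZCV-producing node, and sets CC to the condition |
| // under which the overflow result of the original node is true. |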
| static std::pair<SDValue, SDValue> |
| getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { |
| assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && |
| "Unsupported value type"); |
| SDValue Value, Overflow; |
| SDLoc DL(Op); |
| SDValue LHS = Op.getOperand(0); |
| SDValue RHS = Op.getOperand(1); |
| unsigned Opc = 0; |
| switch (Op.getOpcode()) { |
| default: |
| llvm_unreachable("Unknown overflow instruction!"); |
| case ISD::SADDO: |
| Opc = AArch64ISD::ADDS; |
| CC = AArch64CC::VS; |
| break; |
| case ISD::UADDO: |
| Opc = AArch64ISD::ADDS; |
| CC = AArch64CC::HS; |
| break; |
| case ISD::SSUBO: |
| Opc = AArch64ISD::SUBS; |
| CC = AArch64CC::VS; |
| break; |
| case ISD::USUBO: |
| Opc = AArch64ISD::SUBS; |
| CC = AArch64CC::LO; |
| break; |
| // Multiply needs a little bit of extra work. |
| case ISD::SMULO: |
| case ISD::UMULO: { |
| CC = AArch64CC::NE; |
| bool IsSigned = Op.getOpcode() == ISD::SMULO; |
| if (Op.getValueType() == MVT::i32) { |
| unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
| // For a 32 bit multiply with overflow check we want the instruction |
| // selector to generate a widening multiply (SMADDL/UMADDL). For that we |
| // need to generate the following pattern: |
| // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))) |
| LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); |
| RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); |
| SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); |
| SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, |
| DAG.getConstant(0, DL, MVT::i64)); |
| // On AArch64 the upper 32 bits are always zero extended for a 32 bit |
| // operation. We need to clear out the upper 32 bits, because we used a |
| // widening multiply that wrote all 64 bits. In the end this should be a |
| // noop. |
| Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); |
| if (IsSigned) { |
| // The signed overflow check requires more than just a simple check for |
| // any bit set in the upper 32 bits of the result. These bits could be |
| // just the sign bits of a negative number. To perform the overflow |
| // check we arithmetically shift the low 32 bits of the result right by |
| // 31 bits, which replicates the sign bit, and compare that against the |
| // upper 32 bits. |
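| // Illustrative example: for %a = 0x40000000 and %b = 2 the 64-bit product |
| // is 0x80000000; its upper 32 bits are 0, but the replicated sign bit of |
| // the low 32 bits is 0xFFFFFFFF, so the two differ and overflow is |
| // correctly reported. |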
| SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, |
| DAG.getConstant(32, DL, MVT::i64)); |
| UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); |
| SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, |
| DAG.getConstant(31, DL, MVT::i64)); |
| // It is important that LowerBits is last, otherwise the arithmetic |
| // shift will not be folded into the compare (SUBS). |
| SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); |
| Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) |
| .getValue(1); |
| } else { |
| // The overflow check for unsigned multiply is easy. We only need to |
| // check if any of the upper 32 bits are set. This can be done with a |
| // CMP (shifted register). For that we need to generate the following |
| // pattern: |
| // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)) |
| SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, |
| DAG.getConstant(32, DL, MVT::i64)); |
| SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
| Overflow = |
| DAG.getNode(AArch64ISD::SUBS, DL, VTs, |
| DAG.getConstant(0, DL, MVT::i64), |
| UpperBits).getValue(1); |
| } |
| break; |
| } |
| assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type"); |
| // For the 64-bit multiply, check the upper half of the full product. |
| Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); |
| if (IsSigned) { |
| SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); |
| SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, |
| DAG.getConstant(63, DL, MVT::i64)); |
| // It is important that LowerBits is last, otherwise the arithmetic |
| // shift will not be folded into the compare (SUBS). |
| SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
| Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) |
| .getValue(1); |
| } else { |
| SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); |
| SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
| Overflow = |
| DAG.getNode(AArch64ISD::SUBS, DL, VTs, |
| DAG.getConstant(0, DL, MVT::i64), |
| UpperBits).getValue(1); |
| } |
| break; |
| } |
| } // switch (...) |
| |
| if (Opc) { |
| SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); |
| |
| // Emit the AArch64 operation with overflow check. |
| Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); |
| Overflow = Value.getValue(1); |
| } |
| return std::make_pair(Value, Overflow); |
| } |
| |
| SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, |
| RTLIB::Libcall Call) const { |
| SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); |
| MakeLibCallOptions CallOptions; |
| return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; |
| } |
| |
| // Returns true if the given Op is the overflow flag result of an overflow |
| // intrinsic operation. |
| static bool isOverflowIntrOpRes(SDValue Op) { |
| unsigned Opc = Op.getOpcode(); |
| return (Op.getResNo() == 1 && |
| (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || |
| Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); |
| } |
| |
| static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { |
| SDValue Sel = Op.getOperand(0); |
| SDValue Other = Op.getOperand(1); |
| SDLoc dl(Sel); |
| |
| // If the operand is an overflow checking operation, invert the condition |
| // code and kill the Not operation. I.e., transform: |
| // (xor (overflow_op_bool, 1)) |
| // --> |
| // (csel 1, 0, invert(cc), overflow_op_bool) |
| // ... which later gets transformed to just a cset instruction with an |
| // inverted condition code, rather than a cset + eor sequence. |
| if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) { |
| // Only lower legal XALUO ops. |
| if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0))) |
| return SDValue(); |
| |
| SDValue TVal = DAG.getConstant(1, dl, MVT::i32); |
| SDValue FVal = DAG.getConstant(0, dl, MVT::i32); |
| AArch64CC::CondCode CC; |
| SDValue Value, Overflow; |
| std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG); |
| SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); |
| return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal, |
| CCVal, Overflow); |
| } |
| // If neither operand is a SELECT_CC, give up. |
| if (Sel.getOpcode() != ISD::SELECT_CC) |
| std::swap(Sel, Other); |
| if (Sel.getOpcode() != ISD::SELECT_CC) |
| return Op; |
| |
| // The folding we want to perform is: |
| // (xor x, (select_cc a, b, cc, 0, -1) ) |
| // --> |
| // (csel x, (xor x, -1), cc ...) |
| // |
| // The latter will get matched to a CSINV instruction. |
| |
| ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get(); |
| SDValue LHS = Sel.getOperand(0); |
| SDValue RHS = Sel.getOperand(1); |
| SDValue TVal = Sel.getOperand(2); |
| SDValue FVal = Sel.getOperand(3); |
| |
| // FIXME: This could be generalized to non-integer comparisons. |
| if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) |
| return Op; |
| |
| ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal); |
| ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal); |
| |
| // The values aren't constants, this isn't the pattern we're looking for. |
| if (!CFVal || !CTVal) |
| return Op; |
| |
| // We can commute the SELECT_CC by inverting the condition. This |
| // might be needed to make this fit into a CSINV pattern. |
| if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { |
| std::swap(TVal, FVal); |
| std::swap(CTVal, CFVal); |
| CC = ISD::getSetCCInverse(CC, true); |
| } |
| |
| // If the constants line up, perform the transform! |
| if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { |
| SDValue CCVal; |
| SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); |
| |
| FVal = Other; |
| TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, |
| DAG.getConstant(-1ULL, dl, Other.getValueType())); |
| |
| return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, |
| CCVal, Cmp); |
| } |
| |
| return Op; |
| } |
| |
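| // Lower the carry-producing/carry-using ADDC/ADDE/SUBC/SUBE nodes to the |
| // AArch64 flag-setting and flag-consuming forms (ADDS/ADCS/SUBS/SBCS). |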
| static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { |
| EVT VT = Op.getValueType(); |
| |
| // Let legalize expand this if it isn't a legal type yet. |
| if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
| return SDValue(); |
| |
| SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
| |
| unsigned Opc; |
| bool ExtraOp = false; |
| switch (Op.getOpcode()) { |
| default: |
| llvm_unreachable("Invalid code"); |
| case ISD::ADDC: |
| Opc = AArch64ISD::ADDS; |
| break; |
| case ISD::SUBC: |
| Opc = AArch64ISD::SUBS; |
| break; |
| case ISD::ADDE: |
| Opc = AArch64ISD::ADCS; |
| ExtraOp = true; |
| break; |
| case ISD::SUBE: |
| Opc = AArch64ISD::SBCS; |
| ExtraOp = true; |
| break; |
| } |
| |
| if (!ExtraOp) |
| return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); |
| return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), |
| Op.getOperand(2)); |
| } |
| |
| static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { |
| // Let legalize expand this if it isn't a legal type yet. |
| if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) |
| return SDValue(); |
| |
| SDLoc dl(Op); |
| AArch64CC::CondCode CC; |
| // The actual operation that sets the overflow or carry flag. |
| SDValue Value, Overflow; |
| std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); |
| |
| // We use 0 and 1 as false and true values. |
| SDValue TVal = DAG.getConstant(1, dl, MVT::i32); |
| SDValue FVal = DAG.getConstant(0, dl, MVT::i32); |
| |
| // We use an inverted condition, because the conditional select is inverted |
| // too. This will allow it to be selected to a single instruction: |
| // CSINC Wd, WZR, WZR, invert(cond). |
| SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); |
| Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, |
| CCVal, Overflow); |
| |
| SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
| return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); |
| } |
| |
| // Prefetch operands are: |
| // 1: Address to prefetch |
| // 2: bool isWrite |
| // 3: int locality (0 = no locality ... 3 = extreme locality) |
| // 4: bool isDataCache |
| static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { |
| SDLoc DL(Op); |
| unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); |
| unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); |
| unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); |
| |
| bool IsStream = !Locality; |
| // When the locality number is set |
| if (Locality) { |
| // The front-end should have filtered out the out-of-range values |
| assert(Locality <= 3 && "Prefetch locality out-of-range"); |
| // The locality argument counts up toward the fastest cache level (3 means |
| // keep in L1), while the prfop target encoding counts down (0 = L1, |
| // 2 = L3), so invert the number. |
| Locality = 3 - Locality; |
| } |
| |
| // Build the mask value encoding the expected behavior. |
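| // For example (illustrative): a data-cache write prefetch with locality 0 |
| // yields PrfOp = (1 << 4) | (0 << 3) | (0 << 1) | 1 = 0b10001, i.e. |
| // PSTL1STRM. |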
| unsigned PrfOp = (IsWrite << 4) | // Load/Store bit |
| (!IsData << 3) | // IsDataCache bit |
| (Locality << 1) | // Cache level bits |
| (unsigned)IsStream; // Stream bit |
| return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), |
| DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); |
| } |
| |
| SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, |
| SelectionDAG &DAG) const { |
| assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); |
| |
| RTLIB::Libcall LC; |
| LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); |
| |
| return LowerF128Call(Op, DAG, LC); |
| } |
| |
| SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, |
| SelectionDAG &DAG) const { |
| if (Op.getOperand(0).getValueType() != MVT::f128) { |
| // It's legal except when f128 is involved |
| return Op; |
| } |
| |
| RTLIB::Libcall LC; |
| LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); |
| |
| // FP_ROUND node has a second operand indicating whether it is known to be |
| // precise. That doesn't take part in the LibCall so we can't directly use |
| // LowerF128Call. |
| SDValue SrcVal = Op.getOperand(0); |
| MakeLibCallOptions CallOptions; |
| return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions, |
| SDLoc(Op)).first; |
| } |
| |
| SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, |
| SelectionDAG &DAG) const { |
| // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. |
| // Any additional optimization in this function should be recorded |
| // in the cost tables. |
| EVT InVT = Op.getOperand(0).getValueType(); |
| EVT VT = Op.getValueType(); |
| unsigned NumElts = InVT.getVectorNumElements(); |
| |
| // f16 conversions are promoted to f32 when full fp16 is not supported. |
| if (InVT.getVectorElementType() == MVT::f16 && |
| !Subtarget->hasFullFP16()) { |
| MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); |
| SDLoc dl(Op); |
| return DAG.getNode( |
| Op.getOpcode(), dl, Op.getValueType(), |
| DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); |
| } |
| |
| if (VT.getSizeInBits() < InVT.getSizeInBits()) { |
| SDLoc dl(Op); |
| SDValue Cv = |
| DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), |
| Op.getOperand(0)); |
| return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); |
| } |
| |
| if (VT.getSizeInBits() > InVT.getSizeInBits()) { |
| SDLoc dl(Op); |
| MVT ExtVT = |
| MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), |
| VT.getVectorNumElements()); |
| SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); |
| return DAG.getNode(Op.getOpcode(), dl, VT, Ext); |
| } |
| |
| // Type changing conversions are illegal. |
| return Op; |
| } |
| |
| SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, |
| SelectionDAG &DAG) const |