| //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines an instruction selector for the ARM target. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ARM.h" |
| #include "ARMBaseInstrInfo.h" |
| #include "ARMTargetMachine.h" |
| #include "MCTargetDesc/ARMAddressingModes.h" |
| #include "Utils/ARMBaseInfo.h" |
| #include "llvm/ADT/APSInt.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGISel.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsARM.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Target/TargetOptions.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "arm-isel" |
| |
| static cl::opt<bool> |
| DisableShifterOp("disable-shifter-op", cl::Hidden, |
| cl::desc("Disable isel of shifter-op"), |
| cl::init(false)); |
| |
| //===--------------------------------------------------------------------===// |
| /// ARMDAGToDAGISel - ARM specific code to select ARM machine |
| /// instructions for SelectionDAG operations. |
| /// |
| namespace { |
| |
| class ARMDAGToDAGISel : public SelectionDAGISel { |
| /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can |
| /// make the right decision when generating code for different targets. |
| const ARMSubtarget *Subtarget; |
| |
| public: |
| explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) |
| : SelectionDAGISel(tm, OptLevel) {} |
| |
| bool runOnMachineFunction(MachineFunction &MF) override { |
| // Reset the subtarget each time through. |
| Subtarget = &MF.getSubtarget<ARMSubtarget>(); |
| SelectionDAGISel::runOnMachineFunction(MF); |
| return true; |
| } |
| |
| StringRef getPassName() const override { return "ARM Instruction Selection"; } |
| |
| void PreprocessISelDAG() override; |
| |
| /// getI32Imm - Return a target constant of type i32 with the specified |
| /// value. |
| inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { |
| return CurDAG->getTargetConstant(Imm, dl, MVT::i32); |
| } |
| |
| void Select(SDNode *N) override; |
| |
| /// Return true, as some complex patterns, like those that call |
| /// canExtractShiftFromMul, can modify the DAG in place. |
| bool ComplexPatternFuncMutatesDAG() const override { return true; } |
| |
| bool hasNoVMLxHazardUse(SDNode *N) const; |
| bool isShifterOpProfitable(const SDValue &Shift, |
| ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); |
| bool SelectRegShifterOperand(SDValue N, SDValue &A, |
| SDValue &B, SDValue &C, |
| bool CheckProfitability = true); |
| bool SelectImmShifterOperand(SDValue N, SDValue &A, |
| SDValue &B, bool CheckProfitability = true); |
| bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, |
| SDValue &C) { |
| // Don't apply the profitability check |
| return SelectRegShifterOperand(N, A, B, C, false); |
| } |
| bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { |
| // Don't apply the profitability check |
| return SelectImmShifterOperand(N, A, B, false); |
| } |
| bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { |
| if (!N.hasOneUse()) |
| return false; |
| return SelectImmShifterOperand(N, A, B, false); |
| } |
| |
| bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); |
| |
| bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); |
| bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); |
| |
| bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { |
| const ConstantSDNode *CN = cast<ConstantSDNode>(N); |
| Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); |
| Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); |
| return true; |
| } |
| |
| bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc); |
| bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc); |
| bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc); |
| bool SelectAddrOffsetNone(SDValue N, SDValue &Base); |
| bool SelectAddrMode3(SDValue N, SDValue &Base, |
| SDValue &Offset, SDValue &Opc); |
| bool SelectAddrMode3Offset(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc); |
| bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); |
| bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); |
| bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); |
| bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); |
| bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); |
| |
| bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); |
| |
| // Thumb Addressing Modes: |
| bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); |
| bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); |
| bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); |
| template <unsigned Shift> |
| bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); |
| |
| // Thumb 2 Addressing Modes: |
| bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); |
| template <unsigned Shift> |
| bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); |
| bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, |
| SDValue &OffImm); |
| template <unsigned Shift> |
| bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); |
| bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, |
| unsigned Shift); |
| template <unsigned Shift> |
| bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); |
| bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, |
| SDValue &OffReg, SDValue &ShImm); |
| bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); |
| |
| template<int Min, int Max> |
| bool SelectImmediateInRange(SDValue N, SDValue &OffImm); |
| |
| inline bool is_so_imm(unsigned Imm) const { |
| return ARM_AM::getSOImmVal(Imm) != -1; |
| } |
| |
| inline bool is_so_imm_not(unsigned Imm) const { |
| return ARM_AM::getSOImmVal(~Imm) != -1; |
| } |
| |
| inline bool is_t2_so_imm(unsigned Imm) const { |
| return ARM_AM::getT2SOImmVal(Imm) != -1; |
| } |
| |
| inline bool is_t2_so_imm_not(unsigned Imm) const { |
| return ARM_AM::getT2SOImmVal(~Imm) != -1; |
| } |
| |
| // Include the pieces autogenerated from the target description. |
| #include "ARMGenDAGISel.inc" |
| |
| private: |
| void transferMemOperands(SDNode *Src, SDNode *Dst); |
| |
| /// Indexed (pre/post inc/dec) load matching code for ARM. |
| bool tryARMIndexedLoad(SDNode *N); |
| bool tryT1IndexedLoad(SDNode *N); |
| bool tryT2IndexedLoad(SDNode *N); |
| bool tryMVEIndexedLoad(SDNode *N); |
| bool tryFMULFixed(SDNode *N, SDLoc dl); |
| bool tryFP_TO_INT(SDNode *N, SDLoc dl); |
| bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul, |
| bool IsUnsigned, |
| bool FixedToFloat); |
| |
| /// SelectVLD - Select NEON load intrinsics. NumVecs should be |
| /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for |
| /// loads of D registers and even subregs and odd subregs of Q registers. |
| /// For NumVecs <= 2, QOpcodes1 is not used. |
| void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, |
| const uint16_t *DOpcodes, const uint16_t *QOpcodes0, |
| const uint16_t *QOpcodes1); |
| |
| /// SelectVST - Select NEON store intrinsics. NumVecs should |
| /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for |
| /// stores of D registers and even subregs and odd subregs of Q registers. |
| /// For NumVecs <= 2, QOpcodes1 is not used. |
| void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, |
| const uint16_t *DOpcodes, const uint16_t *QOpcodes0, |
| const uint16_t *QOpcodes1); |
| |
| /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should |
| /// be 2, 3 or 4. The opcode arrays specify the instructions used for |
| /// load/store of D registers and Q registers. |
| void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, |
| unsigned NumVecs, const uint16_t *DOpcodes, |
| const uint16_t *QOpcodes); |
| |
| /// Helper functions for setting up clusters of MVE predication operands. |
| template <typename SDValueVector> |
| void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, |
| SDValue PredicateMask); |
| template <typename SDValueVector> |
| void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, |
| SDValue PredicateMask, SDValue Inactive); |
| |
| template <typename SDValueVector> |
| void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); |
| template <typename SDValueVector> |
| void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); |
| |
| /// SelectMVE_WB - Select MVE writeback load/store intrinsics. |
| void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); |
| |
| /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. |
| void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, |
| bool HasSaturationOperand); |
| |
| /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. |
| void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, |
| uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); |
| |
| /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between |
| /// vector lanes. |
| void SelectMVE_VSHLC(SDNode *N, bool Predicated); |
| |
| /// Select long MVE vector reductions with two vector operands |
| /// Stride is the number of vector element widths the instruction can operate |
| /// on: |
| /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] |
| /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] |
| /// Stride is used when addressing the OpcodesS array which contains multiple |
| /// opcodes for each element width. |
| /// TySize is the index into the list of element types listed above |
| void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, |
| const uint16_t *OpcodesS, const uint16_t *OpcodesU, |
| size_t Stride, size_t TySize); |
| |
| /// Select a 64-bit MVE vector reduction with two vector operands |
| /// arm_mve_vmlldava_[predicated] |
| void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, |
| const uint16_t *OpcodesU); |
| /// Select a 72-bit MVE vector rounding reduction with two vector operands |
| /// int_arm_mve_vrmlldavha[_predicated] |
| void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, |
| const uint16_t *OpcodesU); |
| |
| /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs |
| /// should be 2 or 4. The opcode array specifies the instructions |
| /// used for 8, 16 and 32-bit lane sizes respectively, and each |
| /// pointer points to a set of NumVecs sub-opcodes used for the |
| /// different stages (e.g. VLD20 versus VLD21) of each load family. |
| void SelectMVE_VLD(SDNode *N, unsigned NumVecs, |
| const uint16_t *const *Opcodes, bool HasWriteback); |
| |
| /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an |
| /// array of 3 elements for the 8, 16 and 32-bit lane sizes. |
| void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, |
| bool Wrapping, bool Predicated); |
| |
| /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D, |
| /// CX1DA, CX2D, CX2DA, CX3D, CX3DA). |
| /// \arg \c NumExtraOps number of extra operands besides the coprocessor, |
| /// the accumulator and the immediate operand, i.e. 0 |
| /// for CX1*, 1 for CX2*, 2 for CX3* |
| /// \arg \c HasAccum whether the instruction has an accumulator operand |
| void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, |
| bool HasAccum); |
| |
| /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs |
| /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used |
| /// for loading D registers. |
| void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, |
| unsigned NumVecs, const uint16_t *DOpcodes, |
| const uint16_t *QOpcodes0 = nullptr, |
| const uint16_t *QOpcodes1 = nullptr); |
| |
| /// Try to select SBFX/UBFX instructions for ARM. |
| bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); |
| |
| bool tryInsertVectorElt(SDNode *N); |
| |
| // Select special operations if the node forms an integer ABS pattern. |
| bool tryABSOp(SDNode *N); |
| |
| bool tryReadRegister(SDNode *N); |
| bool tryWriteRegister(SDNode *N); |
| |
| bool tryInlineAsm(SDNode *N); |
| |
| void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); |
| |
| void SelectCMP_SWAP(SDNode *N); |
| |
| /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for |
| /// inline asm expressions. |
| bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
| std::vector<SDValue> &OutOps) override; |
| |
| // Form pairs of consecutive R, S, D, or Q registers. |
| SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); |
| SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); |
| SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); |
| SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); |
| |
| // Form sequences of 4 consecutive S, D, or Q registers. |
| SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); |
| SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); |
| SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); |
| |
| // Get the alignment operand for a NEON VLD or VST instruction. |
| SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, |
| bool is64BitVector); |
| |
| /// Checks if N is a multiplication by a constant where we can extract out a |
| /// power of two from the constant so that it can be used in a shift, but only |
| /// if it simplifies the materialization of the constant. Returns true if it |
| /// is, and assigns to PowerOfTwo the power of two that should be extracted |
| /// out and to NewMulConst the new constant to be multiplied by. |
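| /// For example (illustrative), a multiply by 40 with MaxShift == 31 gives |
| /// PowerOfTwo == 3 and NewMulConst == 5, i.e. (X * 40) becomes (X * 5) << 3, |
| /// provided materializing 5 is cheaper than materializing 40. |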
| bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, |
| unsigned &PowerOfTwo, SDValue &NewMulConst) const; |
| |
| /// Replace N with M in CurDAG, in a way that also ensures that M gets |
| /// selected when N would have been selected. |
| void replaceDAGValue(const SDValue &N, SDValue M); |
| }; |
| } |
| |
| /// isInt32Immediate - This method tests to see if the node is a 32-bit constant |
| /// operand. If so, Imm will receive the 32-bit value. |
| static bool isInt32Immediate(SDNode *N, unsigned &Imm) { |
| if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { |
| Imm = cast<ConstantSDNode>(N)->getZExtValue(); |
| return true; |
| } |
| return false; |
| } |
| |
| // isInt32Immediate - This method tests to see if the value is a 32-bit constant |
| // operand. If so, Imm will receive the 32-bit value. |
| static bool isInt32Immediate(SDValue N, unsigned &Imm) { |
| return isInt32Immediate(N.getNode(), Imm); |
| } |
| |
| // isOpcWithIntImmediate - This method tests to see if the node is a specific |
| // opcode and that it has an immediate integer right operand. |
| // If so, Imm will receive the 32-bit value. |
| static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { |
| return N->getOpcode() == Opc && |
| isInt32Immediate(N->getOperand(1).getNode(), Imm); |
| } |
| |
| /// Check whether a particular node is a constant value representable as |
| /// (N * Scale) where N is in [\p RangeMin, \p RangeMax). |
| /// |
| /// \param ScaledConstant [out] - On success, the pre-scaled constant value. |
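| /// For example (illustrative), a ConstantSDNode holding 1020 with Scale == 4 |
| /// yields ScaledConstant == 255, which is then checked against the range. |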
| static bool isScaledConstantInRange(SDValue Node, int Scale, |
| int RangeMin, int RangeMax, |
| int &ScaledConstant) { |
| assert(Scale > 0 && "Invalid scale!"); |
| |
| // Check that this is a constant. |
| const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); |
| if (!C) |
| return false; |
| |
| ScaledConstant = (int) C->getZExtValue(); |
| if ((ScaledConstant % Scale) != 0) |
| return false; |
| |
| ScaledConstant /= Scale; |
| return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; |
| } |
| |
| void ARMDAGToDAGISel::PreprocessISelDAG() { |
| if (!Subtarget->hasV6T2Ops()) |
| return; |
| |
| bool isThumb2 = Subtarget->isThumb(); |
| // We use make_early_inc_range to avoid invalidation issues. |
| for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) { |
| if (N.getOpcode() != ISD::ADD) |
| continue; |
| |
| // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with |
| // leading zeros, followed by consecutive set bits, followed by 1 or 2 |
| // trailing zeros, e.g. 1020. |
| // Transform the expression to |
| // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number |
| // of trailing zeros of c2. The left shift would be folded as a shifter |
| // operand of 'add' and the 'and' and 'srl' would become a bits extraction |
| // node (UBFX). |
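| // For example (an illustrative instance), with c1 == 14 and c2 == 1020 |
| // (so tz == 2): |
| //   (add X1, (and (srl X2, 14), 1020)) |
| // becomes |
| //   (add X1, (shl (and (srl X2, 16), 255), 2)) |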
| |
| SDValue N0 = N.getOperand(0); |
| SDValue N1 = N.getOperand(1); |
| unsigned And_imm = 0; |
| if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { |
| if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) |
| std::swap(N0, N1); |
| } |
| if (!And_imm) |
| continue; |
| |
| // Check if the AND mask is an immediate of the form: 000.....1111111100 |
| unsigned TZ = countTrailingZeros(And_imm); |
| if (TZ != 1 && TZ != 2) |
| // Be conservative here. Shifter operands aren't always free; e.g. on |
| // Swift, a left-shift operand of 1 or 2 is free but others are not. |
| // e.g. |
| // ubfx r3, r1, #16, #8 |
| // ldr.w r3, [r0, r3, lsl #2] |
| // vs. |
| // mov.w r9, #1020 |
| // and.w r2, r9, r1, lsr #14 |
| // ldr r2, [r0, r2] |
| continue; |
| And_imm >>= TZ; |
| if (And_imm & (And_imm + 1)) |
| continue; |
| |
| // Look for (and (srl X, c1), c2). |
| SDValue Srl = N1.getOperand(0); |
| unsigned Srl_imm = 0; |
| if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || |
| (Srl_imm <= 2)) |
| continue; |
| |
| // Make sure first operand is not a shifter operand which would prevent |
| // folding of the left shift. |
| SDValue CPTmp0; |
| SDValue CPTmp1; |
| SDValue CPTmp2; |
| if (isThumb2) { |
| if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) |
| continue; |
| } else { |
| if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || |
| SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) |
| continue; |
| } |
| |
| // Now make the transformation. |
| Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, |
| Srl.getOperand(0), |
| CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), |
| MVT::i32)); |
| N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, |
| Srl, |
| CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); |
| N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, |
| N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); |
| CurDAG->UpdateNodeOperands(&N, N0, N1); |
| } |
| } |
| |
| /// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS |
| /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at |
| /// least on current ARM implementations) which should be avoided. |
| bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { |
| if (OptLevel == CodeGenOpt::None) |
| return true; |
| |
| if (!Subtarget->hasVMLxHazards()) |
| return true; |
| |
| if (!N->hasOneUse()) |
| return false; |
| |
| SDNode *Use = *N->use_begin(); |
| if (Use->getOpcode() == ISD::CopyToReg) |
| return true; |
| if (Use->isMachineOpcode()) { |
| const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( |
| CurDAG->getSubtarget().getInstrInfo()); |
| |
| const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); |
| if (MCID.mayStore()) |
| return true; |
| unsigned Opcode = MCID.getOpcode(); |
| if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) |
| return true; |
| // vmlx feeding into another vmlx. We actually want to unfold |
| // the use later in the MLxExpansion pass. e.g. |
| // vmla |
| // vmla (stall 8 cycles) |
| // |
| // vmul (5 cycles) |
| // vadd (5 cycles) |
| // vmla |
| // This adds up to about 18 - 19 cycles. |
| // |
| // vmla |
| // vmul (stall 4 cycles) |
| // vadd adds up to about 14 cycles. |
| return TII->isFpMLxInstruction(Opcode); |
| } |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, |
| ARM_AM::ShiftOpc ShOpcVal, |
| unsigned ShAmt) { |
| if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) |
| return true; |
| if (Shift.hasOneUse()) |
| return true; |
| // R << 2 is free; on Swift, R << 1 is too. |
| return ShOpcVal == ARM_AM::lsl && |
| (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); |
| } |
| |
| bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, |
| unsigned MaxShift, |
| unsigned &PowerOfTwo, |
| SDValue &NewMulConst) const { |
| assert(N.getOpcode() == ISD::MUL); |
| assert(MaxShift > 0); |
| |
| // If the multiply is used in more than one place then changing the constant |
| // will make other uses incorrect, so don't. |
| if (!N.hasOneUse()) return false; |
| // Check if the multiply is by a constant |
| ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!MulConst) return false; |
| // If the constant is used in more than one place then modifying it will mean |
| // we need to materialize two constants instead of one, which is a bad idea. |
| if (!MulConst->hasOneUse()) return false; |
| unsigned MulConstVal = MulConst->getZExtValue(); |
| if (MulConstVal == 0) return false; |
| |
| // Find the largest power of 2 that MulConstVal is a multiple of |
| PowerOfTwo = MaxShift; |
| while ((MulConstVal % (1 << PowerOfTwo)) != 0) { |
| --PowerOfTwo; |
| if (PowerOfTwo == 0) return false; |
| } |
| |
| // Only optimise if the new cost is better |
| unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); |
| NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); |
| unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); |
| unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); |
| return NewCost < OldCost; |
| } |
| |
| void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { |
| CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); |
| ReplaceUses(N, M); |
| } |
| |
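| // SelectImmShifterOperand - Match a shift-by-immediate shifter operand, e.g. |
| // (shl R, 5) yields the operand pair {R, lsl #5}, which a user such as ADD |
| // can then fold: add r0, r1, r2, lsl #5 (illustrative). |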
| bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, |
| SDValue &BaseReg, |
| SDValue &Opc, |
| bool CheckProfitability) { |
| if (DisableShifterOp) |
| return false; |
| |
| // If N is a multiply-by-constant and it's profitable to extract a shift and |
| // use it in a shifted operand, do so. |
| if (N.getOpcode() == ISD::MUL) { |
| unsigned PowerOfTwo = 0; |
| SDValue NewMulConst; |
| if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { |
| HandleSDNode Handle(N); |
| SDLoc Loc(N); |
| replaceDAGValue(N.getOperand(1), NewMulConst); |
| BaseReg = Handle.getValue(); |
| Opc = CurDAG->getTargetConstant( |
| ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); |
| return true; |
| } |
| } |
| |
| ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); |
| |
| // Don't match the base-register-only case; that is matched by a separate, |
| // lower-complexity pattern with an explicit register operand. |
| if (ShOpcVal == ARM_AM::no_shift) return false; |
| |
| BaseReg = N.getOperand(0); |
| unsigned ShImmVal = 0; |
| ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!RHS) return false; |
| ShImmVal = RHS->getZExtValue() & 31; |
| Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, |
| SDValue &BaseReg, |
| SDValue &ShReg, |
| SDValue &Opc, |
| bool CheckProfitability) { |
| if (DisableShifterOp) |
| return false; |
| |
| ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); |
| |
| // Don't match the base-register-only case; that is matched by a separate, |
| // lower-complexity pattern with an explicit register operand. |
| if (ShOpcVal == ARM_AM::no_shift) return false; |
| |
| BaseReg = N.getOperand(0); |
| unsigned ShImmVal = 0; |
| ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (RHS) return false; |
| |
| ShReg = N.getOperand(1); |
| if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) |
| return false; |
| Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| // Determine whether an ISD::OR's operands are suitable to turn the operation |
| // into an addition, which often has more compact encodings. |
| bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { |
| assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); |
| Out = N; |
| return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); |
| } |
| |
| |
| bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, |
| SDValue &Base, |
| SDValue &OffImm) { |
| // Match simple R + imm12 operands. |
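| // For example (illustrative), (add R, 1020) gives Base = R and |
| // OffImm = 1020, which fits ldr r0, [r1, #1020]. |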
| |
| // Base only. |
| if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && |
| !CurDAG->isBaseWithConstantOffset(N)) { |
| if (N.getOpcode() == ISD::FrameIndex) { |
| // Match frame index. |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (N.getOpcode() == ARMISD::Wrapper && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && |
| N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { |
| Base = N.getOperand(0); |
| } else |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| int RHSC = (int)RHS->getSExtValue(); |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| |
| if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| // Base only. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| |
| |
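| // SelectLdStSOReg - Match addressing mode 2's register-offset form: a base |
| // register plus or minus an optionally shifted index register, e.g. |
| // (illustrative): |
| //   ldr r0, [r1, r2, lsl #2] |
| //   ldr r0, [r1, -r2] |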
| bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, |
| SDValue &Opc) { |
| if (N.getOpcode() == ISD::MUL && |
| ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| // X * [3,5,9] -> X + X * [2,4,8] etc. |
| int RHSC = (int)RHS->getZExtValue(); |
| if (RHSC & 1) { |
| RHSC = RHSC & ~1; |
| ARM_AM::AddrOpc AddSub = ARM_AM::add; |
| if (RHSC < 0) { |
| AddSub = ARM_AM::sub; |
| RHSC = - RHSC; |
| } |
| if (isPowerOf2_32(RHSC)) { |
| unsigned ShAmt = Log2_32(RHSC); |
| Base = Offset = N.getOperand(0); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, |
| ARM_AM::lsl), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| } |
| } |
| |
| if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && |
| // ISD::OR that is equivalent to an ISD::ADD. |
| !CurDAG->isBaseWithConstantOffset(N)) |
| return false; |
| |
| // Leave simple R +/- imm12 operands for LDRi12 |
| if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, |
| -0x1000+1, 0x1000, RHSC)) // 12 bits. |
| return false; |
| } |
| |
| // Otherwise this is R +/- [possibly shifted] R. |
| ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; |
| ARM_AM::ShiftOpc ShOpcVal = |
| ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); |
| unsigned ShAmt = 0; |
| |
| Base = N.getOperand(0); |
| Offset = N.getOperand(1); |
| |
| if (ShOpcVal != ARM_AM::no_shift) { |
| // Check to see if the RHS of the shift is a constant; if not, we can't fold |
| // it. |
| if (ConstantSDNode *Sh = |
| dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { |
| ShAmt = Sh->getZExtValue(); |
| if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) |
| Offset = N.getOperand(1).getOperand(0); |
| else { |
| ShAmt = 0; |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } else { |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } |
| |
| // Try matching (R shl C) + (R). |
| if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && |
| !(Subtarget->isLikeA9() || Subtarget->isSwift() || |
| N.getOperand(0).hasOneUse())) { |
| ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); |
| if (ShOpcVal != ARM_AM::no_shift) { |
| // Check to see if the RHS of the shift is a constant; if not, we can't |
| // fold it. |
| if (ConstantSDNode *Sh = |
| dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { |
| ShAmt = Sh->getZExtValue(); |
| if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { |
| Offset = N.getOperand(0).getOperand(0); |
| Base = N.getOperand(1); |
| } else { |
| ShAmt = 0; |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } else { |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } |
| } |
| |
| // If Offset is a multiply-by-constant and it's profitable to extract a shift |
| // and use it in a shifted operand, do so. |
| if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { |
| unsigned PowerOfTwo = 0; |
| SDValue NewMulConst; |
| if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { |
| HandleSDNode Handle(Offset); |
| replaceDAGValue(Offset.getOperand(1), NewMulConst); |
| Offset = Handle.getValue(); |
| ShAmt = PowerOfTwo; |
| ShOpcVal = ARM_AM::lsl; |
| } |
| } |
| |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc) { |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) |
| ? cast<LoadSDNode>(Op)->getAddressingMode() |
| : cast<StoreSDNode>(Op)->getAddressingMode(); |
| ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) |
| ? ARM_AM::add : ARM_AM::sub; |
| int Val; |
| if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) |
| return false; |
| |
| Offset = N; |
| ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); |
| unsigned ShAmt = 0; |
| if (ShOpcVal != ARM_AM::no_shift) { |
| // Check to see if the RHS of the shift is a constant; if not, we can't fold |
| // it. |
| if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| ShAmt = Sh->getZExtValue(); |
| if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) |
| Offset = N.getOperand(0); |
| else { |
| ShAmt = 0; |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } else { |
| ShOpcVal = ARM_AM::no_shift; |
| } |
| } |
| |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc) { |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) |
| ? cast<LoadSDNode>(Op)->getAddressingMode() |
| : cast<StoreSDNode>(Op)->getAddressingMode(); |
| ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) |
| ? ARM_AM::add : ARM_AM::sub; |
| int Val; |
| if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. |
| if (AddSub == ARM_AM::sub) Val *= -1; |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc) { |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) |
| ? cast<LoadSDNode>(Op)->getAddressingMode() |
| : cast<StoreSDNode>(Op)->getAddressingMode(); |
| ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) |
| ? ARM_AM::add : ARM_AM::sub; |
| int Val; |
| if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, |
| ARM_AM::no_shift), |
| SDLoc(Op), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { |
| Base = N; |
| return true; |
| } |
| |
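| // SelectAddrMode3 - Addressing mode 3 serves the halfword and signed-byte |
| // loads/stores: a base register plus either a register offset or an 8-bit |
| // immediate, e.g. ldrh r0, [r1, #-4] or ldrsb r0, [r1, r2] (illustrative). |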
| bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, |
| SDValue &Base, SDValue &Offset, |
| SDValue &Opc) { |
| if (N.getOpcode() == ISD::SUB) { |
| // X - C is canonicalized to X + -C, no need to handle it here. |
| Base = N.getOperand(0); |
| Offset = N.getOperand(1); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), |
| MVT::i32); |
| return true; |
| } |
| |
| if (!CurDAG->isBaseWithConstantOffset(N)) { |
| Base = N; |
| if (N.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), |
| MVT::i32); |
| return true; |
| } |
| |
| // If the RHS is +/- imm8, fold into addr mode. |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, |
| -256 + 1, 256, RHSC)) { // 8 bits. |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| |
| ARM_AM::AddrOpc AddSub = ARM_AM::add; |
| if (RHSC < 0) { |
| AddSub = ARM_AM::sub; |
| RHSC = -RHSC; |
| } |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), |
| MVT::i32); |
| return true; |
| } |
| |
| Base = N.getOperand(0); |
| Offset = N.getOperand(1); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), |
| MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, |
| SDValue &Offset, SDValue &Opc) { |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) |
| ? cast<LoadSDNode>(Op)->getAddressingMode() |
| : cast<StoreSDNode>(Op)->getAddressingMode(); |
| ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) |
| ? ARM_AM::add : ARM_AM::sub; |
| int Val; |
| if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits. |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), |
| MVT::i32); |
| return true; |
| } |
| |
| Offset = N; |
| Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), |
| MVT::i32); |
| return true; |
| } |
| |
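| // IsAddressingMode5 - Addressing mode 5 serves the VFP loads/stores: a base |
| // register plus an 8-bit immediate scaled by 4 (or by 2 for the FP16 |
| // variants). For example (illustrative), vldr d0, [r1, #-256] encodes the |
| // scaled offset 64 with the sub bit set. |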
| bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, |
| bool FP16) { |
| if (!CurDAG->isBaseWithConstantOffset(N)) { |
| Base = N; |
| if (N.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } else if (N.getOpcode() == ARMISD::Wrapper && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && |
| N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { |
| Base = N.getOperand(0); |
| } |
| Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| // If the RHS is +/- imm8, fold into addr mode. |
| int RHSC; |
| const int Scale = FP16 ? 2 : 4; |
| |
| if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| |
| ARM_AM::AddrOpc AddSub = ARM_AM::add; |
| if (RHSC < 0) { |
| AddSub = ARM_AM::sub; |
| RHSC = -RHSC; |
| } |
| |
| if (FP16) |
| Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), |
| SDLoc(N), MVT::i32); |
| else |
| Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), |
| SDLoc(N), MVT::i32); |
| |
| return true; |
| } |
| |
| Base = N; |
| |
| if (FP16) |
| Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), |
| SDLoc(N), MVT::i32); |
| else |
| Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), |
| SDLoc(N), MVT::i32); |
| |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, |
| SDValue &Base, SDValue &Offset) { |
| return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, |
| SDValue &Base, SDValue &Offset) { |
| return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); |
| } |
| |
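| // SelectAddrMode6 - NEON vector loads/stores carry an explicit alignment |
| // operand alongside the address, e.g. vld1.64 {d0, d1}, [r0:128] |
| // (illustrative syntax); for intrinsics the value is refined later. |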
| bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, |
| SDValue &Align) { |
| Addr = N; |
| |
| unsigned Alignment = 0; |
| |
| MemSDNode *MemN = cast<MemSDNode>(Parent); |
| |
| if (isa<LSBaseSDNode>(MemN) || |
| ((MemN->getOpcode() == ARMISD::VST1_UPD || |
| MemN->getOpcode() == ARMISD::VLD1_UPD) && |
| MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { |
| // This case occurs only for VLD1-lane/dup and VST1-lane instructions. |
| // The maximum alignment is equal to the memory size being referenced. |
| unsigned MMOAlign = MemN->getAlignment(); |
| unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; |
| if (MMOAlign >= MemSize && MemSize > 1) |
| Alignment = MemSize; |
| } else { |
| // All other uses of addrmode6 are for intrinsics. For now just record |
| // the raw alignment value; it will be refined later based on the legal |
| // alignment operands for the intrinsic. |
| Alignment = MemN->getAlignment(); |
| } |
| |
| Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, |
| SDValue &Offset) { |
| LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); |
| ISD::MemIndexedMode AM = LdSt->getAddressingMode(); |
| if (AM != ISD::POST_INC) |
| return false; |
| Offset = N; |
| if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { |
| if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) |
| Offset = CurDAG->getRegister(0, MVT::i32); |
| } |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, |
| SDValue &Offset, SDValue &Label) { |
| if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { |
| Offset = N.getOperand(0); |
| SDValue N1 = N.getOperand(1); |
| Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), |
| SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Thumb Addressing Modes |
| //===----------------------------------------------------------------------===// |
| |
| static bool shouldUseZeroOffsetLdSt(SDValue N) { |
| // Negative numbers are difficult to materialise in Thumb1. If we are |
| // selecting the add of a negative constant, instead select the ri form with |
| // a zero offset, so the add node itself gets selected directly and becomes |
| // a sub. |
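| // For example (illustrative), a word load from (add R, -4) cannot use an |
| // immediate-offset load directly, but selects cleanly as |
| //   subs r1, r0, #4 |
| //   ldr r2, [r1] |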
| if (N.getOpcode() != ISD::ADD) |
| return false; |
| |
| // Look for an imm which is not legal for ld/st, but is legal for sub. |
| if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) |
| return C->getSExtValue() < 0 && C->getSExtValue() >= -255; |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, |
| SDValue &Offset) { |
| if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { |
| ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); |
| if (!NC || !NC->isZero()) |
| return false; |
| |
| Base = Offset = N; |
| return true; |
| } |
| |
| Base = N.getOperand(0); |
| Offset = N.getOperand(1); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, |
| SDValue &Offset) { |
| if (shouldUseZeroOffsetLdSt(N)) |
| return false; // Select ri instead |
| return SelectThumbAddrModeRRSext(N, Base, Offset); |
| } |
| |
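| // SelectThumbAddrModeImm5S - Thumb1 immediate-offset loads/stores take an |
| // unsigned 5-bit immediate scaled by the access size, so for Scale == 4 the |
| // reachable offsets are 0, 4, ..., 124, e.g. ldr r0, [r1, #124] |
| // (illustrative). Anything else falls back to the register-offset form. |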
| bool |
| ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, |
| SDValue &Base, SDValue &OffImm) { |
| if (shouldUseZeroOffsetLdSt(N)) { |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (!CurDAG->isBaseWithConstantOffset(N)) { |
| if (N.getOpcode() == ISD::ADD) { |
| return false; // We want to select register offset instead |
| } else if (N.getOpcode() == ARMISD::Wrapper && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && |
| N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && |
| N.getOperand(0).getOpcode() != ISD::TargetConstantPool && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { |
| Base = N.getOperand(0); |
| } else { |
| Base = N; |
| } |
| |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| // If the RHS is + imm5 * scale, fold into addr mode. |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { |
| Base = N.getOperand(0); |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| // Offset is too large, so use register offset instead. |
| return false; |
| } |
| |
| bool |
| ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); |
| } |
| |
| bool |
| ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); |
| } |
| |
| bool |
| ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); |
| } |
| |
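| // SelectThumbAddrModeSP - SP-relative loads/stores take an unsigned 8-bit |
| // immediate scaled by 4, reaching offsets up to 1020, e.g. |
| // ldr r0, [sp, #1020] (illustrative). |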
| bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, |
| SDValue &Base, SDValue &OffImm) { |
| if (N.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| // Only multiples of 4 are allowed for the offset, so the frame object |
| // alignment must be at least 4. |
| MachineFrameInfo &MFI = MF->getFrameInfo(); |
| if (MFI.getObjectAlign(FI) < Align(4)) |
| MFI.setObjectAlignment(FI, Align(4)); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (!CurDAG->isBaseWithConstantOffset(N)) |
| return false; |
| |
| if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { |
| // If the RHS is + imm8 * scale, fold into addr mode. |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { |
| Base = N.getOperand(0); |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| // Make sure the offset is inside the object, or we might fail to |
| // allocate an emergency spill slot. (An out-of-range access is UB, but |
| // it could show up anyway.) |
| MachineFrameInfo &MFI = MF->getFrameInfo(); |
| if (RHSC * 4 < MFI.getObjectSize(FI)) { |
| // For LHS+RHS to result in an offset that's a multiple of 4 the object |
| // indexed by the LHS must be 4-byte aligned. |
| if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) |
| MFI.setObjectAlignment(FI, Align(4)); |
| if (MFI.getObjectAlign(FI) >= Align(4)) { |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| template <unsigned Shift> |
| bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, |
| RHSC)) { |
| Base = N.getOperand(0); |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| OffImm = |
| CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| // Base only. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Thumb 2 Addressing Modes |
| //===----------------------------------------------------------------------===// |
| |
| |
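| // Thumb2 splits the simple immediate forms in two: t2LDRi12 takes an |
| // unsigned 12-bit offset, e.g. ldr.w r0, [r1, #4095], while t2LDRi8 takes a |
| // negative 8-bit one, e.g. ldr r0, [r1, #-255] (both illustrative); the |
| // selectors below route each constant to the matching form. |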
| bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, |
| SDValue &Base, SDValue &OffImm) { |
| // Match simple R + imm12 operands. |
| |
| // Base only. |
| if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && |
| !CurDAG->isBaseWithConstantOffset(N)) { |
| if (N.getOpcode() == ISD::FrameIndex) { |
| // Match frame index. |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (N.getOpcode() == ARMISD::Wrapper && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && |
| N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && |
| N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::TargetConstantPool) |
| return false; // We want to select t2LDRpci instead. |
| } else |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| if (SelectT2AddrModeImm8(N, Base, OffImm)) |
| // Let t2LDRi8 handle (R - imm8). |
| return false; |
| |
| int RHSC = (int)RHS->getZExtValue(); |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| |
| if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| // Base only. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| template <unsigned Shift> |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| OffImm = |
| CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| // Base only. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, |
| SDValue &Base, SDValue &OffImm) { |
| // Match simple R - imm8 operands. |
| if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && |
| !CurDAG->isBaseWithConstantOffset(N)) |
| return false; |
| |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| int RHSC = (int)RHS->getSExtValue(); |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| |
| if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, |
| SDValue &OffImm){ |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) |
| ? cast<LoadSDNode>(Op)->getAddressingMode() |
| : cast<StoreSDNode>(Op)->getAddressingMode(); |
| int RHSC; |
| if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. |
| OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) |
| ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) |
| : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| template <unsigned Shift> |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { |
| int RHSC; |
| if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, |
| RHSC)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| |
| if (N.getOpcode() == ISD::SUB) |
| RHSC = -RHSC; |
| OffImm = |
| CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| // Base only. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| template <unsigned Shift> |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, |
| SDValue &OffImm) { |
| return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); |
| } |
| |
| bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, |
| SDValue &OffImm, |
| unsigned Shift) { |
| unsigned Opcode = Op->getOpcode(); |
| ISD::MemIndexedMode AM; |
| switch (Opcode) { |
| case ISD::LOAD: |
| AM = cast<LoadSDNode>(Op)->getAddressingMode(); |
| break; |
| case ISD::STORE: |
| AM = cast<StoreSDNode>(Op)->getAddressingMode(); |
| break; |
| case ISD::MLOAD: |
| AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); |
| break; |
| case ISD::MSTORE: |
| AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); |
| break; |
| default: |
| llvm_unreachable("Unexpected Opcode for Imm7Offset"); |
| } |
| |
| int RHSC; |
| // 7-bit constant, shifted by Shift. |
| if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { |
| OffImm = |
| ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) |
| ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) |
| : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), |
| MVT::i32); |
| return true; |
| } |
| return false; |
| } |
| |
| template <int Min, int Max> |
| bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { |
| int Val; |
| if (isScaledConstantInRange(N, 1, Min, Max, Val)) { |
| OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); |
| return true; |
| } |
| return false; |
| } |
| |
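| // SelectT2AddrModeSoReg - Match the Thumb2 register-offset form with an |
| // optional left shift of 0-3, e.g. ldr.w r0, [r1, r2, lsl #2] |
| // (illustrative). |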
| bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, |
| SDValue &Base, |
| SDValue &OffReg, SDValue &ShImm) { |
| // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. |
| if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) |
| return false; |
| |
| // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| int RHSC = (int)RHS->getZExtValue(); |
| if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) |
| return false; |
| else if (RHSC < 0 && RHSC >= -255) // 8 bits |
| return false; |
| } |
| |
| // Look for (R + R) or (R + (R << [1,2,3])). |
| unsigned ShAmt = 0; |
| Base = N.getOperand(0); |
| OffReg = N.getOperand(1); |
| |
| // Swap if it is ((R << c) + R). |
| ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); |
| if (ShOpcVal != ARM_AM::lsl) { |
| ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); |
| if (ShOpcVal == ARM_AM::lsl) |
| std::swap(Base, OffReg); |
| } |
| |
| if (ShOpcVal == ARM_AM::lsl) { |
| // Check to see if the RHS of the shift is a constant; if not, we can't fold |
| // it. |
| if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { |
| ShAmt = Sh->getZExtValue(); |
| if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) |
| OffReg = OffReg.getOperand(0); |
| else { |
| ShAmt = 0; |
| } |
| } |
| } |
| |
| // If OffReg is a multiply-by-constant and it's profitable to extract a shift |
| // and use it in a shifted operand, do so. |
| if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { |
| unsigned PowerOfTwo = 0; |
| SDValue NewMulConst; |
| if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { |
| HandleSDNode Handle(OffReg); |
| replaceDAGValue(OffReg.getOperand(1), NewMulConst); |
| OffReg = Handle.getValue(); |
| ShAmt = PowerOfTwo; |
| } |
| } |
| |
| ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); |
| |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| // This *must* succeed since it's used for the irreplaceable ldrex and strex |
| // instructions. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); |
| |
| if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) |
| return true; |
| |
| ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!RHS) |
| return true; |
| |
| uint32_t RHSC = (int)RHS->getZExtValue(); |
| if (RHSC > 1020 || RHSC % 4 != 0) |
| return true; |
| |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| |
| OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| //===--------------------------------------------------------------------===// |
| |
| /// getAL - Returns an ARMCC::AL immediate node. |
| static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { |
| return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); |
| } |
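| // Predicable ARM instructions take a trailing condition-code operand plus a |
| // condition register; getAL together with a zero register supplies the |
| // "always" predicate seen in the Ops arrays below. |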
| |
| void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { |
| MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); |
| } |
| |
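| // tryARMIndexedLoad - Match pre/post-indexed loads, which fold the address |
| // update into the load itself, e.g. (illustrative): |
| //   ldr r0, [r1, #4]!   ; pre-indexed: load from r1+4, write r1+4 back |
| //   ldr r0, [r1], #4    ; post-indexed: load from r1, then r1 += 4 |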
| bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { |
| LoadSDNode *LD = cast<LoadSDNode>(N); |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| if (AM == ISD::UNINDEXED) |
| return false; |
| |
| EVT LoadedVT = LD->getMemoryVT(); |
| SDValue Offset, AMOpc; |
| bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); |
| unsigned Opcode = 0; |
| bool Match = false; |
| if (LoadedVT == MVT::i32 && isPre && |
| SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { |
| Opcode = ARM::LDR_PRE_IMM; |
| Match = true; |
| } else if (LoadedVT == MVT::i32 && !isPre && |
| SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { |
| Opcode = ARM::LDR_POST_IMM; |
| Match = true; |
| } else if (LoadedVT == MVT::i32 && |
| SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { |
| Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; |
| Match = true; |
| } else if (LoadedVT == MVT::i16 && |
| SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { |
| Match = true; |
| Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) |
| ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) |
| : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); |
| } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { |
| if (LD->getExtensionType() == ISD::SEXTLOAD) { |
| if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { |
| Match = true; |
| Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; |
| } |
| } else { |
| if (isPre && |
| SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { |
| Match = true; |
| Opcode = ARM::LDRB_PRE_IMM; |
| } else if (!isPre && |
| SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { |
| Match = true; |
| Opcode = ARM::LDRB_POST_IMM; |
| } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { |
| Match = true; |
| Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; |
| } |
| } |
| } |
| |
| if (Match) { |
| if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { |
| SDValue Chain = LD->getChain(); |
| SDValue Base = LD->getBasePtr(); |
| SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)), |
| CurDAG->getRegister(0, MVT::i32), Chain }; |
| SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, |
| MVT::Other, Ops); |
| transferMemOperands(N, New); |
| ReplaceNode(N, New); |
| return true; |
| } else { |
| SDValue Chain = LD->getChain(); |
| SDValue Base = LD->getBasePtr(); |
| SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), |
| CurDAG->getRegister(0, MVT::i32), Chain }; |
| SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, |
| MVT::Other, Ops); |
| transferMemOperands(N, New); |
| ReplaceNode(N, New); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { |
| LoadSDNode *LD = cast<LoadSDNode>(N); |
| EVT LoadedVT = LD->getMemoryVT(); |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || |
| LoadedVT.getSimpleVT().SimpleTy != MVT::i32) |
| return false; |
| |
| auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); |
| if (!COffs || COffs->getZExtValue() != 4) |
| return false; |
| |
| // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. |
| // However, the encoding of LDM is not how the rest of ISel expects a |
| // post-inc load to look, so we use a pseudo here and switch it for a |
| // tLDMIA_UPD after ISel. |
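| // e.g. a post-increment-by-4 i32 load from r1 ends up, roughly, as |
| // "ldm r1!, {r0}". |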
| SDValue Chain = LD->getChain(); |
| SDValue Base = LD->getBasePtr(); |
| SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)), |
| CurDAG->getRegister(0, MVT::i32), Chain }; |
| SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, |
| MVT::i32, MVT::Other, Ops); |
| transferMemOperands(N, New); |
| ReplaceNode(N, New); |
| return true; |
| } |
| |
| bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { |
| LoadSDNode *LD = cast<LoadSDNode>(N); |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| if (AM == ISD::UNINDEXED) |
| return false; |
| |
| EVT LoadedVT = LD->getMemoryVT(); |
| bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; |
| SDValue Offset; |
| bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); |
| unsigned Opcode = 0; |
| bool Match = false; |
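| // SelectT2AddrModeImm8Offset only matches offsets that fit the Thumb-2 |
| // 8-bit immediate forms, e.g. "ldr r0, [r1], #-8" or "ldrh r0, [r1, #4]!". |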
| if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { |
| switch (LoadedVT.getSimpleVT().SimpleTy) { |
| case MVT::i32: |
| Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; |
| break; |
| case MVT::i16: |
| if (isSExtLd) |
| Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; |
| else |
| Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; |
| break; |
| case MVT::i8: |
| case MVT::i1: |
| if (isSExtLd) |
| Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; |
| else |
| Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; |
| break; |
| default: |
| return false; |
| } |
| Match = true; |
| } |
| |
| if (Match) { |
| SDValue Chain = LD->getChain(); |
| SDValue Base = LD->getBasePtr(); |
| SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)), |
| CurDAG->getRegister(0, MVT::i32), Chain }; |
| SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, |
| MVT::Other, Ops); |
| transferMemOperands(N, New); |
| ReplaceNode(N, New); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { |
| EVT LoadedVT; |
| unsigned Opcode = 0; |
| bool isSExtLd, isPre; |
| Align Alignment; |
| ARMVCC::VPTCodes Pred; |
| SDValue PredReg; |
| SDValue Chain, Base, Offset; |
| |
| if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| if (AM == ISD::UNINDEXED) |
| return false; |
| LoadedVT = LD->getMemoryVT(); |
| if (!LoadedVT.isVector()) |
| return false; |
| |
| Chain = LD->getChain(); |
| Base = LD->getBasePtr(); |
| Offset = LD->getOffset(); |
| Alignment = LD->getAlign(); |
| isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; |
| isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); |
| Pred = ARMVCC::None; |
| PredReg = CurDAG->getRegister(0, MVT::i32); |
| } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| if (AM == ISD::UNINDEXED) |
| return false; |
| LoadedVT = LD->getMemoryVT(); |
| if (!LoadedVT.isVector()) |
| return false; |
| |
| Chain = LD->getChain(); |
| Base = LD->getBasePtr(); |
| Offset = LD->getOffset(); |
| Alignment = LD->getAlign(); |
| isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; |
| isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); |
| Pred = ARMVCC::Then; |
| PredReg = LD->getMask(); |
| } else |
| llvm_unreachable("Expected a Load or a Masked Load!"); |
| |
| // We allow LE non-masked loads to change the type (for example use a vldrb.8 |
| // as opposed to a vldrw.32). This can allow extra addressing modes or |
| // alignments for what is otherwise an equivalent instruction. |
| bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N); |
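| // For example, a little-endian v4i32 load whose offset is not a multiple |
| // of 4 may still be selected as a vldrb.8, whose immediate is unscaled. |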
| |
| SDValue NewOffset; |
| if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { |
| if (isSExtLd) |
| Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; |
| else |
| Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; |
| } else if (LoadedVT == MVT::v8i8 && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { |
| if (isSExtLd) |
| Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; |
| else |
| Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; |
| } else if (LoadedVT == MVT::v4i8 && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { |
| if (isSExtLd) |
| Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; |
| else |
| Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; |
| } else if (Alignment >= Align(4) && |
| (CanChangeType || LoadedVT == MVT::v4i32 || |
| LoadedVT == MVT::v4f32) && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) |
| Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; |
| else if (Alignment >= Align(2) && |
| (CanChangeType || LoadedVT == MVT::v8i16 || |
| LoadedVT == MVT::v8f16) && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) |
| Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; |
| else if ((CanChangeType || LoadedVT == MVT::v16i8) && |
| SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) |
| Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; |
| else |
| return false; |
| |
| SDValue Ops[] = {Base, |
| NewOffset, |
| CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), |
| PredReg, |
| CurDAG->getRegister(0, MVT::i32), // tp_reg |
| Chain}; |
| SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, |
| N->getValueType(0), MVT::Other, Ops); |
| transferMemOperands(N, New); |
| ReplaceUses(SDValue(N, 0), SDValue(New, 1)); |
| ReplaceUses(SDValue(N, 1), SDValue(New, 0)); |
| ReplaceUses(SDValue(N, 2), SDValue(New, 2)); |
| CurDAG->RemoveDeadNode(N); |
| return true; |
| } |
| |
| /// Form a GPRPair pseudo register from a pair of GPR regs. |
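| /// This is used, for example, to glue the two GPR halves of a 64-bit value |
| /// into the register pair that ldrexd/strexd operate on. |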
| SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = |
| CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form a D register from a pair of S registers. |
| SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = |
| CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form a quad register from a pair of D registers. |
| SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, |
| MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form 4 consecutive D registers from a pair of Q registers. |
| SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, |
| MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form 4 consecutive S registers. |
| SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, |
| SDValue V2, SDValue V3) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = |
| CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); |
| SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); |
| SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, |
| V2, SubReg2, V3, SubReg3 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form 4 consecutive D registers. |
| SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, |
| SDValue V2, SDValue V3) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, |
| MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); |
| SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); |
| SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, |
| V2, SubReg2, V3, SubReg3 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// Form 4 consecutive Q registers. |
| SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, |
| SDValue V2, SDValue V3) { |
| SDLoc dl(V0.getNode()); |
| SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, |
| MVT::i32); |
| SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); |
| SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); |
| SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); |
| SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); |
| const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, |
| V2, SubReg2, V3, SubReg3 }; |
| return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); |
| } |
| |
| /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand |
| /// of a NEON VLD or VST instruction. The supported values depend on the |
| /// number of registers being loaded. |
| SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, |
| unsigned NumVecs, bool is64BitVector) { |
| unsigned NumRegs = NumVecs; |
| if (!is64BitVector && NumVecs < 3) |
| NumRegs *= 2; |
| |
| unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); |
| if (Alignment >= 32 && NumRegs == 4) |
| Alignment = 32; |
| else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) |
| Alignment = 16; |
| else if (Alignment >= 8) |
| Alignment = 8; |
| else |
| Alignment = 0; |
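| // For example, a VLD1 of one Q register (NumRegs == 2) with 64-byte source |
| // alignment is reported as 16 here, the most that form can exploit. |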
| |
| return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); |
| } |
| |
| static bool isVLDfixed(unsigned Opc) { |
| switch (Opc) { |
| default: return false; |
| case ARM::VLD1d8wb_fixed : return true; |
| case ARM::VLD1d16wb_fixed : return true; |
| case ARM::VLD1d64Qwb_fixed : return true; |
| case ARM::VLD1d32wb_fixed : return true; |
| case ARM::VLD1d64wb_fixed : return true; |
| case ARM::VLD1d8TPseudoWB_fixed : return true; |
| case ARM::VLD1d16TPseudoWB_fixed : return true; |
| case ARM::VLD1d32TPseudoWB_fixed : return true; |
| case ARM::VLD1d64TPseudoWB_fixed : return true; |
| case ARM::VLD1d8QPseudoWB_fixed : return true; |
| case ARM::VLD1d16QPseudoWB_fixed : return true; |
| case ARM::VLD1d32QPseudoWB_fixed : return true; |
| case ARM::VLD1d64QPseudoWB_fixed : return true; |
| case ARM::VLD1q8wb_fixed : return true; |
| case ARM::VLD1q16wb_fixed : return true; |
| case ARM::VLD1q32wb_fixed : return true; |
| case ARM::VLD1q64wb_fixed : return true; |
| case ARM::VLD1DUPd8wb_fixed : return true; |
| case ARM::VLD1DUPd16wb_fixed : return true; |
| case ARM::VLD1DUPd32wb_fixed : return true; |
| case ARM::VLD1DUPq8wb_fixed : return true; |
| case ARM::VLD1DUPq16wb_fixed : return true; |
| case ARM::VLD1DUPq32wb_fixed : return true; |
| case ARM::VLD2d8wb_fixed : return true; |
| case ARM::VLD2d16wb_fixed : return true; |
| case ARM::VLD2d32wb_fixed : return true; |
| case ARM::VLD2q8PseudoWB_fixed : return true; |
| case ARM::VLD2q16PseudoWB_fixed : return true; |
| case ARM::VLD2q32PseudoWB_fixed : return true; |
| case ARM::VLD2DUPd8wb_fixed : return true; |
| case ARM::VLD2DUPd16wb_fixed : return true; |
| case ARM::VLD2DUPd32wb_fixed : return true; |
| case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; |
| case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; |
| case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; |
| } |
| } |
| |
| static bool isVSTfixed(unsigned Opc) { |
| switch (Opc) { |
| default: return false; |
| case ARM::VST1d8wb_fixed : return true; |
| case ARM::VST1d16wb_fixed : return true; |
| case ARM::VST1d32wb_fixed : return true; |
| case ARM::VST1d64wb_fixed : return true; |
| case ARM::VST1q8wb_fixed : return true; |
| case ARM::VST1q16wb_fixed : return true; |
| case ARM::VST1q32wb_fixed : return true; |
| case ARM::VST1q64wb_fixed : return true; |
| case ARM::VST1d8TPseudoWB_fixed : return true; |
| case ARM::VST1d16TPseudoWB_fixed : return true; |
| case ARM::VST1d32TPseudoWB_fixed : return true; |
| case ARM::VST1d64TPseudoWB_fixed : return true; |
| case ARM::VST1d8QPseudoWB_fixed : return true; |
| case ARM::VST1d16QPseudoWB_fixed : return true; |
| case ARM::VST1d32QPseudoWB_fixed : return true; |
| case ARM::VST1d64QPseudoWB_fixed : return true; |
| case ARM::VST2d8wb_fixed : return true; |
| case ARM::VST2d16wb_fixed : return true; |
| case ARM::VST2d32wb_fixed : return true; |
| case ARM::VST2q8PseudoWB_fixed : return true; |
| case ARM::VST2q16PseudoWB_fixed : return true; |
| case ARM::VST2q32PseudoWB_fixed : return true; |
| } |
| } |
| |
| // Get the register stride update opcode of a VLD/VST instruction that |
| // is otherwise equivalent to the given fixed stride updating instruction. |
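| // The fixed form is e.g. "vld1.8 {d0}, [r0]!" (post-increment by the access |
| // size); the register form is "vld1.8 {d0}, [r0], r1" (post-increment by r1). |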
| static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { |
| assert((isVLDfixed(Opc) || isVSTfixed(Opc)) && |
| "Incorrect fixed stride updating instruction."); |
| switch (Opc) { |
| default: break; |
| case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; |
| case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; |
| case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; |
| case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; |
| case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; |
| case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; |
| case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; |
| case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; |
| case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; |
| case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; |
| case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register; |
| case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register; |
| case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register; |
| case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; |
| case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register; |
| case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register; |
| case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register; |
| case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; |
| case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; |
| case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; |
| case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; |
| case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; |
| case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; |
| case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; |
| case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register; |
| case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register; |
| case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register; |
| |
| case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; |
| case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; |
| case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; |
| case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; |
| case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; |
| case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; |
| case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; |
| case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; |
| case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register; |
| case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; |
| case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; |
| case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; |
| case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; |
| case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; |
| case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; |
| case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; |
| |
| case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; |
| case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; |
| case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; |
| case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; |
| case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; |
| case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; |
| |
| case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; |
| case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; |
| case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; |
| case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; |
| case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; |
| case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; |
| |
| case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; |
| case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; |
| case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; |
| } |
| return Opc; // If not one we handle, return it unchanged. |
| } |
| |
| /// Returns true if the given increment is a Constant known to be equal to the |
| /// access size performed by a NEON load/store. This means the "[rN]!" form can |
| /// be used. |
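| /// For example, a VLD2 of two v8i8 vectors accesses 16 bytes, so an |
| /// increment of exactly 16 allows the write-back form. |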
| static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { |
| auto C = dyn_cast<ConstantSDNode>(Inc); |
| return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; |
| } |
| |
| void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, |
| const uint16_t *DOpcodes, |
| const uint16_t *QOpcodes0, |
| const uint16_t *QOpcodes1) { |
| assert(Subtarget->hasNEON()); |
| assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); |
| SDLoc dl(N); |
| |
| SDValue MemAddr, Align; |
| bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating |
| // nodes are not intrinsics. |
| unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; |
| if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) |
| return; |
| |
| SDValue Chain = N->getOperand(0); |
| EVT VT = N->getValueType(0); |
| bool is64BitVector = VT.is64BitVector(); |
| Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); |
| |
| unsigned OpcodeIndex; |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: llvm_unreachable("unhandled vld type"); |
| // Double-register operations: |
| case MVT::v8i8: OpcodeIndex = 0; break; |
| case MVT::v4f16: |
| case MVT::v4bf16: |
| case MVT::v4i16: OpcodeIndex = 1; break; |
| case MVT::v2f32: |
| case MVT::v2i32: OpcodeIndex = 2; break; |
| case MVT::v1i64: OpcodeIndex = 3; break; |
| // Quad-register operations: |
| case MVT::v16i8: OpcodeIndex = 0; break; |
| case MVT::v8f16: |
| case MVT::v8bf16: |
| case MVT::v8i16: OpcodeIndex = 1; break; |
| case MVT::v4f32: |
| case MVT::v4i32: OpcodeIndex = 2; break; |
| case MVT::v2f64: |
| case MVT::v2i64: OpcodeIndex = 3; break; |
| } |
| |
| EVT ResTy; |
| if (NumVecs == 1) |
| ResTy = VT; |
| else { |
| unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; |
| if (!is64BitVector) |
| ResTyElts *= 2; |
| ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); |
| } |
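| // e.g. a vld3 of 64-bit vectors rounds ResTyElts up from 3 to 4, so the |
| // instruction defines a single v4i64 super-register spanning four D regs. |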
| std::vector<EVT> ResTys; |
| ResTys.push_back(ResTy); |
| if (isUpdating) |
| ResTys.push_back(MVT::i32); |
| ResTys.push_back(MVT::Other); |
| |
| SDValue Pred = getAL(CurDAG, dl); |
| SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); |
| SDNode *VLd; |
| SmallVector<SDValue, 7> Ops; |
| |
| // Double registers and VLD1/VLD2 quad registers are directly supported. |
| if (is64BitVector || NumVecs <= 2) { |
| unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : |
| QOpcodes0[OpcodeIndex]); |
| Ops.push_back(MemAddr); |
| Ops.push_back(Align); |
| if (isUpdating) { |
| SDValue Inc = N->getOperand(AddrOpIdx + 1); |
| bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); |
| if (!IsImmUpdate) { |
| // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so |
| // check for the opcode rather than the number of vector elements. |
| if (isVLDfixed(Opc)) |
| Opc = getVLDSTRegisterUpdateOpcode(Opc); |
| Ops.push_back(Inc); |
| } |
| // VLD1/VLD2 fixed increment does not need Reg0, so only include it in |
| // the operands if not such an opcode. |
| else if (!isVLDfixed(Opc)) |
| Ops.push_back(Reg0); |
| } |
| Ops.push_back(Pred); |
| Ops.push_back(Reg0); |
| Ops.push_back(Chain); |
| VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| } else { |
| // Otherwise, quad registers are loaded with two separate instructions, |
| // where one loads the even registers and the other loads the odd registers. |
| EVT AddrTy = MemAddr.getValueType(); |
| |
| // Load the even subregs. This is always an updating load, so that it |
| // provides the address to the second load for the odd subregs. |
| SDValue ImplDef = |
| SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); |
| const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; |
| SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, |
| ResTy, AddrTy, MVT::Other, OpsA); |
| Chain = SDValue(VLdA, 2); |
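| // e.g. a vld3 of Q registers is emitted as "vld3.16 {d0, d2, d4}, [r0]!" |
| // followed by "vld3.16 {d1, d3, d5}, [r0]" for the odd halves. |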
| |
| // Load the odd subregs. |
| Ops.push_back(SDValue(VLdA, 1)); |
| Ops.push_back(Align); |
| if (isUpdating) { |
| SDValue Inc = N->getOperand(AddrOpIdx + 1); |
| assert(isa<ConstantSDNode>(Inc.getNode()) && |
| "only constant post-increment update allowed for VLD3/4"); |
| (void)Inc; |
| Ops.push_back(Reg0); |
| } |
| Ops.push_back(SDValue(VLdA, 0)); |
| Ops.push_back(Pred); |
| Ops.push_back(Reg0); |
| Ops.push_back(Chain); |
| VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); |
| } |
| |
| // Transfer memoperands. |
| MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); |
| |
| if (NumVecs == 1) { |
| ReplaceNode(N, VLd); |
| return; |
| } |
| |
| // Extract out the subregisters. |
| SDValue SuperReg = SDValue(VLd, 0); |
| static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && |
| ARM::qsub_3 == ARM::qsub_0 + 3, |
| "Unexpected subreg numbering"); |
| unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); |
| for (unsigned Vec = 0; Vec < NumVecs; ++Vec) |
| ReplaceUses(SDValue(N, Vec), |
| CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); |
| ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); |
| if (isUpdating) |
| ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, |
| const uint16_t *DOpcodes, |
| const uint16_t *QOpcodes0, |
| const uint16_t *QOpcodes1) { |
| assert(Subtarget->hasNEON()); |
| assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); |
| SDLoc dl(N); |
| |
| SDValue MemAddr, Align; |
| bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating |
| // nodes are not intrinsics. |
| unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; |
| unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) |
| if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) |
| return; |
| |
| MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); |
| |
| SDValue Chain = N->getOperand(0); |
| EVT VT = N->getOperand(Vec0Idx).getValueType(); |
| bool is64BitVector = VT.is64BitVector(); |
| Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); |
| |
| unsigned OpcodeIndex; |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: llvm_unreachable("unhandled vst type"); |
| // Double-register operations: |
| case MVT::v8i8: OpcodeIndex = 0; break; |
| case MVT::v4f16: |
| case MVT::v4bf16: |
| case MVT::v4i16: OpcodeIndex = 1; break; |
| case MVT::v2f32: |
| case MVT::v2i32: OpcodeIndex = 2; break; |
| case MVT::v1i64: OpcodeIndex = 3; break; |
| // Quad-register operations: |
| case MVT::v16i8: OpcodeIndex = 0; break; |
| case MVT::v8f16: |
| case MVT::v8bf16: |
| case MVT::v8i16: OpcodeIndex = 1; break; |
| case MVT::v4f32: |
| case MVT::v4i32: OpcodeIndex = 2; break; |
| case MVT::v2f64: |
| case MVT::v2i64: OpcodeIndex = 3; break; |
| } |
| |
| std::vector<EVT> ResTys; |
| if (isUpdating) |
| ResTys.push_back(MVT::i32); |
| ResTys.push_back(MVT::Other); |
| |
| SDValue Pred = getAL(CurDAG, dl); |
| SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); |
| SmallVector<SDValue, 7> Ops; |
| |
| // Double registers and VST1/VST2 quad registers are directly supported. |
| if (is64BitVector || NumVecs <= 2) { |
| SDValue SrcReg; |
| if (NumVecs == 1) { |
| SrcReg = N->getOperand(Vec0Idx); |
| } else if (is64BitVector) { |
| // Form a REG_SEQUENCE to force register allocation. |
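| // e.g. "vst2.8 {d0, d1}, [r0]" requires its sources in consecutive D |
| // registers, which the REG_SEQUENCE guarantees. |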
| SDValue V0 = N->getOperand(Vec0Idx + 0); |
| SDValue V1 = N->getOperand(Vec0Idx + 1); |
| if (NumVecs == 2) |
| SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); |
| else { |
| SDValue V2 = N->getOperand(Vec0Idx + 2); |
| // If it's a vst3, form a quad D-register and leave the last part as |
| // an undef. |
| SDValue V3 = (NumVecs == 3) |
| ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) |
| : N->getOperand(Vec0Idx + 3); |
| SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); |
| } |
| } else { |
| // Form a QQ register. |
| SDValue Q0 = N->getOperand(Vec0Idx); |
| SDValue Q1 = N->getOperand(Vec0Idx + 1); |
| SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); |
| } |
| |
| unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : |
| QOpcodes0[OpcodeIndex]); |
| Ops.push_back(MemAddr); |
| Ops.push_back(Align); |
| if (isUpdating) { |
| SDValue Inc = N->getOperand(AddrOpIdx + 1); |
| bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); |
| if (!IsImmUpdate) { |
| // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so |
| // check for the opcode rather than the number of vector elements. |
| if (isVSTfixed(Opc)) |
| Opc = getVLDSTRegisterUpdateOpcode(Opc); |
| Ops.push_back(Inc); |
| } |
| // VST1/VST2 fixed increment does not need Reg0, so only include it in |
| // the operands if not such an opcode. |
| else if (!isVSTfixed(Opc)) |
| Ops.push_back(Reg0); |
| } |
| Ops.push_back(SrcReg); |
| Ops.push_back(Pred); |
| Ops.push_back(Reg0); |
| Ops.push_back(Chain); |
| SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| // Transfer memoperands. |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); |
| |
| ReplaceNode(N, VSt); |
| return; |
| } |
| |
| // Otherwise, quad registers are stored with two separate instructions, |
| // where one stores the even registers and the other stores the odd registers. |
| |
| // Form the QQQQ REG_SEQUENCE. |
| SDValue V0 = N->getOperand(Vec0Idx + 0); |
| SDValue V1 = N->getOperand(Vec0Idx + 1); |
| SDValue V2 = N->getOperand(Vec0Idx + 2); |
| SDValue V3 = (NumVecs == 3) |
| ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) |
| : N->getOperand(Vec0Idx + 3); |
| SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); |
| |
| // Store the even D registers. This is always an updating store, so that it |
| // provides the address to the second store for the odd subregs. |
| const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; |
| SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, |
| MemAddr.getValueType(), |
| MVT::Other, OpsA); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); |
| Chain = SDValue(VStA, 1); |
| |
| // Store the odd D registers. |
| Ops.push_back(SDValue(VStA, 0)); |
| Ops.push_back(Align); |
| if (isUpdating) { |
| SDValue Inc = N->getOperand(AddrOpIdx + 1); |
| assert(isa<ConstantSDNode>(Inc.getNode()) && |
| "only constant post-increment update allowed for VST3/4"); |
| (void)Inc; |
| Ops.push_back(Reg0); |
| } |
| Ops.push_back(RegSeq); |
| Ops.push_back(Pred); |
| Ops.push_back(Reg0); |
| Ops.push_back(Chain); |
| SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, |
| Ops); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); |
| ReplaceNode(N, VStB); |
| } |
| |
| void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, |
| unsigned NumVecs, |
| const uint16_t *DOpcodes, |
| const uint16_t *QOpcodes) { |
| assert(Subtarget->hasNEON()); |
| assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); |
|