| //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines an instruction selector for the AArch64 target. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64TargetMachine.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "llvm/ADT/APSInt.h" |
| #include "llvm/CodeGen/SelectionDAGISel.h" |
| #include "llvm/IR/Function.h" // To access function attributes. |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsAArch64.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "aarch64-isel" |
| |
| //===--------------------------------------------------------------------===// |
| /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine |
| /// instructions for SelectionDAG operations. |
| /// |
| namespace { |
| |
| class AArch64DAGToDAGISel : public SelectionDAGISel { |
| |
| /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
| /// make the right decision when generating code for different targets. |
| const AArch64Subtarget *Subtarget; |
| |
| public: |
| explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, |
| CodeGenOpt::Level OptLevel) |
| : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} |
| |
| StringRef getPassName() const override { |
| return "AArch64 Instruction Selection"; |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override { |
| Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
| return SelectionDAGISel::runOnMachineFunction(MF); |
| } |
| |
| void Select(SDNode *Node) override; |
| |
| /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for |
| /// inline asm expressions. |
| bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
| unsigned ConstraintID, |
| std::vector<SDValue> &OutOps) override; |
| |
| template <signed Low, signed High, signed Scale> |
| bool SelectRDVLImm(SDValue N, SDValue &Imm); |
| |
| bool tryMLAV64LaneV128(SDNode *N); |
| bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); |
| bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); |
| bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
| bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
| bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
| return SelectShiftedRegister(N, false, Reg, Shift); |
| } |
| bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
| return SelectShiftedRegister(N, true, Reg, Shift); |
| } |
| bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed7S(N, 1, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed7S(N, 2, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed7S(N, 4, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed7S(N, 8, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed7S(N, 16, Base, OffImm); |
| } |
| bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); |
| } |
| bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed(N, 1, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed(N, 2, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed(N, 4, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed(N, 8, Base, OffImm); |
| } |
| bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeIndexed(N, 16, Base, OffImm); |
| } |
| bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeUnscaled(N, 1, Base, OffImm); |
| } |
| bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeUnscaled(N, 2, Base, OffImm); |
| } |
| bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeUnscaled(N, 4, Base, OffImm); |
| } |
| bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeUnscaled(N, 8, Base, OffImm); |
| } |
| bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| return SelectAddrModeUnscaled(N, 16, Base, OffImm); |
| } |
| template <unsigned Size, unsigned Max> |
| bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { |
| // Test if there is an appropriate addressing mode and check if the |
| // immediate fits. |
| bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); |
| if (Found) { |
| if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) { |
| int64_t C = CI->getSExtValue(); |
| if (C <= Max) |
| return true; |
| } |
| } |
| |
| // Otherwise, base only, materialize address in register. |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
| return true; |
| } |
| |
| template<int Width> |
| bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, |
| SDValue &SignExtend, SDValue &DoShift) { |
| return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
| } |
| |
| template<int Width> |
| bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, |
| SDValue &SignExtend, SDValue &DoShift) { |
| return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
| } |
| |
| bool SelectDupZeroOrUndef(SDValue N) { |
| switch(N->getOpcode()) { |
| case ISD::UNDEF: |
| return true; |
| case AArch64ISD::DUP: |
| case ISD::SPLAT_VECTOR: { |
| auto Opnd0 = N->getOperand(0); |
| if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) |
| if (CN->isZero()) |
| return true; |
| if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) |
| if (CN->isZero()) |
| return true; |
| break; |
| } |
| default: |
| break; |
| } |
| |
| return false; |
| } |
| |
| bool SelectDupZero(SDValue N) { |
| switch(N->getOpcode()) { |
| case AArch64ISD::DUP: |
| case ISD::SPLAT_VECTOR: { |
| auto Opnd0 = N->getOperand(0); |
| if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) |
| if (CN->isZero()) |
| return true; |
| if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) |
| if (CN->isZero()) |
| return true; |
| break; |
| } |
| } |
| |
| return false; |
| } |
| |
| template<MVT::SimpleValueType VT> |
| bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { |
| return SelectSVEAddSubImm(N, VT, Imm, Shift); |
| } |
| |
| template <MVT::SimpleValueType VT, bool Invert = false> |
| bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { |
| return SelectSVELogicalImm(N, VT, Imm, Invert); |
| } |
| |
| template <MVT::SimpleValueType VT> |
| bool SelectSVEArithImm(SDValue N, SDValue &Imm) { |
| return SelectSVEArithImm(N, VT, Imm); |
| } |
| |
| template <unsigned Low, unsigned High, bool AllowSaturation = false> |
| bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { |
| return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); |
| } |
| |
| // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
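| // For example, with Scale == 16 a request for VSCALE * 32 becomes a |
| // multiplier of 2, which is accepted only if it lies within [Min, Max]. |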
| template<signed Min, signed Max, signed Scale, bool Shift> |
| bool SelectCntImm(SDValue N, SDValue &Imm) { |
| if (!isa<ConstantSDNode>(N)) |
| return false; |
| |
| int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); |
| if (Shift) |
| MulImm = 1LL << MulImm; |
| |
| if ((MulImm % std::abs(Scale)) != 0) |
| return false; |
| |
| MulImm /= Scale; |
| if ((MulImm >= Min) && (MulImm <= Max)) { |
| Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| template <signed Max, signed Scale> |
| bool SelectEXTImm(SDValue N, SDValue &Imm) { |
| if (!isa<ConstantSDNode>(N)) |
| return false; |
| |
| int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); |
| |
| if (MulImm >= 0 && MulImm <= Max) { |
| MulImm *= Scale; |
| Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /// Form sequences of consecutive 64/128-bit registers for use in NEON |
| /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have |
| /// between 1 and 4 elements. If it contains a single element, that element is |
| /// returned unchanged; otherwise a REG_SEQUENCE value is returned. |
| SDValue createDTuple(ArrayRef<SDValue> Vecs); |
| SDValue createQTuple(ArrayRef<SDValue> Vecs); |
| // Form a sequence of SVE registers for instructions using a list of vectors, |
| // e.g. structured loads and stores (ldN, stN). |
| SDValue createZTuple(ArrayRef<SDValue> Vecs); |
| |
| /// Generic helper for the createDTuple/createQTuple |
| /// functions. Those should almost always be called instead. |
| SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], |
| const unsigned SubRegs[]); |
| |
| void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); |
| |
| bool tryIndexedLoad(SDNode *N); |
| |
| bool trySelectStackSlotTagP(SDNode *N); |
| void SelectTagP(SDNode *N); |
| |
| void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| unsigned SubRegIdx); |
| void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| unsigned SubRegIdx); |
| void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, |
| unsigned Opc_rr, unsigned Opc_ri, |
| bool IsIntr = false); |
| |
| bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); |
| /// SVE Reg+Imm addressing mode. |
| template <int64_t Min, int64_t Max> |
| bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, |
| SDValue &OffImm); |
| /// SVE Reg+Reg address mode. |
| template <unsigned Scale> |
| bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { |
| return SelectSVERegRegAddrMode(N, Scale, Base, Offset); |
| } |
| |
| void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, |
| unsigned Opc_rr, unsigned Opc_ri); |
| std::tuple<unsigned, SDValue, SDValue> |
| findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, |
| const SDValue &OldBase, const SDValue &OldOffset, |
| unsigned Scale); |
| |
| bool tryBitfieldExtractOp(SDNode *N); |
| bool tryBitfieldExtractOpFromSExt(SDNode *N); |
| bool tryBitfieldInsertOp(SDNode *N); |
| bool tryBitfieldInsertInZeroOp(SDNode *N); |
| bool tryShiftAmountMod(SDNode *N); |
| bool tryHighFPExt(SDNode *N); |
| |
| bool tryReadRegister(SDNode *N); |
| bool tryWriteRegister(SDNode *N); |
| |
| // Include the pieces autogenerated from the target description. |
| #include "AArch64GenDAGISel.inc" |
| |
| private: |
| bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, |
| SDValue &Shift); |
| bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, |
| SDValue &OffImm) { |
| return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); |
| } |
| bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, |
| unsigned Size, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, |
| SDValue &OffImm); |
| bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, |
| SDValue &Offset, SDValue &SignExtend, |
| SDValue &DoShift); |
| bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, |
| SDValue &Offset, SDValue &SignExtend, |
| SDValue &DoShift); |
| bool isWorthFolding(SDValue V) const; |
| bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, |
| SDValue &Offset, SDValue &SignExtend); |
| |
| template<unsigned RegWidth> |
| bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { |
| return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); |
| } |
| |
| bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); |
| |
| bool SelectCMP_SWAP(SDNode *N); |
| |
| bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); |
| |
| bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); |
| |
| bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); |
| |
| bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); |
| bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, |
| bool AllowSaturation, SDValue &Imm); |
| |
| bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); |
| bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, |
| SDValue &Offset); |
| |
| bool SelectAllActivePredicate(SDValue N); |
| }; |
| } // end anonymous namespace |
| |
| /// isIntImmediate - This method tests to see if the node is a constant |
| /// operand. If so, Imm will receive the zero-extended value. |
| static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { |
| if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { |
| Imm = C->getZExtValue(); |
| return true; |
| } |
| return false; |
| } |
| |
| // isIntImmediate - This method tests to see if the value is a constant |
| // operand. If so, Imm will receive the zero-extended value. |
| static bool isIntImmediate(SDValue N, uint64_t &Imm) { |
| return isIntImmediate(N.getNode(), Imm); |
| } |
| |
| // isOpcWithIntImmediate - This method tests to see if the node has a specific |
| // opcode and an immediate integer right operand. |
| // If so, Imm will receive that immediate's value. |
| static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, |
| uint64_t &Imm) { |
| return N->getOpcode() == Opc && |
| isIntImmediate(N->getOperand(1).getNode(), Imm); |
| } |
| |
| bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( |
| const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { |
| switch(ConstraintID) { |
| default: |
| llvm_unreachable("Unexpected asm memory constraint"); |
| case InlineAsm::Constraint_m: |
| case InlineAsm::Constraint_o: |
| case InlineAsm::Constraint_Q: |
| // We need to make sure that this one operand does not end up in XZR, thus |
| // require the address to be in a PointerRegClass register. |
| const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
| const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); |
| SDLoc dl(Op); |
| SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); |
| SDValue NewOp = |
| SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
| dl, Op.getValueType(), |
| Op, RC), 0); |
| OutOps.push_back(NewOp); |
| return false; |
| } |
| return true; |
| } |
| |
| /// SelectArithImmed - Select an immediate value that can be represented as |
| /// a 12-bit value shifted left by either 0 or 12. If so, return true with |
| /// Val set to the 12-bit value and Shift set to the shifter operand. |
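| /// For example, 0x5000 is selected as Val = 0x5 with "LSL #12", whereas 0x1001 |
| /// is rejected because it has significant bits both below and above bit 12. |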
| bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, |
| SDValue &Shift) { |
| // This function is called from the addsub_shifted_imm ComplexPattern, |
| // which lists [imm] as the list of opcodes it's interested in. However, |
| // we still need to check whether the operand is actually an immediate |
| // here because the ComplexPattern opcode list is only used in |
| // root-level opcode matching. |
| if (!isa<ConstantSDNode>(N.getNode())) |
| return false; |
| |
| uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); |
| unsigned ShiftAmt; |
| |
| if (Immed >> 12 == 0) { |
| ShiftAmt = 0; |
| } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
| ShiftAmt = 12; |
| Immed = Immed >> 12; |
| } else |
| return false; |
| |
| unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); |
| SDLoc dl(N); |
| Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); |
| Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); |
| return true; |
| } |
| |
| /// SelectNegArithImmed - As above, but negates the value before trying to |
| /// select it. |
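| /// For example, an i32 immediate of -5 (0xFFFFFFFB) is negated to 5, which |
| /// then matches as an ordinary 12-bit arithmetic immediate. |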
| bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, |
| SDValue &Shift) { |
| // This function is called from the addsub_shifted_imm ComplexPattern, |
| // which lists [imm] as the list of opcodes it's interested in. However, |
| // we still need to check whether the operand is actually an immediate |
| // here because the ComplexPattern opcode list is only used in |
| // root-level opcode matching. |
| if (!isa<ConstantSDNode>(N.getNode())) |
| return false; |
| |
| // The immediate operand must be a 24-bit zero-extended immediate. |
| uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); |
| |
| // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
| // have the opposite effect on the C flag, so this pattern mustn't match under |
| // those circumstances. |
| if (Immed == 0) |
| return false; |
| |
| if (N.getValueType() == MVT::i32) |
| Immed = ~((uint32_t)Immed) + 1; |
| else |
| Immed = ~Immed + 1ULL; |
| if (Immed & 0xFFFFFFFFFF000000ULL) |
| return false; |
| |
| Immed &= 0xFFFFFFULL; |
| return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, |
| Shift); |
| } |
| |
| /// getShiftTypeForNode - Translate a shift node to the corresponding |
| /// ShiftType value. |
| static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { |
| switch (N.getOpcode()) { |
| default: |
| return AArch64_AM::InvalidShiftExtend; |
| case ISD::SHL: |
| return AArch64_AM::LSL; |
| case ISD::SRL: |
| return AArch64_AM::LSR; |
| case ISD::SRA: |
| return AArch64_AM::ASR; |
| case ISD::ROTR: |
| return AArch64_AM::ROR; |
| } |
| } |
| |
| /// Determine whether it is worth it to fold SHL into the addressing |
| /// mode. |
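| /// For example, the shift in (load (add base, (shl idx, 3))) can become part |
| /// of the [base, idx, lsl #3] addressing mode, as long as the shifted value |
| /// is not also reused by non-memory operations. |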
| static bool isWorthFoldingSHL(SDValue V) { |
| assert(V.getOpcode() == ISD::SHL && "invalid opcode"); |
| // It is only worth folding a logical shift of up to three places. |
| auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); |
| if (!CSD) |
| return false; |
| unsigned ShiftVal = CSD->getZExtValue(); |
| if (ShiftVal > 3) |
| return false; |
| |
| // Check if this particular node is reused in any non-memory related |
| // operation. If yes, do not try to fold this node into the address |
| // computation, since the computation will be kept. |
| const SDNode *Node = V.getNode(); |
| for (SDNode *UI : Node->uses()) |
| if (!isa<MemSDNode>(*UI)) |
| for (SDNode *UII : UI->uses()) |
| if (!isa<MemSDNode>(*UII)) |
| return false; |
| return true; |
| } |
| |
| /// Determine whether it is worth folding V into an extended register. |
| bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { |
| // Trivial if we are optimizing for code size or if there is only |
| // one use of the value. |
| if (CurDAG->shouldOptForSize() || V.hasOneUse()) |
| return true; |
| // If a subtarget has a fastpath LSL we can fold a logical shift into |
| // the addressing mode and save a cycle. |
| if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && |
| isWorthFoldingSHL(V)) |
| return true; |
| if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { |
| const SDValue LHS = V.getOperand(0); |
| const SDValue RHS = V.getOperand(1); |
| if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) |
| return true; |
| if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) |
| return true; |
| } |
| |
| // It hurts otherwise, since the value will be reused. |
| return false; |
| } |
| |
| /// SelectShiftedRegister - Select a "shifted register" operand. If the value |
| /// is not shifted, set the Shift operand to the default of "LSL #0". The |
| /// logical instructions allow the shifted register to be rotated, but the |
| /// arithmetic instructions do not. The AllowROR parameter specifies whether |
| /// ROR is supported. |
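| /// For example, for N = (shl x, 4) this returns Reg = x and a Shift operand |
| /// encoding "LSL #4", provided folding the shift is deemed profitable. |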
| bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, |
| SDValue &Reg, SDValue &Shift) { |
| AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); |
| if (ShType == AArch64_AM::InvalidShiftExtend) |
| return false; |
| if (!AllowROR && ShType == AArch64_AM::ROR) |
| return false; |
| |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| unsigned BitSize = N.getValueSizeInBits(); |
| unsigned Val = RHS->getZExtValue() & (BitSize - 1); |
| unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); |
| |
| Reg = N.getOperand(0); |
| Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); |
| return isWorthFolding(N); |
| } |
| |
| return false; |
| } |
| |
| /// getExtendTypeForNode - Translate an extend node to the corresponding |
| /// ExtendType value. |
| static AArch64_AM::ShiftExtendType |
| getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { |
| if (N.getOpcode() == ISD::SIGN_EXTEND || |
| N.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
| EVT SrcVT; |
| if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) |
| SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); |
| else |
| SrcVT = N.getOperand(0).getValueType(); |
| |
| if (!IsLoadStore && SrcVT == MVT::i8) |
| return AArch64_AM::SXTB; |
| else if (!IsLoadStore && SrcVT == MVT::i16) |
| return AArch64_AM::SXTH; |
| else if (SrcVT == MVT::i32) |
| return AArch64_AM::SXTW; |
| assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
| |
| return AArch64_AM::InvalidShiftExtend; |
| } else if (N.getOpcode() == ISD::ZERO_EXTEND || |
| N.getOpcode() == ISD::ANY_EXTEND) { |
| EVT SrcVT = N.getOperand(0).getValueType(); |
| if (!IsLoadStore && SrcVT == MVT::i8) |
| return AArch64_AM::UXTB; |
| else if (!IsLoadStore && SrcVT == MVT::i16) |
| return AArch64_AM::UXTH; |
| else if (SrcVT == MVT::i32) |
| return AArch64_AM::UXTW; |
| assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
| |
| return AArch64_AM::InvalidShiftExtend; |
| } else if (N.getOpcode() == ISD::AND) { |
| ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!CSD) |
| return AArch64_AM::InvalidShiftExtend; |
| uint64_t AndMask = CSD->getZExtValue(); |
| |
| switch (AndMask) { |
| default: |
| return AArch64_AM::InvalidShiftExtend; |
| case 0xFF: |
| return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
| case 0xFFFF: |
| return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
| case 0xFFFFFFFF: |
| return AArch64_AM::UXTW; |
| } |
| } |
| |
| return AArch64_AM::InvalidShiftExtend; |
| } |
| |
| // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. |
| static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { |
| if (DL->getOpcode() != AArch64ISD::DUPLANE16 && |
| DL->getOpcode() != AArch64ISD::DUPLANE32) |
| return false; |
| |
| SDValue SV = DL->getOperand(0); |
| if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) |
| return false; |
| |
| SDValue EV = SV.getOperand(1); |
| if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
| return false; |
| |
| ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); |
| ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); |
| LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); |
| LaneOp = EV.getOperand(0); |
| |
| return true; |
| } |
| |
| // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a |
| // high lane extract. |
| static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, |
| SDValue &LaneOp, int &LaneIdx) { |
| |
| if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { |
| std::swap(Op0, Op1); |
| if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) |
| return false; |
| } |
| StdOp = Op1; |
| return true; |
| } |
| |
| /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand |
| /// is a lane in the upper half of a 128-bit vector. Recognize and select this |
| /// so that we don't emit unnecessary lane extracts. |
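| /// Roughly, (add Acc, (mul X, (duplane (insert_subvector undef, |
| /// (extract_subvector Y128, HighLane))))) becomes an indexed MLA that reads |
| /// the lane directly from the 128-bit register holding Y128. |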
| bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { |
| SDLoc dl(N); |
| SDValue Op0 = N->getOperand(0); |
| SDValue Op1 = N->getOperand(1); |
| SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. |
| SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. |
| int LaneIdx = -1; // Will hold the lane index. |
| |
| if (Op1.getOpcode() != ISD::MUL || |
| !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, |
| LaneIdx)) { |
| std::swap(Op0, Op1); |
| if (Op1.getOpcode() != ISD::MUL || |
| !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, |
| LaneIdx)) |
| return false; |
| } |
| |
| SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); |
| |
| SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; |
| |
| unsigned MLAOpc = ~0U; |
| |
| switch (N->getSimpleValueType(0).SimpleTy) { |
| default: |
| llvm_unreachable("Unrecognized MLA."); |
| case MVT::v4i16: |
| MLAOpc = AArch64::MLAv4i16_indexed; |
| break; |
| case MVT::v8i16: |
| MLAOpc = AArch64::MLAv8i16_indexed; |
| break; |
| case MVT::v2i32: |
| MLAOpc = AArch64::MLAv2i32_indexed; |
| break; |
| case MVT::v4i32: |
| MLAOpc = AArch64::MLAv4i32_indexed; |
| break; |
| } |
| |
| ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); |
| return true; |
| } |
| |
| bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { |
| SDLoc dl(N); |
| SDValue SMULLOp0; |
| SDValue SMULLOp1; |
| int LaneIdx; |
| |
| if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, |
| LaneIdx)) |
| return false; |
| |
| SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); |
| |
| SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; |
| |
| unsigned SMULLOpc = ~0U; |
| |
| if (IntNo == Intrinsic::aarch64_neon_smull) { |
| switch (N->getSimpleValueType(0).SimpleTy) { |
| default: |
| llvm_unreachable("Unrecognized SMULL."); |
| case MVT::v4i32: |
| SMULLOpc = AArch64::SMULLv4i16_indexed; |
| break; |
| case MVT::v2i64: |
| SMULLOpc = AArch64::SMULLv2i32_indexed; |
| break; |
| } |
| } else if (IntNo == Intrinsic::aarch64_neon_umull) { |
| switch (N->getSimpleValueType(0).SimpleTy) { |
| default: |
| llvm_unreachable("Unrecognized UMULL."); |
| case MVT::v4i32: |
| SMULLOpc = AArch64::UMULLv4i16_indexed; |
| break; |
| case MVT::v2i64: |
| SMULLOpc = AArch64::UMULLv2i32_indexed; |
| break; |
| } |
| } else |
| llvm_unreachable("Unrecognized intrinsic."); |
| |
| ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); |
| return true; |
| } |
| |
| /// Instructions that accept extend modifiers like UXTW expect the register |
| /// being extended to be a GPR32, but the incoming DAG might be acting on a |
| /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if |
| /// this is the case. |
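| /// For example, an i64 value x feeding a UXTW/SXTW-style operand is rewritten |
| /// as EXTRACT_SUBREG(x, sub_32) so the pattern operates on a GPR32. |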
| static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { |
| if (N.getValueType() == MVT::i32) |
| return N; |
| |
| SDLoc dl(N); |
| SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
| MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
| dl, MVT::i32, N, SubReg); |
| return SDValue(Node, 0); |
| } |
| |
| // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
| template<signed Low, signed High, signed Scale> |
| bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { |
| if (!isa<ConstantSDNode>(N)) |
| return false; |
| |
| int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); |
| if ((MulImm % std::abs(Scale)) == 0) { |
| int64_t RDVLImm = MulImm / Scale; |
| if ((RDVLImm >= Low) && (RDVLImm <= High)) { |
| Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /// SelectArithExtendedRegister - Select an "extended register" operand. This |
| /// operand folds in an extend followed by an optional left shift. |
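| /// For example, (shl (sext_inreg x, i8), 2) is selected as the GPR32 holding |
| /// x together with an arith-extend operand encoding "SXTB #2". |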
| bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, |
| SDValue &Shift) { |
| unsigned ShiftVal = 0; |
| AArch64_AM::ShiftExtendType Ext; |
| |
| if (N.getOpcode() == ISD::SHL) { |
| ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!CSD) |
| return false; |
| ShiftVal = CSD->getZExtValue(); |
| if (ShiftVal > 4) |
| return false; |
| |
| Ext = getExtendTypeForNode(N.getOperand(0)); |
| if (Ext == AArch64_AM::InvalidShiftExtend) |
| return false; |
| |
| Reg = N.getOperand(0).getOperand(0); |
| } else { |
| Ext = getExtendTypeForNode(N); |
| if (Ext == AArch64_AM::InvalidShiftExtend) |
| return false; |
| |
| Reg = N.getOperand(0); |
| |
| // Don't match if a free 32-bit -> 64-bit zext can be used instead. |
| if (Ext == AArch64_AM::UXTW && |
| Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) |
| return false; |
| } |
| |
| // AArch64 mandates that the RHS of the operation must use the smallest |
| // register class that could contain the size being extended from. Thus, |
| // if we're folding a (sext i8), we need the RHS to be a GPR32, even though |
| // there might not be an actual 32-bit value in the program. We can |
| // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here. |
| assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); |
| Reg = narrowIfNeeded(CurDAG, Reg); |
| Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), |
| MVT::i32); |
| return isWorthFolding(N); |
| } |
| |
| /// If there's a use of this ADDlow that's not itself a load/store then we'll |
| /// need to create a real ADD instruction from it anyway and there's no point in |
| /// folding it into the mem op. Theoretically, it shouldn't matter, but there's |
| /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding |
| /// leads to duplicated ADRP instructions. |
| static bool isWorthFoldingADDlow(SDValue N) { |
| for (auto Use : N->uses()) { |
| if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && |
| Use->getOpcode() != ISD::ATOMIC_LOAD && |
| Use->getOpcode() != ISD::ATOMIC_STORE) |
| return false; |
| |
| // ldar and stlr have much more restrictive addressing modes (just a |
| // register). |
| if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering())) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit |
| /// immediate" address. The "Size" argument is the size in bytes of the memory |
| /// reference, which determines the scale. |
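| /// For example, with a signed 7-bit immediate (BW = 7) and Size = 8 this |
| /// accepts byte offsets in [-512, 504] that are multiples of 8, emitting the |
| /// scaled value RHSC >> 3 as OffImm. |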
| bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, |
| unsigned BW, unsigned Size, |
| SDValue &Base, |
| SDValue &OffImm) { |
| SDLoc dl(N); |
| const DataLayout &DL = CurDAG->getDataLayout(); |
| const TargetLowering *TLI = getTargetLowering(); |
| if (N.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| return true; |
| } |
| |
| // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit |
| // signed addressing mode selected here doesn't support labels/immediates, |
| // only base+offset. |
| if (CurDAG->isBaseWithConstantOffset(N)) { |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| if (IsSignedImm) { |
| int64_t RHSC = RHS->getSExtValue(); |
| unsigned Scale = Log2_32(Size); |
| int64_t Range = 0x1LL << (BW - 1); |
| |
| if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && |
| RHSC < (Range << Scale)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| return true; |
| } |
| } else { |
| // unsigned Immediate |
| uint64_t RHSC = RHS->getZExtValue(); |
| unsigned Scale = Log2_32(Size); |
| uint64_t Range = 0x1ULL << BW; |
| |
| if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| return true; |
| } |
| } |
| } |
| } |
| // Base only. The address will be materialized into a register before |
| // the memory is accessed. |
| // add x0, Xbase, #offset |
| // stp x1, x2, [x0] |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| return true; |
| } |
| |
| /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit |
| /// immediate" address. The "Size" argument is the size in bytes of the memory |
| /// reference, which determines the scale. |
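| /// For example, with Size = 8 this accepts byte offsets in [0, 32760] that |
| /// are multiples of 8; other offsets fall back to the unscaled or base-only |
| /// forms. |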
| bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, |
| SDValue &Base, SDValue &OffImm) { |
| SDLoc dl(N); |
| const DataLayout &DL = CurDAG->getDataLayout(); |
| const TargetLowering *TLI = getTargetLowering(); |
| if (N.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(N)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| return true; |
| } |
| |
| if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { |
| GlobalAddressSDNode *GAN = |
| dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); |
| Base = N.getOperand(0); |
| OffImm = N.getOperand(1); |
| if (!GAN) |
| return true; |
| |
| if (GAN->getOffset() % Size == 0 && |
| GAN->getGlobal()->getPointerAlignment(DL) >= Size) |
| return true; |
| } |
| |
| if (CurDAG->isBaseWithConstantOffset(N)) { |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| int64_t RHSC = (int64_t)RHS->getZExtValue(); |
| unsigned Scale = Log2_32(Size); |
| if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| return true; |
| } |
| } |
| } |
| |
| // Before falling back to our general case, check if the unscaled |
| // instructions can handle this. If so, that's preferable. |
| if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) |
| return false; |
| |
| // Base only. The address will be materialized into a register before |
| // the memory is accessed. |
| // add x0, Xbase, #offset |
| // ldr x0, [x0] |
| Base = N; |
| OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| return true; |
| } |
| |
| /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit |
| /// immediate" address. This should only match when there is an offset that |
| /// is not valid for a scaled immediate addressing mode. The "Size" argument |
| /// is the size in bytes of the memory reference, which is needed here to know |
| /// what is valid for a scaled immediate. |
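| /// For example, an 8-byte access at offset 12 cannot use the scaled form (12 |
| /// is not a multiple of 8) but fits the unscaled [-256, 255] range matched |
| /// here. |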
| bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, |
| SDValue &Base, |
| SDValue &OffImm) { |
| if (!CurDAG->isBaseWithConstantOffset(N)) |
| return false; |
| if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
| int64_t RHSC = RHS->getSExtValue(); |
| // If the offset is valid as a scaled immediate, don't match here. |
| if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && |
| RHSC < (0x1000 << Log2_32(Size))) |
| return false; |
| if (RHSC >= -256 && RHSC < 256) { |
| Base = N.getOperand(0); |
| if (Base.getOpcode() == ISD::FrameIndex) { |
| int FI = cast<FrameIndexSDNode>(Base)->getIndex(); |
| const TargetLowering *TLI = getTargetLowering(); |
| Base = CurDAG->getTargetFrameIndex( |
| FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| } |
| OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { |
| SDLoc dl(N); |
| SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
| SDValue ImpDef = SDValue( |
| CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); |
| MachineSDNode *Node = CurDAG->getMachineNode( |
| TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); |
| return SDValue(Node, 0); |
| } |
| |
| /// Check if the given SHL node (\p N) can be used to form an |
| /// extended register for an addressing mode. |
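| /// For example, when an extend is wanted, (shl (zext i32 x), 2) for a 4-byte |
| /// access yields the offset register x with a "UXTW #2" extend, matching the |
| /// [base, Wn, UXTW #2] form. |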
| bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, |
| bool WantExtend, SDValue &Offset, |
| SDValue &SignExtend) { |
| assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); |
| ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
| if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) |
| return false; |
| |
| SDLoc dl(N); |
| if (WantExtend) { |
| AArch64_AM::ShiftExtendType Ext = |
| getExtendTypeForNode(N.getOperand(0), true); |
| if (Ext == AArch64_AM::InvalidShiftExtend) |
| return false; |
| |
| Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); |
| SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| MVT::i32); |
| } else { |
| Offset = N.getOperand(0); |
| SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); |
| } |
| |
| unsigned LegalShiftVal = Log2_32(Size); |
| unsigned ShiftVal = CSD->getZExtValue(); |
| |
| if (ShiftVal != 0 && ShiftVal != LegalShiftVal) |
| return false; |
| |
| return isWorthFolding(N); |
| } |
| |
| bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, |
| SDValue &Base, SDValue &Offset, |
| SDValue &SignExtend, |
| SDValue &DoShift) { |
| if (N.getOpcode() != ISD::ADD) |
| return false; |
| SDValue LHS = N.getOperand(0); |
| SDValue RHS = N.getOperand(1); |
| SDLoc dl(N); |
| |
| // We don't want to match immediate adds here, because they are better lowered |
| // to the register-immediate addressing modes. |
| if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) |
| return false; |
| |
| // Check if this particular node is reused in any non-memory related |
| // operation. If yes, do not try to fold this node into the address |
| // computation, since the computation will be kept. |
| const SDNode *Node = N.getNode(); |
| for (SDNode *UI : Node->uses()) { |
| if (!isa<MemSDNode>(*UI)) |
| return false; |
| } |
| |
| // Remember if it is worth folding N when it produces extended register. |
| bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
| |
| // Try to match a shifted extend on the RHS. |
| if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
| SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { |
| Base = LHS; |
| DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
| return true; |
| } |
| |
| // Try to match a shifted extend on the LHS. |
| if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
| SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { |
| Base = RHS; |
| DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
| return true; |
| } |
| |
| // There was no shift, whatever else we find. |
| DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); |
| |
| AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; |
| // Try to match an unshifted extend on the LHS. |
| if (IsExtendedRegisterWorthFolding && |
| (Ext = getExtendTypeForNode(LHS, true)) != |
| AArch64_AM::InvalidShiftExtend) { |
| Base = RHS; |
| Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); |
| SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| MVT::i32); |
| if (isWorthFolding(LHS)) |
| return true; |
| } |
| |
| // Try to match an unshifted extend on the RHS. |
| if (IsExtendedRegisterWorthFolding && |
| (Ext = getExtendTypeForNode(RHS, true)) != |
| AArch64_AM::InvalidShiftExtend) { |
| Base = LHS; |
| Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); |
| SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| MVT::i32); |
| if (isWorthFolding(RHS)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // Check if the given immediate is preferred by ADD. If an immediate can be |
| // encoded in an ADD, or if it can be encoded in an "ADD LSL #12" and cannot |
| // be encoded by a single MOVZ, return true. |
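| // For example, 0x123000 is preferred (ADD ..., #0x123, LSL #12), while 0x5000 |
| // is not, since a single MOVZ materializes it more cheaply. |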
| static bool isPreferredADD(int64_t ImmOff) { |
| // Constant in [0x0, 0xfff] can be encoded in ADD. |
| if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) |
| return true; |
| // Check if it can be encoded in an "ADD LSL #12". |
| if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) |
| // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants. |
| return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && |
| (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; |
| return false; |
| } |
| |
| bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, |
| SDValue &Base, SDValue &Offset, |
| SDValue &SignExtend, |
| SDValue &DoShift) { |
| if (N.getOpcode() != ISD::ADD) |
| return false; |
| SDValue LHS = N.getOperand(0); |
| SDValue RHS = N.getOperand(1); |
| SDLoc DL(N); |
| |
| // Check if this particular node is reused in any non-memory related |
| // operation. If yes, do not try to fold this node into the address |
| // computation, since the computation will be kept. |
| const SDNode *Node = N.getNode(); |
| for (SDNode *UI : Node->uses()) { |
| if (!isa<MemSDNode>(*UI)) |
| return false; |
| } |
| |
| // Watch out if RHS is a wide immediate: it cannot be selected into the |
| // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB |
| // either. Instead the [BaseReg + 0] addressing mode would be used, generating |
| // instructions like: |
| // MOV X0, WideImmediate |
| // ADD X1, BaseReg, X0 |
| // LDR X2, [X1, 0] |
| // For such situation, using [BaseReg, XReg] addressing mode can save one |
| // ADD/SUB: |
| // MOV X0, WideImmediate |
| // LDR X2, [BaseReg, X0] |
| if (isa<ConstantSDNode>(RHS)) { |
| int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); |
| unsigned Scale = Log2_32(Size); |
| // Skip immediates that can be selected by the load/store addressing mode. |
| // Also skip immediates that can be encoded by a single ADD (SUB is also |
| // checked by using -ImmOff). |
| if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || |
| isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) |
| return false; |
| |
| SDValue Ops[] = { RHS }; |
| SDNode *MOVI = |
| CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); |
| SDValue MOVIV = SDValue(MOVI, 0); |
| // This ADD of two X registers will be selected into [Reg+Reg] mode. |
| N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); |
| } |
| |
| // Remember if it is worth folding N when it produces extended register. |
| bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
| |
| // Try to match a shifted extend on the RHS. |
| if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
| SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { |
| Base = LHS; |
| DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
| return true; |
| } |
| |
| // Try to match a shifted extend on the LHS. |
| if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
| SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { |
| Base = RHS; |
| DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
| return true; |
| } |
| |
| // Match any non-shifted, non-extend, non-immediate add expression. |
| Base = LHS; |
| Offset = RHS; |
| SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); |
| DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); |
| // Reg1 + Reg2 is free: no check needed. |
| return true; |
| } |
| |
| SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { |
| static const unsigned RegClassIDs[] = { |
| AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
| static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
| AArch64::dsub2, AArch64::dsub3}; |
| |
| return createTuple(Regs, RegClassIDs, SubRegs); |
| } |
| |
| SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { |
| static const unsigned RegClassIDs[] = { |
| AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
| static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
| AArch64::qsub2, AArch64::qsub3}; |
| |
| return createTuple(Regs, RegClassIDs, SubRegs); |
| } |
| |
| SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { |
| static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, |
| AArch64::ZPR3RegClassID, |
| AArch64::ZPR4RegClassID}; |
| static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
| AArch64::zsub2, AArch64::zsub3}; |
| |
| return createTuple(Regs, RegClassIDs, SubRegs); |
| } |
| |
| SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, |
| const unsigned RegClassIDs[], |
| const unsigned SubRegs[]) { |
| // There's no special register-class for a vector-list of 1 element: it's just |
| // a vector. |
| if (Regs.size() == 1) |
| return Regs[0]; |
| |
| assert(Regs.size() >= 2 && Regs.size() <= 4); |
| |
| SDLoc DL(Regs[0]); |
| |
| SmallVector<SDValue, 4> Ops; |
| |
| // First operand of REG_SEQUENCE is the desired RegClass. |
| Ops.push_back( |
| CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); |
| |
| // Then we get pairs of source & subregister-position for the components. |
| for (unsigned i = 0; i < Regs.size(); ++i) { |
| Ops.push_back(Regs[i]); |
| Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); |
| } |
| |
| SDNode *N = |
| CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); |
| return SDValue(N, 0); |
| } |
| |
| void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, |
| bool isExt) { |
| SDLoc dl(N); |
| EVT VT = N->getValueType(0); |
| |
| unsigned ExtOff = isExt; |
| |
| // Form a REG_SEQUENCE to force register allocation. |
| unsigned Vec0Off = ExtOff + 1; |
| SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, |
| N->op_begin() + Vec0Off + NumVecs); |
| SDValue RegSeq = createQTuple(Regs); |
| |
| SmallVector<SDValue, 6> Ops; |
| if (isExt) |
| Ops.push_back(N->getOperand(1)); |
| Ops.push_back(RegSeq); |
| Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); |
| ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); |
| } |
| |
| bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { |
| LoadSDNode *LD = cast<LoadSDNode>(N); |
| if (LD->isUnindexed()) |
| return false; |
| EVT VT = LD->getMemoryVT(); |
| EVT DstVT = N->getValueType(0); |
| ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; |
| |
| // We're not doing validity checking here. That was done when checking |
| // if we should mark the load as indexed or not. We're just selecting |
| // the right instruction. |
| unsigned Opcode = 0; |
| |
| ISD::LoadExtType ExtType = LD->getExtensionType(); |
| bool InsertTo64 = false; |
| if (VT == MVT::i64) |
| Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; |
| else if (VT == MVT::i32) { |
| if (ExtType == ISD::NON_EXTLOAD) |
| Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
| else if (ExtType == ISD::SEXTLOAD) |
| Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
| else { |
| Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
| InsertTo64 = true; |
| // The result of the load is only i32. It's the subreg_to_reg that makes |
| // it into an i64. |
| DstVT = MVT::i32; |
| } |
| } else if (VT == MVT::i16) { |
| if (ExtType == ISD::SEXTLOAD) { |
| if (DstVT == MVT::i64) |
| Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
| else |
| Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
| } else { |
| Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
| InsertTo64 = DstVT == MVT::i64; |
| // The result of the load is only i32. It's the subreg_to_reg that makes |
| // it into an i64. |
| DstVT = MVT::i32; |
| } |
| } else if (VT == MVT::i8) { |
| if (ExtType == ISD::SEXTLOAD) { |
| if (DstVT == MVT::i64) |
| Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
| else |
| Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
| } else { |
| Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
| InsertTo64 = DstVT == MVT::i64; |
| // The result of the load is only i32. It's the subreg_to_reg that makes |
| // it into an i64. |
| DstVT = MVT::i32; |
| } |
| } else if (VT == MVT::f16) { |
| Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
| } else if (VT == MVT::bf16) { |
| Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
| } else if (VT == MVT::f32) { |
| Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; |
| } else if (VT == MVT::f64 || VT.is64BitVector()) { |
| Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; |
| } else if (VT.is128BitVector()) { |
| Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; |
| } else |
| return false; |
| SDValue Chain = LD->getChain(); |
| SDValue Base = LD->getBasePtr(); |
| ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); |
| int OffsetVal = (int)OffsetOp->getZExtValue(); |
| SDLoc dl(N); |
| SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); |
| SDValue Ops[] = { Base, Offset, Chain }; |
| SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, |
| MVT::Other, Ops); |
| |
| // Transfer memoperands. |
| MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); |
| |
| // Either way, we're replacing the node, so tell the caller that. |
| SDValue LoadedVal = SDValue(Res, 1); |
| if (InsertTo64) { |
| SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
| LoadedVal = |
| SDValue(CurDAG->getMachineNode( |
| AArch64::SUBREG_TO_REG, dl, MVT::i64, |
| CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, |
| SubReg), |
| 0); |
| } |
| |
| ReplaceUses(SDValue(N, 0), LoadedVal); |
| ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); |
| ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); |
| CurDAG->RemoveDeadNode(N); |
| return true; |
| } |
| |
| void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| unsigned SubRegIdx) { |
| SDLoc dl(N); |
| EVT VT = N->getValueType(0); |
| SDValue Chain = N->getOperand(0); |
| |
| SDValue Ops[] = {N->getOperand(2), // Mem operand; |
| Chain}; |
| |
| const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| |
| SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| SDValue SuperReg = SDValue(Ld, 0); |
| for (unsigned i = 0; i < NumVecs; ++i) |
| ReplaceUses(SDValue(N, i), |
| CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
| |
| ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
| |
| // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, |
| // because it's too simple to have needed special treatment during lowering. |
| if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { |
| MachineMemOperand *MemOp = MemIntr->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); |
| } |
| |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, |
| unsigned Opc, unsigned SubRegIdx) { |
| SDLoc dl(N); |
| EVT VT = N->getValueType(0); |
| SDValue Chain = N->getOperand(0); |
| |
| SDValue Ops[] = {N->getOperand(1), // Mem operand |
| N->getOperand(2), // Incremental |
| Chain}; |
| |
| const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| MVT::Untyped, MVT::Other}; |
| |
| SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| // Update uses of write back register |
| ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
| |
| // Update uses of vector list |
| SDValue SuperReg = SDValue(Ld, 1); |
| if (NumVecs == 1) |
| ReplaceUses(SDValue(N, 0), SuperReg); |
| else |
| for (unsigned i = 0; i < NumVecs; ++i) |
| ReplaceUses(SDValue(N, i), |
| CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
| |
| // Update the chain |
| ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| /// Optimize \param OldBase and \param OldOffset selecting the best addressing |
| /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the |
| /// new Base and an SDValue representing the new offset. |
| std::tuple<unsigned, SDValue, SDValue> |
| AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, |
| unsigned Opc_ri, |
| const SDValue &OldBase, |
| const SDValue &OldOffset, |
| unsigned Scale) { |
| SDValue NewBase = OldBase; |
| SDValue NewOffset = OldOffset; |
| // Detect a possible Reg+Imm addressing mode. |
| const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( |
| N, OldBase, NewBase, NewOffset); |
| |
| // Detect a possible reg+reg addressing mode, but only if we haven't already |
| // detected a Reg+Imm one. |
| const bool IsRegReg = |
| !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); |
| |
| // Select the instruction. |
| return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); |
| } |
| |
| void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, |
| unsigned Scale, unsigned Opc_ri, |
| unsigned Opc_rr, bool IsIntr) { |
| assert(Scale < 4 && "Invalid scaling value."); |
| SDLoc DL(N); |
| EVT VT = N->getValueType(0); |
| SDValue Chain = N->getOperand(0); |
| |
| // Optimize addressing mode. |
| SDValue Base, Offset; |
| unsigned Opc; |
| std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
| N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), |
| CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); |
| |
| SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate |
| Base, // Memory operand |
| Offset, Chain}; |
| |
| const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| |
| SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
| SDValue SuperReg = SDValue(Load, 0); |
| for (unsigned i = 0; i < NumVecs; ++i) |
| ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| AArch64::zsub0 + i, DL, VT, SuperReg)); |
| |
| // Copy chain |
| unsigned ChainIdx = NumVecs; |
| ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getOperand(2)->getValueType(0); |
| |
| // Form a REG_SEQUENCE to force register allocation. |
| bool Is128Bit = VT.getSizeInBits() == 128; |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
| |
| SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; |
| SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
| |
| // Transfer memoperands. |
| MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); |
| |
| ReplaceNode(N, St); |
| } |
| |
| void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, |
| unsigned Scale, unsigned Opc_rr, |
| unsigned Opc_ri) { |
| SDLoc dl(N); |
| |
| // Form a REG_SEQUENCE to force register allocation. |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| SDValue RegSeq = createZTuple(Regs); |
| |
| // Optimize addressing mode. |
| unsigned Opc; |
| SDValue Offset, Base; |
| std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
| N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), |
| CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); |
| |
| SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate |
| Base, // address |
| Offset, // offset |
| N->getOperand(0)}; // chain |
| SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
| |
| ReplaceNode(N, St); |
| } |
| |
| bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, |
| SDValue &OffImm) { |
| SDLoc dl(N); |
| const DataLayout &DL = CurDAG->getDataLayout(); |
| const TargetLowering *TLI = getTargetLowering(); |
| |
| // Try to match it for the frame address |
| if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { |
| int FI = FINode->getIndex(); |
| Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getOperand(2)->getValueType(0); |
| const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| MVT::Other}; // Type for the Chain |
| |
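| // Post-indexed store nodes carry (Chain, Vec0..VecN-1, Base, Increment), so
| // the data registers start at operand 1.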
| // Form a REG_SEQUENCE to force register allocation. |
| bool Is128Bit = VT.getSizeInBits() == 128; |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
| |
| SDValue Ops[] = {RegSeq, |
| N->getOperand(NumVecs + 1), // base register |
| N->getOperand(NumVecs + 2), // Incremental |
| N->getOperand(0)}; // Chain |
| SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| ReplaceNode(N, St); |
| } |
| |
| namespace { |
| /// WidenVector - Given a value in the V64 register class, produce the |
| /// equivalent value in the V128 register class. |
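| /// For example, a v2f32 value is widened to v4f32 by inserting it into the
| /// dsub subregister of an IMPLICIT_DEF of the wider type.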
| class WidenVector { |
| SelectionDAG &DAG; |
| |
| public: |
| WidenVector(SelectionDAG &DAG) : DAG(DAG) {} |
| |
| SDValue operator()(SDValue V64Reg) { |
| EVT VT = V64Reg.getValueType(); |
| unsigned NarrowSize = VT.getVectorNumElements(); |
| MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
| MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); |
| SDLoc DL(V64Reg); |
| |
| SDValue Undef = |
| SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); |
| return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); |
| } |
| }; |
| } // namespace |
| |
| /// NarrowVector - Given a value in the V128 register class, produce the |
| /// equivalent value in the V64 register class. |
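| /// For example, a v4f32 value is narrowed to v2f32 by extracting its dsub
| /// subregister, i.e. the low 64 bits.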
| static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { |
| EVT VT = V128Reg.getValueType(); |
| unsigned WideSize = VT.getVectorNumElements(); |
| MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
| MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); |
| |
| return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, |
| V128Reg); |
| } |
| |
| void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getValueType(0); |
| bool Narrow = VT.getSizeInBits() == 64; |
| |
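| // The node's operands are (Chain, IntrinsicID, Vec0..VecN-1, Lane, Addr).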
| // Form a REG_SEQUENCE to force register allocation. |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| |
| if (Narrow) |
| transform(Regs, Regs.begin(), WidenVector(*CurDAG));
| |
| SDValue RegSeq = createQTuple(Regs); |
| |
| const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| |
| unsigned LaneNo = |
| cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); |
| |
| SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| N->getOperand(NumVecs + 3), N->getOperand(0)}; |
| SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| SDValue SuperReg = SDValue(Ld, 0); |
| |
| EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
| static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
| AArch64::qsub2, AArch64::qsub3 }; |
| for (unsigned i = 0; i < NumVecs; ++i) { |
| SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); |
| if (Narrow) |
| NV = NarrowVector(NV, *CurDAG); |
| ReplaceUses(SDValue(N, i), NV); |
| } |
| |
| ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getValueType(0); |
| bool Narrow = VT.getSizeInBits() == 64; |
| |
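| // Post-indexed lane loads carry (Chain, Vec0..VecN-1, Lane, Base, Increment).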
| // Form a REG_SEQUENCE to force register allocation. |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| |
| if (Narrow) |
| transform(Regs, Regs.begin(), WidenVector(*CurDAG));
| |
| SDValue RegSeq = createQTuple(Regs); |
| |
| const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| RegSeq->getValueType(0), MVT::Other}; |
| |
| unsigned LaneNo = |
| cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); |
| |
| SDValue Ops[] = {RegSeq, |
| CurDAG->getTargetConstant(LaneNo, dl, |
| MVT::i64), // Lane Number |
| N->getOperand(NumVecs + 2), // Base register |
| N->getOperand(NumVecs + 3), // Incremental |
| N->getOperand(0)}; |
| SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| // Update uses of the write back register |
| ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
| |
| // Update uses of the vector list |
| SDValue SuperReg = SDValue(Ld, 1); |
| if (NumVecs == 1) { |
| ReplaceUses(SDValue(N, 0), |
| Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); |
| } else { |
| EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
| static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
| AArch64::qsub2, AArch64::qsub3 }; |
| for (unsigned i = 0; i < NumVecs; ++i) { |
| SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, |
| SuperReg); |
| if (Narrow) |
| NV = NarrowVector(NV, *CurDAG); |
| ReplaceUses(SDValue(N, i), NV); |
| } |
| } |
| |
| // Update the Chain |
| ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
| CurDAG->RemoveDeadNode(N); |
| } |
| |
| void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getOperand(2)->getValueType(0); |
| bool Narrow = VT.getSizeInBits() == 64; |
| |
| // Form a REG_SEQUENCE to force register allocation. |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| |
| if (Narrow) |
| transform(Regs, Regs.begin(), WidenVector(*CurDAG));
| |
| SDValue RegSeq = createQTuple(Regs); |
| |
| unsigned LaneNo = |
| cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); |
| |
| SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| N->getOperand(NumVecs + 3), N->getOperand(0)}; |
| SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
| |
| // Transfer memoperands. |
| MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); |
| |
| ReplaceNode(N, St); |
| } |
| |
| void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, |
| unsigned Opc) { |
| SDLoc dl(N); |
| EVT VT = N->getOperand(2)->getValueType(0); |
| bool Narrow = VT.getSizeInBits() == 64; |
| |
| // Form a REG_SEQUENCE to force register allocation. |
| SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| |
| if (Narrow) |
| transform(Regs, Regs.begin(), WidenVector(*CurDAG));
| |
| SDValue RegSeq = createQTuple(Regs); |
| |
| const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| MVT::Other}; |
| |
| unsigned LaneNo = |
| cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); |
| |
| SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| N->getOperand(NumVecs + 2), // Base Register |
| N->getOperand(NumVecs + 3), // Incremental |
| N->getOperand(0)}; |
| SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| |
| // Transfer memoperands. |
| MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); |
| CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); |
| |
| ReplaceNode(N, St); |
| } |
| |
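| // Try to turn "(and (srl x, C), Mask)" style patterns into an unsigned
| // bitfield extract (UBFM). For example, (and (srl x, 3), 0x1f) on i32
| // becomes UBFMWri x, 3, 7, i.e. ubfx w, w, #3, #5.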
| static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, |
| unsigned &Opc, SDValue &Opd0, |
| unsigned &LSB, unsigned &MSB, |
| unsigned NumberOfIgnoredLowBits, |
| bool BiggerPattern) { |
| assert(N->getOpcode() == ISD::AND && |
| "N must be a AND operation to call this function"); |
| |
| EVT VT = N->getValueType(0); |
| |
| // We could test the type of VT and return false when it does not match, but
| // since that check has already been done before this function is called, we
| // turn it into an assert here to avoid redundant code.
| assert((VT == MVT::i32 || VT == MVT::i64) && |
| "Type checking must have been done before calling this function"); |
| |
| // FIXME: simplify-demanded-bits in DAGCombine will probably have |
| // changed the AND node to a 32-bit mask operation. We'll have to |
| // undo that as part of the transform here if we want to catch all |
| // the opportunities. |
| // Currently the NumberOfIgnoredLowBits argument helps to recover
| // from these situations when matching the bigger pattern (bitfield insert).
| |
| // For unsigned extracts, check for a shift right and mask |
| uint64_t AndImm = 0; |
| if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) |
| return false; |
| |
| const SDNode *Op0 = N->getOperand(0).getNode(); |
| |
| // Because of simplify-demanded-bits in DAGCombine, the mask may have been |
| // simplified. Try to undo that |
| AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); |
| |
| // The immediate is a mask of the low bits iff imm & (imm+1) == 0 |
| if (AndImm & (AndImm + 1)) |
| return false; |
| |
| bool ClampMSB = false; |
| uint64_t SrlImm = 0; |
| // Handle the SRL + ANY_EXTEND case. |
| if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && |
| isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { |
| // Extend the incoming operand of the SRL to 64-bit. |
| Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); |
| // Make sure to clamp the MSB so that we preserve the semantics of the |
| // original operations. |
| ClampMSB = true; |
| } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && |
| isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, |
| SrlImm)) { |
| // If the shift result was truncated, we can still combine them. |
| Opd0 = Op0->getOperand(0).getOperand(0); |
| |
| // Use the type of SRL node. |
| VT = Opd0->getValueType(0); |
| } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { |
| Opd0 = Op0->getOperand(0); |
| } else if (BiggerPattern) { |
| // Let's pretend a 0 shift right has been performed. |
| // The resulting code will be at least as good as the original one |
| // plus it may expose more opportunities for bitfield insert pattern. |
| // FIXME: Currently we limit this to the bigger pattern, because |
| // some optimizations expect AND and not UBFM. |
| Opd0 = N->getOperand(0); |
| } else |
| return false; |
| |
| // Bail out on large immediates. This happens when no proper |
| // combining/constant folding was performed. |
| if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { |
| LLVM_DEBUG( |
| (dbgs() << N |
| << ": Found large shift immediate, this should not happen\n")); |
| return false; |
| } |
| |
| LSB = SrlImm; |
| MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) |
| : countTrailingOnes<uint64_t>(AndImm)) - |
| 1; |
| if (ClampMSB) |
| // Since we're moving the extend before the right shift operation, we need |
| // to clamp the MSB to make sure we don't shift in undefined bits instead of |
| // the zeros which would get shifted in with the original right shift |
| // operation. |
| MSB = MSB > 31 ? 31 : MSB; |
| |
| Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
| return true; |
| } |
| |
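| // Match "(sign_extend_inreg (srl/sra x, C), VT)" as a signed bitfield
| // extract. For example, (sign_extend_inreg (srl x, 5), i8) on i32 becomes
| // SBFMWri x, 5, 12, i.e. sbfx w, w, #5, #8.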
| static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, |
| SDValue &Opd0, unsigned &Immr, |
| unsigned &Imms) { |
| assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
| |
| EVT VT = N->getValueType(0); |
| unsigned BitWidth = VT.getSizeInBits(); |
| assert((VT == MVT::i32 || VT == MVT::i64) && |
| "Type checking must have been done before calling this function"); |
| |
| SDValue Op = N->getOperand(0); |
| if (Op->getOpcode() == ISD::TRUNCATE) { |
| Op = Op->getOperand(0); |
| VT = Op->getValueType(0); |
| BitWidth = VT.getSizeInBits(); |
| } |
| |
| uint64_t ShiftImm; |
| if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && |
| !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
| return false; |
| |
| unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); |
| if (ShiftImm + Width > BitWidth) |
| return false; |
| |
| Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; |
| Opd0 = Op.getOperand(0); |
| Immr = ShiftImm; |
| Imms = ShiftImm + Width - 1; |
| return true; |
| } |
| |
| static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, |
| SDValue &Opd0, unsigned &LSB, |
| unsigned &MSB) { |
| // We are looking for the following pattern, which extracts a contiguous run
| // of bits from the source value and places it at the LSB of the destination
| // value, with all other bits of the destination set to zero:
| //
| // Value2 = AND Value, MaskImm
| // SRL Value2, ShiftImm
| //
| // where MaskImm >> ShiftImm must be a mask of low bits whose width gives the
| // number of bits extracted.
| // |
| // This gets selected into a single UBFM: |
| // |
| // UBFM Value, ShiftImm, BitWide + ShiftImm - 1
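| //
| // For example, (srl (and x, 0xff0), 4) becomes UBFM x, 4, 11, i.e. an
| // unsigned extract of the 8-bit field starting at bit 4.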
| // |
| |
| if (N->getOpcode() != ISD::SRL) |
| return false; |
| |
| uint64_t AndMask = 0; |
| if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) |
| return false; |
| |
| Opd0 = N->getOperand(0).getOperand(0); |
| |
| uint64_t SrlImm = 0; |
| if (!isIntImmediate(N->getOperand(1), SrlImm)) |
| return false; |
| |
| // Check whether we really have a several-bits extract here.
| unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); |
| if (BitWide && isMask_64(AndMask >> SrlImm)) { |
| if (N->getValueType(0) == MVT::i32) |
| Opc = AArch64::UBFMWri; |
| else |
| Opc = AArch64::UBFMXri; |
| |
| LSB = SrlImm; |
| MSB = BitWide + SrlImm - 1; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, |
| unsigned &Immr, unsigned &Imms, |
| bool BiggerPattern) { |
| assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && |
| "N must be a SHR/SRA operation to call this function"); |
| |
| EVT VT = N->getValueType(0); |
| |
| // We could test the type of VT and return false when it does not match, but
| // since that check has already been done before this function is called, we
| // turn it into an assert here to avoid redundant code.
| assert((VT == MVT::i32 || VT == MVT::i64) && |
| "Type checking must have been done before calling this function"); |
| |
| // Check for AND + SRL doing several bits extract. |
| if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) |
| return true; |
| |
| // We're looking for a shift of a shift. |
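| // For example, (srl (shl x, 24), 24) on i32 becomes UBFMWri x, 0, 7 (a zero
| // extension of the low byte), while the corresponding SRA form selects
| // SBFMWri instead.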
| uint64_t ShlImm = 0; |
| uint64_t TruncBits = 0; |
| if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { |
| Opd0 = N->getOperand(0).getOperand(0); |
| } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && |
| N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { |
| // We are looking for a shift of a truncate. A truncate from i64 to i32 can
| // be considered as setting the high 32 bits to zero. Our strategy here is
| // to always generate a 64-bit UBFM; this consistency helps the later CSE
| // pass find more redundancy.
| Opd0 = N->getOperand(0).getOperand(0); |
| TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); |
| VT = Opd0.getValueType(); |
| assert(VT == MVT::i64 && "the promoted type should be i64"); |
| } else if (BiggerPattern) { |
| // Let's pretend a 0 shift left has been performed. |
| // FIXME: Currently we limit this to the bigger pattern case, |
| // because some optimizations expect AND and not UBFM |
| Opd0 = N->getOperand(0); |
| } else |
| return false; |
| |
| // Missing combines/constant folding may have left us with strange |
| // constants. |
| if (ShlImm >= VT.getSizeInBits()) { |
| LLVM_DEBUG( |
| (dbgs() << N |
| << ": Found large shift immediate, this should not happen\n")); |
| return false; |
| } |
| |
| uint64_t SrlImm = 0; |
| if (!isIntImmediate(N->getOperand(1), SrlImm)) |
| return false; |
| |
| assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && |
| "bad amount in shift node!"); |
| int immr = SrlImm - ShlImm; |
| Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; |
| Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; |
| // SRA requires a signed extraction |
| if (VT == MVT::i32) |
| Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; |
| else |
| Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; |
| return true; |
| } |
| |
| bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { |
| assert(N->getOpcode() == ISD::SIGN_EXTEND); |
| |
| EVT VT = N->getValueType(0); |
| EVT NarrowVT = N->getOperand(0)->getValueType(0); |
| if (VT != MVT::i64 || NarrowVT != MVT::i32) |
| return false; |
| |
| uint64_t ShiftImm; |
| SDValue Op = N->getOperand(0); |
| if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
| return false; |
| |
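| // For example, (i64 (sext (i32 (sra x, 12)))) becomes an SBFMXri of the
| // widened operand with immr = 12 and imms = 31, i.e. sbfx x, x, #12, #20.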
| SDLoc dl(N); |
| // Extend the incoming operand of the shift to 64-bits. |
| SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); |
| unsigned Immr = ShiftImm; |
| unsigned Imms = NarrowVT.getSizeInBits() - 1; |
| SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
| CurDAG->getTargetConstant(Imms, dl, VT)}; |
| CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); |
| return true; |
| } |
| |
| /// Try to form fcvtl2 instructions from a floating-point extend of a high-half |
| /// extract of a subvector. |
| bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { |
| assert(N->getOpcode() == ISD::FP_EXTEND); |
| |
| // There are 2 forms of fcvtl2 - extend to double or extend to float. |
| SDValue Extract = N->getOperand(0); |
| EVT VT = N->getValueType(0); |
| EVT NarrowVT = Extract.getValueType(); |
| if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && |
| (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) |
| return false; |
| |
| // Optionally look past a bitcast. |
| Extract = peekThroughBitcasts(Extract); |
| if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
| return false; |
| |
| // Match an extract whose index is the start of the high half.
| // Example: v8i16 -> v4i16 means the extract must begin at index 4. |
| unsigned ExtractIndex = Extract.getConstantOperandVal(1); |
| if (ExtractIndex != Extract.getValueType().getVectorNumElements()) |
| return false; |
| |
| auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; |
| CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); |
| return true; |
| } |
| |
| static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, |
| SDValue &Opd0, unsigned &Immr, unsigned &Imms, |
| unsigned NumberOfIgnoredLowBits = 0, |
| bool BiggerPattern = false) { |
| if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) |
| return false; |
| |
| switch (N->getOpcode()) { |
| default: |
| if (!N->isMachineOpcode()) |
| return false; |
| break; |
| case ISD::AND: |
| return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, |
| NumberOfIgnoredLowBits, BiggerPattern); |
| case ISD::SRL: |
| case ISD::SRA: |
| return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); |
| |
| case ISD::SIGN_EXTEND_INREG: |
| return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); |
| } |
| |
| unsigned NOpc = N->getMachineOpcode(); |
| switch (NOpc) { |
| default: |
| return false; |
| case AArch64::SBFMWri: |
| case AArch64::UBFMWri: |
| case AArch64::SBFMXri: |
| case AArch64::UBFMXri: |
| Opc = NOpc; |
| Opd0 = N->getOperand(0); |
| Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); |
| Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); |
| return true; |
| } |
| // Unreachable |
| return false; |
| } |
| |
| bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { |
| unsigned Opc, Immr, Imms; |
| SDValue Opd0; |
| if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) |
| return false; |
| |
| EVT VT = N->getValueType(0); |
| SDLoc dl(N); |
| |
| // If the bit extract operation is 64bit but the original type is 32bit, we |
| // need to add one EXTRACT_SUBREG. |
| if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { |
| SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), |
| CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; |
| |
| SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); |
| SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
| ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, |
| MVT::i32, SDValue(BFM, 0), SubReg)); |
| return true; |
| } |
| |
| SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
| CurDAG->getTargetConstant(Imms, dl, VT)}; |
| CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| return true; |
| } |
| |
| /// Does DstMask form a complementary pair with the mask provided by |
| /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, |
| /// this asks whether DstMask zeroes precisely those bits that will be set by |
| /// the other half. |
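| /// For example, on i32 a DstMask of 0xffff0000 is complementary to inserted
| /// bits whose useful range is 0x0000ffff.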
| static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, |
| unsigned NumberOfIgnoredHighBits, EVT VT) { |
| assert((VT == MVT::i32 || VT == MVT::i64) && |
| "i32 or i64 mask type expected!"); |
| unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; |
| |
| APInt SignificantDstMask = APInt(BitWidth, DstMask); |
| APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); |
| |
| return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && |
| (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); |
| } |
| |
| // Look for bits that will be useful for later uses. |
| // A bit is considered useless as soon as it is dropped, provided it is never
| // used before being dropped.
| // E.g., looking for the useful bits of x:
| // 1. y = x & 0x7
| // 2. z = y >> 2
| // After #1, the useful bits of x are 0x7, and those useful bits live on
| // through y.
| // After #2, the useful bits of x are 0x4.
| // However, if x is used by an unpredictable instruction, then all of its
| // bits are useful.
| // E.g. |
| // 1. y = x & 0x7 |
| // 2. z = y >> 2 |
| // 3. str x, [@x] |
| static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); |
| |
| static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, |
| unsigned Depth) { |
| uint64_t Imm = |
| cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); |
| Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); |
| UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); |
| getUsefulBits(Op, UsefulBits, Depth + 1); |
| } |
| |
| static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, |
| uint64_t Imm, uint64_t MSB, |
| unsigned Depth) { |
| // inherit the bitwidth value |
| APInt OpUsefulBits(UsefulBits); |
| OpUsefulBits = 1; |
| |
| if (MSB >= Imm) { |
| OpUsefulBits <<= MSB - Imm + 1; |
| --OpUsefulBits; |
| // The interesting part will be in the lower part of the result |
| getUsefulBits(Op, OpUsefulBits, Depth + 1); |
| // The interesting part was starting at Imm in the argument |
| OpUsefulBits <<= Imm; |
| } else { |
| OpUsefulBits <<= MSB + 1; |
| --OpUsefulBits; |
| // The interesting part will be shifted in the result |
| OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; |
| getUsefulBits(Op, OpUsefulBits, Depth + 1); |
| // The interesting part was at zero in the argument |
| OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); |
| } |
| |
| UsefulBits &= OpUsefulBits; |
| } |
| |
| static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, |
| unsigned Depth) { |
| uint64_t Imm = |
| cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); |
| uint64_t MSB = |
| cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); |
| |
| getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); |
| } |
| |
| static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, |
| unsigned Depth) { |
| uint64_t ShiftTypeAndValue = |
| cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); |
| APInt Mask(UsefulBits); |
| Mask.clearAllBits(); |
| Mask.flipAllBits(); |
| |
| if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { |
| // Shift Left |
| uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
| Mask <<= ShiftAmt; |
| getUsefulBits(Op, Mask, Depth + 1); |
| Mask.lshrInPlace(ShiftAmt); |
| } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { |
| // Shift Right |
| // We do not handle AArch64_AM::ASR, because the sign will change the |
| // number of useful bits |
| uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
| Mask.lshrInPlace(ShiftAmt); |
| getUsefulBits(Op, Mask, Depth + 1); |
| Mask <<= ShiftAmt; |
| } else |
| return; |
| |
| UsefulBits &= Mask; |
| } |
| |
| static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, |
| unsigned Depth) { |
| uint64_t Imm = |
| cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); |
| uint64_t MSB = |
| cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); |
| |
| APInt OpUsefulBits(UsefulBits); |
| OpUsefulBits = 1; |
| |
| APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); |
| ResultUsefulBits.flipAllBits(); |
| APInt Mask(UsefulBits.getBitWidth(), 0); |
| |
| getUsefulBits(Op, ResultUsefulBits, Depth + 1); |
| |
| if (MSB >= Imm) { |
| // The instruction is a BFXIL. |
| uint64_t Width = MSB - Imm + 1; |
| uint64_t LSB = Imm; |
| |
| OpUsefulBits <<= Width; |
| --OpUsefulBits; |
| |
| if (Op.getOperand(1) == Orig) { |
| // Copy the low bits from the result to bits starting from LSB. |
| Mask = ResultUsefulBits & OpUsefulBits; |
| Mask <<= LSB; |
| } |
| |
| if (Op.getOperand(0) == Orig) |
| // Bits starting from LSB in the input contribute to the result. |
| Mask |= (ResultUsefulBits & ~OpUsefulBits); |
| } else { |
| // The instruction is a BFI. |
| uint64_t Width = MSB + 1; |
| uint64_t LSB = UsefulBits.getBitWidth() - Imm; |
| |
| OpUsefulBits <<= Width; |
| --OpUsefulBits; |
| OpUsefulBits <<= LSB; |
| |
| if (Op.getOperand(1) == Orig) { |
| // Copy the bits from the result to the zero bits. |
| Mask = ResultUsefulBits & OpUsefulBits; |
| Mask.lshrInPlace(LSB); |
| } |
| |
| if (Op.getOperand(0) == Orig) |
| Mask |= (ResultUsefulBits & ~OpUsefulBits); |
| } |
| |
| UsefulBits &= Mask; |
| } |
| |
| static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, |
| SDValue Orig, unsigned Depth) { |
| |
| // Users of this node should have already been instruction selected |
| // FIXME: Can we turn that into an assert? |
| if (!UserNode->isMachineOpcode()) |
| return; |
| |
| switch (UserNode->getMachineOpcode()) { |
| default: |
| return; |
| case AArch64::ANDSWri: |
| case AArch64::ANDSXri: |
| case AArch64::ANDWri: |
| case AArch64::ANDXri: |
| // We increment Depth only when we call getUsefulBits.
| return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, |
| Depth); |
| case AArch64::UBFMWri: |
| case AArch64::UBFMXri: |
| return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); |
| |
| case AArch64::ORRWrs: |
| case AArch64::ORRXrs: |
| if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) |
| getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, |
| Depth); |
| return; |
| case AArch64::BFMWri: |
| case AArch64::BFMXri: |
| return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); |
| |
| case AArch64::STRBBui: |
| case AArch64::STURBBi: |
| if (UserNode->getOperand(0) != Orig) |
| return; |
| UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); |
| return; |
| |
| case AArch64::STRHHui: |
| case AArch64::STURHHi: |
| if (UserNode->getOperand(0) != Orig) |
| return; |
| UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); |
| return; |
| } |
| } |
| |
| static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { |
| if (Depth >= SelectionDAG::MaxRecursionDepth) |
| return; |
| // Initialize UsefulBits |
| if (!Depth) { |
| unsigned Bitwidth = Op.getScalarValueSizeInBits(); |
| // At the beginning, assume every produced bit is useful.
| UsefulBits = APInt(Bitwidth, 0); |
| UsefulBits.flipAllBits(); |
| } |
| APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); |
| |
| for (SDNode *Node : Op.getNode()->uses()) { |
| // A use cannot produce useful bits |
| APInt UsefulBitsForUse = APInt(UsefulBits); |
| getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); |
| UsersUsefulBits |= UsefulBitsForUse; |
| } |
| // UsefulBits contains the produced bits that are meaningful for the |
| // current definition, thus a user cannot make a bit meaningful at |
| // this point |
| UsefulBits &= UsersUsefulBits; |
| } |
| |
| /// Create a machine node performing a notional SHL of Op by ShlAmount. If |
| /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is |
| /// 0, return Op unchanged. |
| static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { |
| if (ShlAmount == 0) |
| return Op; |
| |
| EVT VT = Op.getValueType(); |
| SDLoc dl(Op); |
| unsigned BitWidth = VT.getSizeInBits(); |
| unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
| |
| SDNode *ShiftNode; |
| if (ShlAmount > 0) { |
| // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt |
| ShiftNode = CurDAG->getMachineNode( |
| UBFMOpc, dl, VT, Op, |
| CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), |
| CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); |
| } else { |
| // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 |
| assert(ShlAmount < 0 && "expected right shift"); |
| int ShrAmount = -ShlAmount; |
| ShiftNode = CurDAG->getMachineNode( |
| UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), |
| CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); |
| } |
| |
| return SDValue(ShiftNode, 0); |
| } |
| |
| /// Does this tree qualify as an attempt to move a bitfield into position, |
| /// essentially "(and (shl VAL, N), Mask)". |
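| /// For example, (and (shl x, 3), 0xf8) moves the low 5 bits of x into
| /// position at bit 3 (ShiftAmount = 3, MaskWidth = 5).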
| static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, |
| bool BiggerPattern, |
| SDValue &Src, int &ShiftAmount, |
| int &MaskWidth) { |
| EVT VT = Op.getValueType(); |
| unsigned BitWidth = VT.getSizeInBits(); |
| (void)BitWidth; |
| assert(BitWidth == 32 || BitWidth == 64); |
| |
| KnownBits Known = CurDAG->computeKnownBits(Op); |
| |
| // Non-zero in the sense that they're not provably zero, which is the key |
| // point if we want to use this value |
| uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); |
| |
| // Discard a constant AND mask if present. It's safe because the node will |
| // already have been factored into the computeKnownBits calculation above. |
| uint64_t AndImm; |
| if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { |
| assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); |
| Op = Op.getOperand(0); |
| } |
| |
| // Don't match if the SHL has more than one use, since then we'll end up |
| // generating SHL+UBFIZ instead of just keeping SHL+AND. |
| if (!BiggerPattern && !Op.hasOneUse()) |
| return false; |
| |
| uint64_t ShlImm; |
| if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) |
| return false; |
| Op = Op.getOperand(0); |
| |
| if (!isShiftedMask_64(NonZeroBits)) |
| return false; |
| |
| ShiftAmount = countTrailingZeros(NonZeroBits); |
| MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); |
| |
| // BFI encompasses sufficiently many nodes that it's worth inserting an extra |
| // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL |
| // amount. BiggerPattern is true when this pattern is being matched for BFI, |
| // BiggerPattern is false when this pattern is being matched for UBFIZ, in |
| // which case it is not profitable to insert an extra shift. |
| if (ShlImm - ShiftAmount != 0 && !BiggerPattern) |
| return false; |
| Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); |
| |
| return true; |
| } |
| |
| static bool isShiftedMask(uint64_t Mask, EVT VT) { |
| assert(VT == MVT::i32 || VT == MVT::i64); |
| if (VT == MVT::i32) |
| return isShiftedMask_32(Mask); |
| return isShiftedMask_64(Mask); |
| } |
| |
| // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being |
| // inserted only sets known zero bits. |
| static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { |
| assert(N->getOpcode() == ISD |