| //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the AArch64-specific support for the FastISel class. Some |
| // of the target-specific code is generated by tablegen in the file |
| // AArch64GenFastISel.inc, which is #included here. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AArch64.h" |
| #include "AArch64CallingConvention.h" |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64RegisterInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "Utils/AArch64BaseInfo.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/BranchProbabilityInfo.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/FastISel.h" |
| #include "llvm/CodeGen/FunctionLoweringInfo.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/MachineValueType.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Argument.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GetElementPtrTypeIterator.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsAArch64.h" |
| #include "llvm/IR/Operator.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCInstrDesc.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSymbol.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/MathExtras.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <iterator> |
| #include <utility> |
| |
| using namespace llvm; |
| |
| namespace { |
| |
| class AArch64FastISel final : public FastISel { |
| class Address { |
| public: |
| using BaseKind = enum { |
| RegBase, |
| FrameIndexBase |
| }; |
| |
| private: |
| BaseKind Kind = RegBase; |
| AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
| union { |
| unsigned Reg; |
| int FI; |
| } Base; |
| unsigned OffsetReg = 0; |
| unsigned Shift = 0; |
| int64_t Offset = 0; |
| const GlobalValue *GV = nullptr; |
| |
| public: |
| Address() { Base.Reg = 0; } |
| |
| void setKind(BaseKind K) { Kind = K; } |
| BaseKind getKind() const { return Kind; } |
| void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
| AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
| bool isRegBase() const { return Kind == RegBase; } |
| bool isFIBase() const { return Kind == FrameIndexBase; } |
| |
| void setReg(unsigned Reg) { |
| assert(isRegBase() && "Invalid base register access!"); |
| Base.Reg = Reg; |
| } |
| |
| unsigned getReg() const { |
| assert(isRegBase() && "Invalid base register access!"); |
| return Base.Reg; |
| } |
| |
| void setOffsetReg(unsigned Reg) { |
| OffsetReg = Reg; |
| } |
| |
| unsigned getOffsetReg() const { |
| return OffsetReg; |
| } |
| |
| void setFI(unsigned FI) { |
| assert(isFIBase() && "Invalid base frame index access!"); |
| Base.FI = FI; |
| } |
| |
| unsigned getFI() const { |
| assert(isFIBase() && "Invalid base frame index access!"); |
| return Base.FI; |
| } |
| |
| void setOffset(int64_t O) { Offset = O; } |
| int64_t getOffset() { return Offset; } |
| void setShift(unsigned S) { Shift = S; } |
| unsigned getShift() { return Shift; } |
| |
| void setGlobalValue(const GlobalValue *G) { GV = G; } |
| const GlobalValue *getGlobalValue() { return GV; } |
| }; |
| |
| /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
| /// make the right decision when generating code for different targets. |
| const AArch64Subtarget *Subtarget; |
| LLVMContext *Context; |
| |
| bool fastLowerArguments() override; |
| bool fastLowerCall(CallLoweringInfo &CLI) override; |
| bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
| |
| private: |
| // Selection routines. |
| bool selectAddSub(const Instruction *I); |
| bool selectLogicalOp(const Instruction *I); |
| bool selectLoad(const Instruction *I); |
| bool selectStore(const Instruction *I); |
| bool selectBranch(const Instruction *I); |
| bool selectIndirectBr(const Instruction *I); |
| bool selectCmp(const Instruction *I); |
| bool selectSelect(const Instruction *I); |
| bool selectFPExt(const Instruction *I); |
| bool selectFPTrunc(const Instruction *I); |
| bool selectFPToInt(const Instruction *I, bool Signed); |
| bool selectIntToFP(const Instruction *I, bool Signed); |
| bool selectRem(const Instruction *I, unsigned ISDOpcode); |
| bool selectRet(const Instruction *I); |
| bool selectTrunc(const Instruction *I); |
| bool selectIntExt(const Instruction *I); |
| bool selectMul(const Instruction *I); |
| bool selectShift(const Instruction *I); |
| bool selectBitCast(const Instruction *I); |
| bool selectFRem(const Instruction *I); |
| bool selectSDiv(const Instruction *I); |
| bool selectGetElementPtr(const Instruction *I); |
| bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
| |
| // Utility helper routines. |
| bool isTypeLegal(Type *Ty, MVT &VT); |
| bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
| bool isValueAvailable(const Value *V) const; |
| bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
| bool computeCallAddress(const Value *V, Address &Addr); |
| bool simplifyAddress(Address &Addr, MVT VT); |
| void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
| MachineMemOperand::Flags Flags, |
| unsigned ScaleFactor, MachineMemOperand *MMO); |
| bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment); |
| bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
| MaybeAlign Alignment); |
| bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
| const Value *Cond); |
| bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
| bool optimizeSelect(const SelectInst *SI); |
| unsigned getRegForGEPIndex(const Value *Idx); |
| |
| // Emit helper routines. |
| unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
| const Value *RHS, bool SetFlags = false, |
| bool WantResult = true, bool IsZExt = false); |
| unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, bool SetFlags = false, |
| bool WantResult = true); |
| unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| uint64_t Imm, bool SetFlags = false, |
| bool WantResult = true); |
| unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool SetFlags = false, |
| bool WantResult = true); |
| unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType, |
| uint64_t ShiftImm, bool SetFlags = false, |
| bool WantResult = true); |
| |
| // Emit functions. |
| bool emitCompareAndBranch(const BranchInst *BI); |
| bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
| bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
| bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); |
| bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
| unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
| MachineMemOperand *MMO = nullptr); |
| bool emitStore(MVT VT, unsigned SrcReg, Address Addr, |
| MachineMemOperand *MMO = nullptr); |
| bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, |
| MachineMemOperand *MMO = nullptr); |
| unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); |
| unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); |
| unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags = false, bool WantResult = true, |
| bool IsZExt = false); |
| unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm); |
| unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags = false, bool WantResult = true, |
| bool IsZExt = false); |
| unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg, |
| bool WantResult = true); |
| unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg, |
| AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
| bool WantResult = true); |
| unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
| const Value *RHS); |
| unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
| uint64_t Imm); |
| unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, uint64_t ShiftImm); |
| unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); |
| unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
| unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
| unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
| unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
| unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
| bool IsZExt = true); |
| unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
| unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
| bool IsZExt = true); |
| unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
| unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
| bool IsZExt = false); |
| |
| unsigned materializeInt(const ConstantInt *CI, MVT VT); |
| unsigned materializeFP(const ConstantFP *CFP, MVT VT); |
| unsigned materializeGV(const GlobalValue *GV); |
| |
| // Call handling routines. |
| private: |
| CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
| bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
| unsigned &NumBytes); |
| bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes); |
| |
| public: |
| // Backend specific FastISel code. |
| unsigned fastMaterializeAlloca(const AllocaInst *AI) override; |
| unsigned fastMaterializeConstant(const Constant *C) override; |
| unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; |
| |
| explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
| const TargetLibraryInfo *LibInfo) |
| : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
| Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>(); |
| Context = &FuncInfo.Fn->getContext(); |
| } |
| |
| bool fastSelectInstruction(const Instruction *I) override; |
| |
| #include "AArch64GenFastISel.inc" |
| }; |
| |
| } // end anonymous namespace |
| |
| /// Check if the sign-/zero-extend will be a noop. |
| static bool isIntExtFree(const Instruction *I) { |
| assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
| "Unexpected integer extend instruction."); |
| assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
| "Unexpected value type."); |
| bool IsZExt = isa<ZExtInst>(I); |
| |
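| // An extend of a single-use load is considered free because the extend is |
| // expected to be folded into an extending load (e.g. ldrb/ldrh/ldrsw). |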
| if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) |
| if (LI->hasOneUse()) |
| return true; |
| |
| if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) |
| if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
| return true; |
| |
| return false; |
| } |
| |
| /// Determine the implicit scale factor that is applied by a memory |
| /// operation for a given value type. |
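| /// For example, i16 accesses scale their unsigned immediate offset by 2 |
| /// bytes, i32/f32 by 4, and i64/f64 by 8. |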
| static unsigned getImplicitScaleFactor(MVT VT) { |
| switch (VT.SimpleTy) { |
| default: |
| return 0; // invalid |
| case MVT::i1: // fall-through |
| case MVT::i8: |
| return 1; |
| case MVT::i16: |
| return 2; |
| case MVT::i32: // fall-through |
| case MVT::f32: |
| return 4; |
| case MVT::i64: // fall-through |
| case MVT::f64: |
| return 8; |
| } |
| } |
| |
| CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
| if (CC == CallingConv::WebKit_JS) |
| return CC_AArch64_WebKit_JS; |
| if (CC == CallingConv::GHC) |
| return CC_AArch64_GHC; |
| if (CC == CallingConv::CFGuard_Check) |
| return CC_AArch64_Win64_CFGuard_Check; |
| return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
| assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
| "Alloca should always return a pointer."); |
| |
| // Don't handle dynamic allocas. |
| if (!FuncInfo.StaticAllocaMap.count(AI)) |
| return 0; |
| |
| DenseMap<const AllocaInst *, int>::iterator SI = |
| FuncInfo.StaticAllocaMap.find(AI); |
| |
| if (SI != FuncInfo.StaticAllocaMap.end()) { |
| Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addFrameIndex(SI->second) |
| .addImm(0) |
| .addImm(0); |
| return ResultReg; |
| } |
| |
| return 0; |
| } |
| |
| unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
| if (VT > MVT::i64) |
| return 0; |
| |
| if (!CI->isZero()) |
| return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); |
| |
| // Create a copy from the zero register to materialize a "0" value. |
| const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
| : &AArch64::GPR32RegClass; |
| unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| Register ResultReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), |
| ResultReg).addReg(ZeroReg, getKillRegState(true)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
| // Positive zero (+0.0) has to be materialized with a fmov from the zero |
| // register, because the immediate version of fmov cannot encode zero. |
| if (CFP->isNullValue()) |
| return fastMaterializeFloatZero(CFP); |
| |
| if (VT != MVT::f32 && VT != MVT::f64) |
| return 0; |
| |
| const APFloat Val = CFP->getValueAPF(); |
| bool Is64Bit = (VT == MVT::f64); |
| // This checks to see if we can use FMOV instructions to materialize |
| // a constant; otherwise we have to materialize via the constant pool. |
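| // getFP32Imm/getFP64Imm return -1 when the value is not representable as an |
| // FMOV immediate (a small set of values with a 4-bit fraction and a 3-bit |
| // exponent). |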
| int Imm = |
| Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); |
| if (Imm != -1) { |
| unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; |
| return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); |
| } |
| |
| // For the large code model materialize the FP constant in code. |
| if (TM.getCodeModel() == CodeModel::Large) { |
| unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
| const TargetRegisterClass *RC = Is64Bit ? |
| &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| |
| Register TmpReg = createResultReg(RC); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg) |
| .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
| |
| Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
| TII.get(TargetOpcode::COPY), ResultReg) |
| .addReg(TmpReg, getKillRegState(true)); |
| |
| return ResultReg; |
| } |
| |
| // Materialize via constant pool. MachineConstantPool wants an explicit |
| // alignment. |
| Align Alignment = DL.getPrefTypeAlign(CFP->getType()); |
| |
| unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); |
| Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), |
| ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); |
| |
| unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; |
| Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) |
| .addReg(ADRPReg) |
| .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { |
| // We can't handle thread-local variables quickly yet. |
| if (GV->isThreadLocal()) |
| return 0; |
| |
| // MachO still uses GOT for large code-model accesses, but ELF requires |
| // movz/movk sequences, which FastISel doesn't handle yet. |
| if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) |
| return 0; |
| |
| unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
| |
| EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); |
| if (!DestEVT.isSimple()) |
| return 0; |
| |
| Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
| unsigned ResultReg; |
| |
| if (OpFlags & AArch64II::MO_GOT) { |
| // ADRP + LDRX |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), |
| ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
| |
| unsigned LdrOpc; |
| if (Subtarget->isTargetILP32()) { |
| ResultReg = createResultReg(&AArch64::GPR32RegClass); |
| LdrOpc = AArch64::LDRWui; |
| } else { |
| ResultReg = createResultReg(&AArch64::GPR64RegClass); |
| LdrOpc = AArch64::LDRXui; |
| } |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc), |
| ResultReg) |
| .addReg(ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | |
| AArch64II::MO_NC | OpFlags); |
| if (!Subtarget->isTargetILP32()) |
| return ResultReg; |
| |
| // LDRWui produces a 32-bit register, but pointers in-register are 64-bits |
| // so we must extend the result on ILP32. |
| Register Result64 = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
| TII.get(TargetOpcode::SUBREG_TO_REG)) |
| .addDef(Result64) |
| .addImm(0) |
| .addReg(ResultReg, RegState::Kill) |
| .addImm(AArch64::sub_32); |
| return Result64; |
| } else { |
| // ADRP + ADDX |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), |
| ADRPReg) |
| .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
| |
| if (OpFlags & AArch64II::MO_TAGGED) { |
| // MO_TAGGED on the page indicates a tagged address. Set the tag now. |
| // We do so by creating a MOVK that sets bits 48-63 of the register to |
| // (global address + 0x100000000 - PC) >> 48. This assumes that we're in |
| // the small code model so we can assume a binary size of <= 4GB, which |
| // makes the untagged PC relative offset positive. The binary must also be |
| // loaded into address range [0, 2^48). Both of these properties need to |
| // be ensured at runtime when using tagged addresses. |
| // |
| // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that |
| // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands |
| // are not exactly 1:1 with FastISel so we cannot easily abstract this |
| // out. At some point, it would be nice to find a way to not have this |
| // duplicate code. |
| unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi), |
| DstReg) |
| .addReg(ADRPReg) |
| .addGlobalAddress(GV, /*Offset=*/0x100000000, |
| AArch64II::MO_PREL | AArch64II::MO_G3) |
| .addImm(48); |
| ADRPReg = DstReg; |
| } |
| |
| ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addReg(ADRPReg) |
| .addGlobalAddress(GV, 0, |
| AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) |
| .addImm(0); |
| } |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
| EVT CEVT = TLI.getValueType(DL, C->getType(), true); |
| |
| // Only handle simple types. |
| if (!CEVT.isSimple()) |
| return 0; |
| MVT VT = CEVT.getSimpleVT(); |
| // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, |
| // 'null' pointers need somewhat special treatment. |
| if (isa<ConstantPointerNull>(C)) { |
| assert(VT == MVT::i64 && "Expected 64-bit pointers"); |
| return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); |
| } |
| |
| if (const auto *CI = dyn_cast<ConstantInt>(C)) |
| return materializeInt(CI, VT); |
| else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) |
| return materializeFP(CFP, VT); |
| else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) |
| return materializeGV(GV); |
| |
| return 0; |
| } |
| |
| unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
| assert(CFP->isNullValue() && |
| "Floating-point constant is not a positive zero."); |
| MVT VT; |
| if (!isTypeLegal(CFP->getType(), VT)) |
| return 0; |
| |
| if (VT != MVT::f32 && VT != MVT::f64) |
| return 0; |
| |
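| // +0.0 is materialized by copying the integer zero register into an FP |
| // register, i.e. 'fmov s0, wzr' or 'fmov d0, xzr'. |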
| bool Is64Bit = (VT == MVT::f64); |
| unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
| return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg); |
| } |
| |
| /// Check if the multiply is by a power-of-2 constant. |
| static bool isMulPowOf2(const Value *I) { |
| if (const auto *MI = dyn_cast<MulOperator>(I)) { |
| if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) |
| if (C->getValue().isPowerOf2()) |
| return true; |
| if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) |
| if (C->getValue().isPowerOf2()) |
| return true; |
| } |
| return false; |
| } |
| |
| // Computes the address to get to an object. |
| bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, |
| Type *Ty) { |
| const User *U = nullptr; |
| unsigned Opcode = Instruction::UserOp1; |
| if (const Instruction *I = dyn_cast<Instruction>(Obj)) { |
| // Don't walk into other basic blocks unless the object is an alloca from |
| // another block, otherwise it may not have a virtual register assigned. |
| if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || |
| FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| Opcode = I->getOpcode(); |
| U = I; |
| } |
| } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { |
| Opcode = C->getOpcode(); |
| U = C; |
| } |
| |
| if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) |
| if (Ty->getAddressSpace() > 255) |
| // Fast instruction selection doesn't support the special |
| // address spaces. |
| return false; |
| |
| switch (Opcode) { |
| default: |
| break; |
| case Instruction::BitCast: |
| // Look through bitcasts. |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| |
| case Instruction::IntToPtr: |
| // Look past no-op inttoptrs. |
| if (TLI.getValueType(DL, U->getOperand(0)->getType()) == |
| TLI.getPointerTy(DL)) |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| break; |
| |
| case Instruction::PtrToInt: |
| // Look past no-op ptrtoints. |
| if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
| return computeAddress(U->getOperand(0), Addr, Ty); |
| break; |
| |
| case Instruction::GetElementPtr: { |
| Address SavedAddr = Addr; |
| uint64_t TmpOffset = Addr.getOffset(); |
| |
| // Iterate through the GEP folding the constants into offsets where |
| // we can. |
| for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); |
| GTI != E; ++GTI) { |
| const Value *Op = GTI.getOperand(); |
| if (StructType *STy = GTI.getStructTypeOrNull()) { |
| const StructLayout *SL = DL.getStructLayout(STy); |
| unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); |
| TmpOffset += SL->getElementOffset(Idx); |
| } else { |
| uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); |
| while (true) { |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { |
| // Constant-offset addressing. |
| TmpOffset += CI->getSExtValue() * S; |
| break; |
| } |
| if (canFoldAddIntoGEP(U, Op)) { |
| // A compatible add with a constant operand. Fold the constant. |
| ConstantInt *CI = |
| cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); |
| TmpOffset += CI->getSExtValue() * S; |
| // Iterate on the other operand. |
| Op = cast<AddOperator>(Op)->getOperand(0); |
| continue; |
| } |
| // Unsupported |
| goto unsupported_gep; |
| } |
| } |
| } |
| |
| // Try to grab the base operand now. |
| Addr.setOffset(TmpOffset); |
| if (computeAddress(U->getOperand(0), Addr, Ty)) |
| return true; |
| |
| // We failed, restore everything and try the other options. |
| Addr = SavedAddr; |
| |
| unsupported_gep: |
| break; |
| } |
| case Instruction::Alloca: { |
| const AllocaInst *AI = cast<AllocaInst>(Obj); |
| DenseMap<const AllocaInst *, int>::iterator SI = |
| FuncInfo.StaticAllocaMap.find(AI); |
| if (SI != FuncInfo.StaticAllocaMap.end()) { |
| Addr.setKind(Address::FrameIndexBase); |
| Addr.setFI(SI->second); |
| return true; |
| } |
| break; |
| } |
| case Instruction::Add: { |
| // Adds of constants are common and easy enough. |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (isa<ConstantInt>(LHS)) |
| std::swap(LHS, RHS); |
| |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
| Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
| return computeAddress(LHS, Addr, Ty); |
| } |
| |
| Address Backup = Addr; |
| if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) |
| return true; |
| Addr = Backup; |
| |
| break; |
| } |
| case Instruction::Sub: { |
| // Subs of constants are common and easy enough. |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
| Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
| return computeAddress(LHS, Addr, Ty); |
| } |
| break; |
| } |
| case Instruction::Shl: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); |
| if (!CI) |
| break; |
| |
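| // Only shift amounts of 1-3 can be folded into the addressing mode, and the |
| // shift must match the log2 of the access size, which is checked below. |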
| unsigned Val = CI->getZExtValue(); |
| if (Val < 1 || Val > 3) |
| break; |
| |
| uint64_t NumBytes = 0; |
| if (Ty && Ty->isSized()) { |
| uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
| NumBytes = NumBits / 8; |
| if (!isPowerOf2_64(NumBits)) |
| NumBytes = 0; |
| } |
| |
| if (NumBytes != (1ULL << Val)) |
| break; |
| |
| Addr.setShift(Val); |
| Addr.setExtendType(AArch64_AM::LSL); |
| |
| const Value *Src = U->getOperand(0); |
| if (const auto *I = dyn_cast<Instruction>(Src)) { |
| if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
| if (!isIntExtFree(ZE) && |
| ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
| if (!isIntExtFree(SE) && |
| SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| } |
| } |
| |
| if (const auto *AI = dyn_cast<BinaryOperator>(Src)) |
| if (AI->getOpcode() == Instruction::And) { |
| const Value *LHS = AI->getOperand(0); |
| const Value *RHS = AI->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue() == 0xffffffff) |
| std::swap(LHS, RHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 0xffffffff) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Register Reg = getRegForValue(LHS); |
| if (!Reg) |
| return false; |
| Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| } |
| |
| Register Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| case Instruction::Mul: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| if (!isMulPowOf2(U)) |
| break; |
| |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| // Canonicalize power-of-2 value to the RHS. |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(LHS, RHS); |
| |
| assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt."); |
| const auto *C = cast<ConstantInt>(RHS); |
| unsigned Val = C->getValue().logBase2(); |
| if (Val < 1 || Val > 3) |
| break; |
| |
| uint64_t NumBytes = 0; |
| if (Ty && Ty->isSized()) { |
| uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
| NumBytes = NumBits / 8; |
| if (!isPowerOf2_64(NumBits)) |
| NumBytes = 0; |
| } |
| |
| if (NumBytes != (1ULL << Val)) |
| break; |
| |
| Addr.setShift(Val); |
| Addr.setExtendType(AArch64_AM::LSL); |
| |
| const Value *Src = LHS; |
| if (const auto *I = dyn_cast<Instruction>(Src)) { |
| if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
| if (!isIntExtFree(ZE) && |
| ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
| if (!isIntExtFree(SE) && |
| SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| } |
| } |
| |
| Register Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| case Instruction::And: { |
| if (Addr.getOffsetReg()) |
| break; |
| |
| if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
| break; |
| |
| const Value *LHS = U->getOperand(0); |
| const Value *RHS = U->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
| if (C->getValue() == 0xffffffff) |
| std::swap(LHS, RHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
| if (C->getValue() == 0xffffffff) { |
| Addr.setShift(0); |
| Addr.setExtendType(AArch64_AM::LSL); |
| Addr.setExtendType(AArch64_AM::UXTW); |
| |
| Register Reg = getRegForValue(LHS); |
| if (!Reg) |
| return false; |
| Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| break; |
| } |
| case Instruction::SExt: |
| case Instruction::ZExt: { |
| if (!Addr.getReg() || Addr.getOffsetReg()) |
| break; |
| |
| const Value *Src = nullptr; |
| // Fold the zext or sext when it won't become a noop. |
| if (const auto *ZE = dyn_cast<ZExtInst>(U)) { |
| if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::UXTW); |
| Src = ZE->getOperand(0); |
| } |
| } else if (const auto *SE = dyn_cast<SExtInst>(U)) { |
| if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { |
| Addr.setExtendType(AArch64_AM::SXTW); |
| Src = SE->getOperand(0); |
| } |
| } |
| |
| if (!Src) |
| break; |
| |
| Addr.setShift(0); |
| Register Reg = getRegForValue(Src); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| } // end switch |
| |
| if (Addr.isRegBase() && !Addr.getReg()) { |
| Register Reg = getRegForValue(Obj); |
| if (!Reg) |
| return false; |
| Addr.setReg(Reg); |
| return true; |
| } |
| |
| if (!Addr.getOffsetReg()) { |
| Register Reg = getRegForValue(Obj); |
| if (!Reg) |
| return false; |
| Addr.setOffsetReg(Reg); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
| const User *U = nullptr; |
| unsigned Opcode = Instruction::UserOp1; |
| bool InMBB = true; |
| |
| if (const auto *I = dyn_cast<Instruction>(V)) { |
| Opcode = I->getOpcode(); |
| U = I; |
| InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
| } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { |
| Opcode = C->getOpcode(); |
| U = C; |
| } |
| |
| switch (Opcode) { |
| default: break; |
| case Instruction::BitCast: |
| // Look past bitcasts if its operand is in the same BB. |
| if (InMBB) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| case Instruction::IntToPtr: |
| // Look past no-op inttoptrs if its operand is in the same BB. |
| if (InMBB && |
| TLI.getValueType(DL, U->getOperand(0)->getType()) == |
| TLI.getPointerTy(DL)) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| case Instruction::PtrToInt: |
| // Look past no-op ptrtoints if its operand is in the same BB. |
| if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
| return computeCallAddress(U->getOperand(0), Addr); |
| break; |
| } |
| |
| if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { |
| Addr.setGlobalValue(GV); |
| return true; |
| } |
| |
| // If all else fails, try to materialize the value in a register. |
| if (!Addr.getGlobalValue()) { |
| Addr.setReg(getRegForValue(V)); |
| return Addr.getReg() != 0; |
| } |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
| EVT evt = TLI.getValueType(DL, Ty, true); |
| |
| if (Subtarget->isTargetILP32() && Ty->isPointerTy()) |
| return false; |
| |
| // Only handle simple types. |
| if (evt == MVT::Other || !evt.isSimple()) |
| return false; |
| VT = evt.getSimpleVT(); |
| |
| // This is a legal type, but it's not something we handle in fast-isel. |
| if (VT == MVT::f128) |
| return false; |
| |
| // Handle all other legal types, i.e. a register that will directly hold this |
| // value. |
| return TLI.isTypeLegal(VT); |
| } |
| |
| /// Determine if the value type is supported by FastISel. |
| /// |
| /// FastISel for AArch64 can handle more value types than are legal. This adds |
| /// simple value types such as i1, i8, and i16. |
| bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
| if (Ty->isVectorTy() && !IsVectorAllowed) |
| return false; |
| |
| if (isTypeLegal(Ty, VT)) |
| return true; |
| |
| // If this is a type that can be sign- or zero-extended to a basic operation, |
| // go ahead and accept it now. |
| if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
| return true; |
| |
| return false; |
| } |
| |
| bool AArch64FastISel::isValueAvailable(const Value *V) const { |
| if (!isa<Instruction>(V)) |
| return true; |
| |
| const auto *I = cast<Instruction>(V); |
| return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
| } |
| |
| bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
| if (Subtarget->isTargetILP32()) |
| return false; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| return false; |
| |
| bool ImmediateOffsetNeedsLowering = false; |
| bool RegisterOffsetNeedsLowering = false; |
| int64_t Offset = Addr.getOffset(); |
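| // Load/store immediates are either an unsigned 12-bit offset scaled by the |
| // access size or an unscaled signed 9-bit offset; anything else must be |
| // lowered into a separate add. |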
| if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) |
| ImmediateOffsetNeedsLowering = true; |
| else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && |
| !isUInt<12>(Offset / ScaleFactor)) |
| ImmediateOffsetNeedsLowering = true; |
| |
| // Cannot encode an offset register and an immediate offset in the same |
| // instruction. Fold the immediate offset into the load/store instruction and |
| // emit an additional add to take care of the offset register. |
| if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) |
| RegisterOffsetNeedsLowering = true; |
| |
| // Cannot encode zero register as base. |
| if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) |
| RegisterOffsetNeedsLowering = true; |
| |
| // If this is a stack pointer and the offset needs to be simplified then put |
| // the alloca address into a register, set the base type back to register and |
| // continue. This should almost never happen. |
| if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) { |
| Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), |
| ResultReg) |
| .addFrameIndex(Addr.getFI()) |
| .addImm(0) |
| .addImm(0); |
| Addr.setKind(Address::RegBase); |
| Addr.setReg(ResultReg); |
| } |
| |
| if (RegisterOffsetNeedsLowering) { |
| unsigned ResultReg = 0; |
| if (Addr.getReg()) { |
| if (Addr.getExtendType() == AArch64_AM::SXTW || |
| Addr.getExtendType() == AArch64_AM::UXTW ) |
| ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
| Addr.getOffsetReg(), Addr.getExtendType(), |
| Addr.getShift()); |
| else |
| ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
| Addr.getOffsetReg(), AArch64_AM::LSL, |
| Addr.getShift()); |
| } else { |
| if (Addr.getExtendType() == AArch64_AM::UXTW) |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
| Addr.getShift(), /*IsZExt=*/true); |
| else if (Addr.getExtendType() == AArch64_AM::SXTW) |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
| Addr.getShift(), /*IsZExt=*/false); |
| else |
| ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), |
| Addr.getShift()); |
| } |
| if (!ResultReg) |
| return false; |
| |
| Addr.setReg(ResultReg); |
| Addr.setOffsetReg(0); |
| Addr.setShift(0); |
| Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
| } |
| |
| // Since the offset is too large for the load/store instruction, get the |
| // reg+offset into a register. |
| if (ImmediateOffsetNeedsLowering) { |
| unsigned ResultReg; |
| if (Addr.getReg()) |
| // Try to fold the immediate into the add instruction. |
| ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset); |
| else |
| ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); |
| |
| if (!ResultReg) |
| return false; |
| Addr.setReg(ResultReg); |
| Addr.setOffset(0); |
| } |
| return true; |
| } |
| |
| void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
| const MachineInstrBuilder &MIB, |
| MachineMemOperand::Flags Flags, |
| unsigned ScaleFactor, |
| MachineMemOperand *MMO) { |
| int64_t Offset = Addr.getOffset() / ScaleFactor; |
| // Frame base works a bit differently. Handle it separately. |
| if (Addr.isFIBase()) { |
| int FI = Addr.getFI(); |
| // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
| // and alignment should be based on the VT. |
| MMO = FuncInfo.MF->getMachineMemOperand( |
| MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, |
| MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); |
| // Now add the rest of the operands. |
| MIB.addFrameIndex(FI).addImm(Offset); |
| } else { |
| assert(Addr.isRegBase() && "Unexpected address kind."); |
| const MCInstrDesc &II = MIB->getDesc(); |
| unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; |
| Addr.setReg( |
| constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); |
| Addr.setOffsetReg( |
| constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); |
| if (Addr.getOffsetReg()) { |
| assert(Addr.getOffset() == 0 && "Unexpected offset"); |
| bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
| Addr.getExtendType() == AArch64_AM::SXTX; |
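| // The register-offset form takes the base register, the offset register, |
| // a flag selecting sign- vs. zero-extension of the offset, and a flag |
| // selecting whether the offset is shifted by the access size. |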
| MIB.addReg(Addr.getReg()); |
| MIB.addReg(Addr.getOffsetReg()); |
| MIB.addImm(IsSigned); |
| MIB.addImm(Addr.getShift() != 0); |
| } else |
| MIB.addReg(Addr.getReg()).addImm(Offset); |
| } |
| |
| if (MMO) |
| MIB.addMemOperand(MMO); |
| } |
| |
| unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
| const Value *RHS, bool SetFlags, |
| bool WantResult, bool IsZExt) { |
| AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
| bool NeedExtend = false; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| NeedExtend = true; |
| break; |
| case MVT::i8: |
| NeedExtend = true; |
| ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; |
| break; |
| case MVT::i16: |
| NeedExtend = true; |
| ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; |
| break; |
| case MVT::i32: // fall-through |
| case MVT::i64: |
| break; |
| } |
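| // Sub-32-bit operations are performed in a 32-bit register; the operands are |
| // extended below when needed. |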
| MVT SrcVT = RetVT; |
| RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); |
| |
| // Canonicalize immediates to the RHS first. |
| if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize mul by power of 2 to the RHS. |
| if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (isMulPowOf2(LHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize shift immediate to the RHS. |
| if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) |
| if (isa<ConstantInt>(SI->getOperand(1))) |
| if (SI->getOpcode() == Instruction::Shl || |
| SI->getOpcode() == Instruction::LShr || |
| SI->getOpcode() == Instruction::AShr ) |
| std::swap(LHS, RHS); |
| |
| Register LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return 0; |
| |
| if (NeedExtend) |
| LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); |
| |
| unsigned ResultReg = 0; |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
| uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); |
| if (C->isNegative()) |
| ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags, |
| WantResult); |
| else |
| ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, |
| WantResult); |
| } else if (const auto *C = dyn_cast<Constant>(RHS)) |
| if (C->isNullValue()) |
| ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult); |
| |
| if (ResultReg) |
| return ResultReg; |
| |
| // Only extend the RHS within the instruction if there is a valid extend type. |
| if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && |
| isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) |
| if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { |
| Register RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, |
| C->getZExtValue(), SetFlags, WantResult); |
| } |
| Register RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0, |
| SetFlags, WantResult); |
| } |
| |
| // Check if the mul can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (isMulPowOf2(RHS)) { |
| const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
| const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(MulLHS, MulRHS); |
| |
| assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
| uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
| Register RHSReg = getRegForValue(MulLHS); |
| if (!RHSReg) |
| return 0; |
| ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL, |
| ShiftVal, SetFlags, WantResult); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| // Check if the shift can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
| AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
| switch (SI->getOpcode()) { |
| default: break; |
| case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; |
| case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; |
| case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; |
| } |
| uint64_t ShiftVal = C->getZExtValue(); |
| if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
| Register RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, |
| ShiftVal, SetFlags, WantResult); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| } |
| } |
| |
| Register RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| |
| if (NeedExtend) |
| RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); |
| |
| return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| |
| if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
| RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
| return 0; |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
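| // Opcode table indexed by [SetFlags][UseAdd][Is64Bit]. |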
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrr, AArch64::SUBXrr }, |
| { AArch64::ADDWrr, AArch64::ADDXrr } }, |
| { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
| { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) |
| .addReg(LHSReg) |
| .addReg(RHSReg); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| uint64_t Imm, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && "Invalid register number."); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
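| // The arithmetic immediate is a 12-bit value, optionally shifted left by 12, |
| // so e.g. 0x123000 is encoded as 0x123 with LSL #12. |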
| unsigned ShiftImm; |
| if (isUInt<12>(Imm)) |
| ShiftImm = 0; |
| else if ((Imm & 0xfff000) == Imm) { |
| ShiftImm = 12; |
| Imm >>= 12; |
| } else |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWri, AArch64::SUBXri }, |
| { AArch64::ADDWri, AArch64::ADDXri } }, |
| { { AArch64::SUBSWri, AArch64::SUBSXri }, |
| { AArch64::ADDSWri, AArch64::ADDSXri } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC; |
| if (SetFlags) |
| RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| else |
| RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) |
| .addReg(LHSReg) |
| .addImm(Imm) |
| .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, |
| AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
| RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
| // Don't deal with undefined shifts. |
| if (ShiftImm >= RetVT.getSizeInBits()) |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrs, AArch64::SUBXrs }, |
| { AArch64::ADDWrs, AArch64::ADDXrs } }, |
| { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
| { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = |
| Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) |
| .addReg(LHSReg) |
| .addReg(RHSReg) |
| .addImm(getShifterImm(ShiftType, ShiftImm)); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, |
| AArch64_AM::ShiftExtendType ExtType, |
| uint64_t ShiftImm, bool SetFlags, |
| bool WantResult) { |
| assert(LHSReg && RHSReg && "Invalid register number."); |
| assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
| RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
| |
| if (RetVT != MVT::i32 && RetVT != MVT::i64) |
| return 0; |
| |
| if (ShiftImm >= 4) |
| return 0; |
| |
| static const unsigned OpcTable[2][2][2] = { |
| { { AArch64::SUBWrx, AArch64::SUBXrx }, |
| { AArch64::ADDWrx, AArch64::ADDXrx } }, |
| { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
| { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
| }; |
| bool Is64Bit = RetVT == MVT::i64; |
| unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
| const TargetRegisterClass *RC = nullptr; |
| if (SetFlags) |
| RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
| else |
| RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
| unsigned ResultReg; |
| if (WantResult) |
| ResultReg = createResultReg(RC); |
| else |
| ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
| RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) |
| .addReg(LHSReg) |
| .addReg(RHSReg) |
| .addImm(getArithExtendImm(ExtType, ShiftImm)); |
| return ResultReg; |
| } |
| |
| bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
| Type *Ty = LHS->getType(); |
| EVT EVT = TLI.getValueType(DL, Ty, true); |
| if (!EVT.isSimple()) |
| return false; |
| MVT VT = EVT.getSimpleVT(); |
| |
| switch (VT.SimpleTy) { |
| default: |
| return false; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| case MVT::i64: |
| return emitICmp(VT, LHS, RHS, IsZExt); |
| case MVT::f32: |
| case MVT::f64: |
| return emitFCmp(VT, LHS, RHS); |
| } |
| } |
| |
| bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool IsZExt) { |
| return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
| IsZExt) != 0; |
| } |
| |
| bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { |
| return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, |
| /*SetFlags=*/true, /*WantResult=*/false) != 0; |
| } |
| |
| bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
| if (RetVT != MVT::f32 && RetVT != MVT::f64) |
| return false; |
| |
| // Check to see if the 2nd operand is a constant that we can encode directly |
| // in the compare. |
| bool UseImm = false; |
| if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) |
| if (CFP->isZero() && !CFP->isNegative()) |
| UseImm = true; |
| |
| Register LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return false; |
| |
| if (UseImm) { |
| unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) |
| .addReg(LHSReg); |
| return true; |
| } |
| |
| Register RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return false; |
| |
| unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) |
| .addReg(LHSReg) |
| .addReg(RHSReg); |
| return true; |
| } |
| |
| unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags, bool WantResult, bool IsZExt) { |
| return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
| IsZExt); |
| } |
| |
| /// This method is a wrapper to simplify add emission. |
| /// |
| /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
| /// that fails, then try to materialize the immediate into a register and use |
| /// emitAddSub_rr instead. |
| unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { |
| unsigned ResultReg; |
| if (Imm < 0) |
| ResultReg = emitAddSub_ri(false, VT, Op0, -Imm); |
| else |
| ResultReg = emitAddSub_ri(true, VT, Op0, Imm); |
| |
| if (ResultReg) |
| return ResultReg; |
| |
| unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); |
| if (!CReg) |
| return 0; |
| |
| ResultReg = emitAddSub_rr(true, VT, Op0, CReg); |
| return ResultReg; |
| } |
| |
| unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
| bool SetFlags, bool WantResult, bool IsZExt) { |
| return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
| IsZExt); |
| } |
| |
| unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, bool WantResult) { |
| return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, |
| /*SetFlags=*/true, WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
| unsigned RHSReg, |
| AArch64_AM::ShiftExtendType ShiftType, |
| uint64_t ShiftImm, bool WantResult) { |
| return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, |
| ShiftImm, /*SetFlags=*/true, WantResult); |
| } |
| |
| unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
| const Value *LHS, const Value *RHS) { |
| // Canonicalize immediates to the RHS first. |
| if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize mul by power-of-2 to the RHS. |
| if (LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (isMulPowOf2(LHS)) |
| std::swap(LHS, RHS); |
| |
| // Canonicalize shift immediate to the RHS. |
| if (LHS->hasOneUse() && isValueAvailable(LHS)) |
| if (const auto *SI = dyn_cast<ShlOperator>(LHS)) |
| if (isa<ConstantInt>(SI->getOperand(1))) |
| std::swap(LHS, RHS); |
| |
| Register LHSReg = getRegForValue(LHS); |
| if (!LHSReg) |
| return 0; |
| |
| unsigned ResultReg = 0; |
| if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
| uint64_t Imm = C->getZExtValue(); |
| ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); |
| } |
| if (ResultReg) |
| return ResultReg; |
| |
| // Check if the mul can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (isMulPowOf2(RHS)) { |
| const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
| const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(MulLHS, MulRHS); |
| |
| assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
| uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
| |
| Register RHSReg = getRegForValue(MulLHS); |
| if (!RHSReg) |
| return 0; |
| ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| // Check if the shift can be folded into the instruction. |
| if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
| if (const auto *SI = dyn_cast<ShlOperator>(RHS)) |
| if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
| uint64_t ShiftVal = C->getZExtValue(); |
| Register RHSReg = getRegForValue(SI->getOperand(0)); |
| if (!RHSReg) |
| return 0; |
| ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); |
| if (ResultReg) |
| return ResultReg; |
| } |
| } |
| |
| Register RHSReg = getRegForValue(RHS); |
| if (!RHSReg) |
| return 0; |
| |
| MVT VT = std::max(MVT::i32, RetVT.SimpleTy); |
| ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
| } |
| return ResultReg; |
| } |
| |
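| /// Emit a logical operation with an immediate operand. The immediate must be |
| /// encodable as an AArch64 logical immediate, otherwise emission fails. |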
| unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
| unsigned LHSReg, uint64_t Imm) { |
| static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
| "ISD nodes are not consecutive!"); |
| static const unsigned OpcTable[3][2] = { |
| { AArch64::ANDWri, AArch64::ANDXri }, |
| { AArch64::ORRWri, AArch64::ORRXri }, |
| { AArch64::EORWri, AArch64::EORXri } |
| }; |
| const TargetRegisterClass *RC; |
| unsigned Opc; |
| unsigned RegSize; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: { |
| unsigned Idx = ISDOpc - ISD::AND; |
| Opc = OpcTable[Idx][0]; |
| RC = &AArch64::GPR32spRegClass; |
| RegSize = 32; |
| break; |
| } |
| case MVT::i64: |
| Opc = OpcTable[ISDOpc - ISD::AND][1]; |
| RC = &AArch64::GPR64spRegClass; |
| RegSize = 64; |
| break; |
| } |
| |
| if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) |
| return 0; |
| |
| Register ResultReg = |
| fastEmitInst_ri(Opc, RC, LHSReg, |
| AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
| } |
| return ResultReg; |
| } |
| |
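| /// Emit a logical operation where the second register operand is shifted left |
| /// by ShiftImm. Shift amounts of the result width or more are rejected. |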
| unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
| unsigned LHSReg, unsigned RHSReg, |
| uint64_t ShiftImm) { |
| static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
| "ISD nodes are not consecutive!"); |
| static const unsigned OpcTable[3][2] = { |
| { AArch64::ANDWrs, AArch64::ANDXrs }, |
| { AArch64::ORRWrs, AArch64::ORRXrs }, |
| { AArch64::EORWrs, AArch64::EORXrs } |
| }; |
| |
| // Don't deal with undefined shifts. |
| if (ShiftImm >= RetVT.getSizeInBits()) |
| return 0; |
| |
| const TargetRegisterClass *RC; |
| unsigned Opc; |
| switch (RetVT.SimpleTy) { |
| default: |
| return 0; |
| case MVT::i1: |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| Opc = OpcTable[ISDOpc - ISD::AND][0]; |
| RC = &AArch64::GPR32RegClass; |
| break; |
| case MVT::i64: |
| Opc = OpcTable[ISDOpc - ISD::AND][1]; |
| RC = &AArch64::GPR64RegClass; |
| break; |
| } |
| Register ResultReg = |
| fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, |
| AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); |
| if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
| uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
| ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); |
| } |
| return ResultReg; |
| } |
| |
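| /// Emit an AND of a register with an immediate. |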
| unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, |
| uint64_t Imm) { |
| return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); |
| } |
| |
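| /// Emit a load of type VT from the given address, zero- or sign-extending the |
| /// result to RetVT. i1 values are masked down to one bit, and zero-extending |
| /// loads to i64 are widened with a SUBREG_TO_REG. Returns 0 on failure. |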
| unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
| bool WantZExt, MachineMemOperand *MMO) { |
| if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
| return 0; |
| |
| // Simplify this down to something we can handle. |
| if (!simplifyAddress(Addr, VT)) |
| return 0; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| llvm_unreachable("Unexpected value type."); |
| |
| // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
| // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
| bool UseScaled = true; |
| if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
| UseScaled = false; |
| ScaleFactor = 1; |
| } |
| |
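| // Opcode table for integer loads, indexed as [WantZExt][2 * Idx + IsRet64Bit] |
| // [size]: Idx selects unscaled, scaled, register-offset, or extended |
| // register-offset addressing, and size selects i8/i16/i32/i64. |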
| static const unsigned GPOpcTable[2][8][4] = { |
| // Sign-extend. |
| { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
| AArch64::LDURXi }, |
| { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
| AArch64::LDRXroW }, |
| { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
| AArch64::LDRXroW } |
| }, |
| // Zero-extend. |
| { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
| AArch64::LDURXi }, |
| { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
| AArch64::LDRXui }, |
| { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
| AArch64::LDRXroX }, |
| { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
| AArch64::LDRXroW }, |
| { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
| AArch64::LDRXroW } |
| } |
| }; |
| |
| static const unsigned FPOpcTable[4][2] = { |
| { AArch64::LDURSi, AArch64::LDURDi }, |
| { AArch64::LDRSui, AArch64::LDRDui }, |
| { AArch64::LDRSroX, AArch64::LDRDroX }, |
| { AArch64::LDRSroW, AArch64::LDRDroW } |
| }; |
| |
| unsigned Opc; |
| const TargetRegisterClass *RC; |
| bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
| Addr.getOffsetReg(); |
| unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
| if (Addr.getExtendType() == AArch64_AM::UXTW || |
| Addr.getExtendType() == AArch64_AM::SXTW) |
| Idx++; |
| |
| bool IsRet64Bit = RetVT == MVT::i64; |
| switch (VT.SimpleTy) { |
| default: |
| llvm_unreachable("Unexpected value type."); |
| case MVT::i1: // Intentional fall-through. |
| case MVT::i8: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i16: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i32: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
| RC = (IsRet64Bit && !WantZExt) ? |
| &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
| break; |
| case MVT::i64: |
| Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
| RC = &AArch64::GPR64RegClass; |
| break; |
| case MVT::f32: |
| Opc = FPOpcTable[Idx][0]; |
| RC = &AArch64::FPR32RegClass; |
| break; |
| case MVT::f64: |
| Opc = FPOpcTable[Idx][1]; |
| RC = &AArch64::FPR64RegClass; |
| break; |
| } |
| |
| // Create the base instruction, then add the operands. |
| Register ResultReg = createResultReg(RC); |
| MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
| TII.get(Opc), ResultReg); |
| addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); |
| |
| // Loading an i1 requires special handling. |
| if (VT == MVT::i1) { |
| unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); |
| assert(ANDReg && "Unexpected AND instruction emission failure."); |
| ResultReg = ANDReg; |
| } |
| |
| // For zero-extending loads to 64 bits we emit a 32-bit load and then convert |
| // the 32-bit register to a 64-bit register. |
| if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { |
| Register Reg64 = createResultReg(&AArch64::GPR64RegClass); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, |
| TII.get(AArch64::SUBREG_TO_REG), Reg64) |
| .addImm(0) |
| .addReg(ResultReg, getKillRegState(true)) |
| .addImm(AArch64::sub_32); |
| ResultReg = Reg64; |
| } |
| return ResultReg; |
| } |
| |
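| /// Select an IR add or sub instruction. Vector types are handed off to the |
| /// generic operator selection. |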
| bool AArch64FastISel::selectAddSub(const Instruction *I) { |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (VT.isVector()) |
| return selectOperator(I, I->getOpcode()); |
| |
| unsigned ResultReg; |
| switch (I->getOpcode()) { |
| default: |
| llvm_unreachable("Unexpected instruction."); |
| case Instruction::Add: |
| ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Sub: |
| ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| } |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
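| /// Select an IR and/or/xor instruction. Vector types are handed off to the |
| /// generic operator selection. |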
| bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
| MVT VT; |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| if (VT.isVector()) |
| return selectOperator(I, I->getOpcode()); |
| |
| unsigned ResultReg; |
| switch (I->getOpcode()) { |
| default: |
| llvm_unreachable("Unexpected instruction."); |
| case Instruction::And: |
| ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Or: |
| ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| case Instruction::Xor: |
| ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); |
| break; |
| } |
| if (!ResultReg) |
| return false; |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
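| /// Select a non-atomic IR load, trying to fold a single sign-/zero-extend user |
| /// into the load. Swifterror values are not handled here. |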
| bool AArch64FastISel::selectLoad(const Instruction *I) { |
| MVT VT; |
| // Verify we have a legal type before going any further. Currently, we handle |
| // simple types that will directly fit in a register (i32/f32/i64/f64) or |
| // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
| if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || |
| cast<LoadInst>(I)->isAtomic()) |
| return false; |
| |
| const Value *SV = I->getOperand(0); |
| if (TLI.supportSwiftError()) { |
| // Swifterror values can come from either a function parameter with |
| // swifterror attribute or an alloca with swifterror attribute. |
| if (const Argument *Arg = dyn_cast<Argument>(SV)) { |
| if (Arg->hasSwiftErrorAttr()) |
| return false; |
| } |
| |
| if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { |
| if (Alloca->isSwiftError()) |
| return false; |
| } |
| } |
| |
| // See if we can handle this address. |
| Address Addr; |
| if (!computeAddress(I->getOperand(0), Addr, I->getType())) |
| return false; |
| |
| // Fold the following sign-/zero-extend into the load instruction. |
| bool WantZExt = true; |
| MVT RetVT = VT; |
| const Value *IntExtVal = nullptr; |
| if (I->hasOneUse()) { |
| if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { |
| if (isTypeSupported(ZE->getType(), RetVT)) |
| IntExtVal = ZE; |
| else |
| RetVT = VT; |
| } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { |
| if (isTypeSupported(SE->getType(), RetVT)) |
| IntExtVal = SE; |
| else |
| RetVT = VT; |
| WantZExt = false; |
| } |
| } |
| |
| unsigned ResultReg = |
| emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); |
| if (!ResultReg) |
| return false; |
| |
| // There are a few different cases we have to handle, because the load or the |
| // sign-/zero-extend might not be selected by FastISel if we fall back to |
| // SelectionDAG. There is also an ordering issue when both instructions are in |
| // different basic blocks. |
| // 1.) The load instruction is selected by FastISel, but the integer extend |
| // not. This usually happens when the integer extend is in a different |
| // basic block and SelectionDAG took over for that basic block. |
| // 2.) The load instruction is selected before the integer extend. This only |
| // happens when the integer extend is in a different basic block. |
| // 3.) The load instruction is selected by SelectionDAG and the integer extend |
| // by FastISel. This happens if there are instructions between the load |
| // and the integer extend that couldn't be selected by FastISel. |
| if (IntExtVal) { |
| // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
| // could select it. Emit a copy to subreg if necessary. FastISel will remove |
| // it when it selects the integer extend. |
| Register Reg = lookUpRegForValue(IntExtVal); |
| auto *MI = MRI.getUniqueVRegDef(Reg); |
| if (!MI) { |
| if (RetVT == MVT::i64 && VT <= MVT::i32) { |
| if (WantZExt) { |
| // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
| MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); |
| ResultReg = std::prev(I)->getOperand(0).getReg(); |
| removeDeadCode(I, std::next(I)); |
| } else |
| ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, |
| AArch64::sub_32); |
| } |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
| // The integer extend has already been emitted; delete all the instructions |
| // that were emitted by the integer extend lowering code and use the |
| // result from the load instruction directly. |
| while (MI) { |
| Reg = 0; |
| for (auto &Opnd : MI->uses()) { |
| if (Opnd.isReg()) { |
| Reg = Opnd.getReg(); |
| break; |
| } |
| } |
| MachineBasicBlock::iterator I(MI); |
| removeDeadCode(I, std::next(I)); |
| MI = nullptr; |
| if (Reg) |
| MI = MRI.getUniqueVRegDef(Reg); |
| } |
| updateValueMap(IntExtVal, ResultReg); |
| return true; |
| } |
| |
| updateValueMap(I, ResultReg); |
| return true; |
| } |
| |
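| /// Emit a store-release (STLR*) of SrcReg to the address in AddrReg. |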
| bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
| unsigned AddrReg, |
| MachineMemOperand *MMO) { |
| unsigned Opc; |
| switch (VT.SimpleTy) { |
| default: return false; |
| case MVT::i8: Opc = AArch64::STLRB; break; |
| case MVT::i16: Opc = AArch64::STLRH; break; |
| case MVT::i32: Opc = AArch64::STLRW; break; |
| case MVT::i64: Opc = AArch64::STLRX; break; |
| } |
| |
| const MCInstrDesc &II = TII.get(Opc); |
| SrcReg = constrainOperandRegClass(II, SrcReg, 0); |
| AddrReg = constrainOperandRegClass(II, AddrReg, 1); |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) |
| .addReg(SrcReg) |
| .addReg(AddrReg) |
| .addMemOperand(MMO); |
| return true; |
| } |
| |
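| /// Emit a store of SrcReg with type VT to the given address. Returns false if |
| /// the address cannot be handled. |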
| bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, |
| MachineMemOperand *MMO) { |
| if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
| return false; |
| |
| // Simplify this down to something we can handle. |
| if (!simplifyAddress(Addr, VT)) |
| return false; |
| |
| unsigned ScaleFactor = getImplicitScaleFactor(VT); |
| if (!ScaleFactor) |
| llvm_unreachable("Unexpected value type."); |
| |
| // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
| // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
| bool UseScaled = true; |
| if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
| UseScaled = false; |
| ScaleFactor = 1; |
| } |
| |
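| // Opcode table for stores, indexed as [Idx][size]: Idx selects unscaled, |
| // scaled, register-offset, or extended register-offset addressing, and size |
| // selects i8/i16/i32/i64/f32/f64. |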
| static const unsigned OpcTable[4][6] = { |
| { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
| AArch64::STURSi, AArch64::STURDi }, |
| { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
| AArch64::STRSui, AArch64::STRDui }, |
| { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
| AArch64::STRSroX, AArch64::STRDroX }, |
| { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
| AArch64::STRSroW, AArch64::STRDroW } |
| }; |
| |
| unsigned Opc; |
| bool VTIsi1 = false; |
| bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
| Addr.getOffsetReg(); |
| unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
| if (Addr.getExtendType() == AArch64_AM::UXTW || |
| Addr.getExtendType() == AArch64_AM::SXTW) |
| Idx++; |
| |
| switch (VT.SimpleTy) { |
| default: llvm_unreachable("Unexpected value type."); |
| case MVT::i1: VTIsi1 = true; [[fallthrough]]; |
| case MVT::i8: Opc = OpcTable[Idx][0]; break; |
| case MVT::i16: Opc = OpcTable[Idx][1]; break; |
| case MVT::i32: Opc = OpcTable[Idx][2]; break; |
| case MVT::i64: Opc = OpcTable[Idx][3]; break; |
| case MVT::f32: Opc = OpcTable[Idx][4]; break; |
| case MVT::f64: Opc = OpcTable[Idx][5]; break; |
| } |
| |
| // Storing an i1 requires special handling. |
| if (VTIsi1 && SrcReg != AArch64::WZR) { |
| unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); |
| assert(ANDReg && "Unexpected AND instruction emission failure."); |
| SrcReg = ANDReg; |
| } |
| // Create the base instruction, then add the operands. |
| const MCInstrDesc &II = TII.get(Opc); |
| SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
| MachineInstrBuilder MIB = |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); |
| addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); |
| |
| return true; |
| } |
| |
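| /// Select an IR store. Release and seq_cst atomic stores are lowered to STLR; |
| /// relaxed atomic stores use the ordinary store instructions. |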
| bool AArch64FastISel::selectStore(const Instruction *I) { |
| MVT VT; |
| const Value *Op0 = I->getOperand(0); |
| // Verify we have a legal type before going any further. Currently, we handle |
| // simple types that will directly fit in a register (i32/f32/i64/f64) or |
| // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
| if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
| return false; |
| |
| const Value *PtrV = I->getOperand(1); |
| if (TLI.supportSwiftError()) { |
| // Swifterror values can come from either a function parameter with |
| // swifterror attribute or an alloca with swifterror attribute. |
| if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { |
| if (Arg->hasSwiftErrorAttr()) |
| return false; |
| } |
| |
| if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { |
| if (Alloca->isSwiftError()) |
| return false; |
| } |
| } |
| |
| // Get the value to be stored into a register. Use the zero register directly |
| // when possible to avoid an unnecessary copy and a wasted register. |
| unsigned SrcReg = 0; |
| if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { |
| if (CI->isZero()) |
| SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { |
| if (CF->isZero() && !CF->isNegative()) { |
| VT = MVT::getIntegerVT(VT.getSizeInBits()); |
| SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
| } |
| } |
| |
| if (!SrcReg) |
| SrcReg = getRegForValue(Op0); |
| |
| if (!SrcReg) |
| return false; |
| |
| auto *SI = cast<StoreInst>(I); |
| |
| // Try to emit a STLR for seq_cst/release. |
| if (SI->isAtomic()) { |
| AtomicOrdering Ord = SI->getOrdering(); |
| // The non-atomic instructions are sufficient for relaxed stores. |
| if (isReleaseOrStronger(Ord)) { |
| // The STLR addressing mode only supports a base reg; pass that directly. |
| Register AddrReg = getRegForValue(PtrV); |
| return emitStoreRelease(VT, SrcReg, AddrReg, |
| createMachineMemOperandFor(I)); |
| } |
| } |
| |
| // See if we can handle this address. |
| Address Addr; |
| if (!computeAddress(PtrV, Addr, Op0->getType())) |
| return false; |
| |
| if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) |
| return false; |
| return true; |
| } |
| |
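| /// Map an IR comparison predicate to the corresponding AArch64 condition |
| /// code. Returns AL for predicates that need more than one compare or are |
| /// otherwise unhandled. |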
| static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
| switch (Pred) { |
| case CmpInst::FCMP_ONE: |
| case CmpInst::FCMP_UEQ: |
| default: |
| // AL is our "false" for now. The other two need more compares. |
| return AArch64CC::AL; |
| case CmpInst::ICMP_EQ: |
| case CmpInst::FCMP_OEQ: |
| return AArch64CC::EQ; |
| case CmpInst::ICMP_SGT: |
| case CmpInst::FCMP_OGT: |
| return AArch64CC::GT; |
| case CmpInst::ICMP_SGE: |
| case CmpInst::FCMP_OGE: |
| return AArch64CC::GE; |
| case CmpInst::ICMP_UGT: |
| case CmpInst::FCMP_UGT: |
| return AArch64CC::HI; |
| case CmpInst::FCMP_OLT: |
| return AArch64CC::MI; |
| case CmpInst::ICMP_ULE: |
| case CmpInst::FCMP_OLE: |
| return AArch64CC::LS; |
| case CmpInst::FCMP_ORD: |
| return AArch64CC::VC; |
| case CmpInst::FCMP_UNO: |
| return AArch64CC::VS; |
| case CmpInst::FCMP_UGE: |
| return AArch64CC::PL; |
| case CmpInst::ICMP_SLT: |
| case CmpInst::FCMP_ULT: |
| return AArch64CC::LT; |
| case CmpInst::ICMP_SLE: |
| case CmpInst::FCMP_ULE: |
| return AArch64CC::LE; |
| case CmpInst::FCMP_UNE: |
| case CmpInst::ICMP_NE: |
| return AArch64CC::NE; |
| case CmpInst::ICMP_UGE: |
| return AArch64CC::HS; |
| case CmpInst::ICMP_ULT: |
| return AArch64CC::LO; |
| } |
| } |
| |
| /// Try to emit a combined compare-and-branch instruction. |
| bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
| // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions |
| // will not be produced, as they are conditional branch instructions that do |
| // not set flags. |
| if (FuncInfo.MF->getFunction().hasFnAttribute( |
| Attribute::SpeculativeLoadHardening)) |
| return false; |
| |
| assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); |
| const CmpInst *CI = cast<CmpInst>(BI->getCondition()); |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
| |
| const Value *LHS = CI->getOperand(0); |
| const Value *RHS = CI->getOperand(1); |
| |
| MVT VT; |
| if (!isTypeSupported(LHS->getType(), VT)) |
| return false; |
| |
| unsigned BW = VT.getSizeInBits(); |
| if (BW > 64) |
| return false; |
| |
| MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
| |
| // Try to take advantage of fallthrough opportunities. |
| if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
| std::swap(TBB, FBB); |
| Predicate = CmpInst::getInversePredicate(Predicate); |
| } |
| |
| int TestBit = -1; |
| bool IsCmpNE; |
| switch (Predicate) { |
| default: |
| return false; |
| case CmpInst::ICMP_EQ: |
| case CmpInst::ICMP_NE: |
| if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) |
| std::swap(LHS, RHS); |
| |
| if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) |
| return false; |
| |
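| // Try to match (and X, Pow2) ==/!= 0, which can be handled by testing just |
| // the single bit set in the mask. |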
| if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) |
| if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { |
| const Value *AndLHS = AI->getOperand(0); |
| const Value *AndRHS = AI->getOperand(1); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) |
| if (C->getValue().isPowerOf2()) |
| std::swap(AndLHS, AndRHS); |
| |
| if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) |
| if (C->getValue().isPowerOf2()) { |
| TestBit = C->getValue().logBase2(); |
| LHS = AndLHS; |
| } |
| } |
| |
| if (VT == MVT::i1) |
| TestBit = 0; |
| |
| IsCmpNE = Predicate == CmpInst::ICMP_NE; |
| break; |
| case CmpInst::ICMP_SLT: |
| case CmpInst::ICMP_SGE: |
| if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) |
| return false; |
| |
| TestBit = BW - 1; |
| IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
| break; |
| case CmpInst::ICMP_SGT: |
| case CmpInst::ICMP_SLE: |
| if (!isa<ConstantInt>(RHS)) |
| return false; |
| |
| if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) |
| return false; |
| |
| TestBit = BW - 1; |
| IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
| break; |
| } // end switch |
| |
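| // Opcode table indexed as [IsBitTest][IsCmpNE][Is64Bit]: CBZ/CBNZ compare a |
| // whole register against zero, TBZ/TBNZ test a single bit. |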
| static const unsigned OpcTable[2][2][2] = { |
| { {AArch64::CBZW, AArch64::CBZX }, |
| {AArch64::CBNZW, AArch64::CBNZX} }, |
| { {AArch64::TBZW, AArch64::TBZX }, |
| {AArch64::TBNZW, AArch64::TBNZX} } |
| }; |
| |
| bool IsBitTest = TestBit != -1; |
| bool Is64Bit = BW == 64; |
| if (TestBit < 32 && TestBit >= 0) |
| Is64Bit = false; |
| |
| unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
| const MCInstrDesc &II = TII.get(Opc); |
| |
| Register SrcReg = getRegForValue(LHS); |
| if (!SrcReg) |
| return false; |
| |
| if (BW == 64 && !Is64Bit) |
| SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); |
| |
| if ((BW < 32) && !IsBitTest) |
| SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); |
| |
| // Emit the combined compare and branch instruction. |
| SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
| MachineInstrBuilder MIB = |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) |
| .addReg(SrcReg); |
| if (IsBitTest) |
| MIB.addImm(TestBit); |
| MIB.addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| |
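| /// Select an IR branch. Conditional branches are lowered to a combined |
| /// compare-and-branch when possible, otherwise to a compare followed by a |
| /// conditional branch (with an extra branch for FCMP_UEQ/FCMP_ONE). |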
| bool AArch64FastISel::selectBranch(const Instruction *I) { |
| const BranchInst *BI = cast<BranchInst>(I); |
| if (BI->isUnconditional()) { |
| MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| fastEmitBranch(MSucc, BI->getDebugLoc()); |
| return true; |
| } |
| |
| MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
| MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
| |
| if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { |
| if (CI->hasOneUse() && isValueAvailable(CI)) { |
| // Try to optimize or fold the cmp. |
| CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_FALSE: |
| fastEmitBranch(FBB, MIMD.getDL()); |
| return true; |
| case CmpInst::FCMP_TRUE: |
| fastEmitBranch(TBB, MIMD.getDL()); |
| return true; |
| } |
| |
| // Try to emit a combined compare-and-branch first. |
| if (emitCompareAndBranch(BI)) |
| return true; |
| |
| // Try to take advantage of fallthrough opportunities. |
| if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
| std::swap(TBB, FBB); |
| Predicate = CmpInst::getInversePredicate(Predicate); |
| } |
| |
| // Emit the cmp. |
| if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
| return false; |
| |
| // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
| // instruction. |
| AArch64CC::CondCode CC = getCompareCC(Predicate); |
| AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
| switch (Predicate) { |
| default: |
| break; |
| case CmpInst::FCMP_UEQ: |
| ExtraCC = AArch64CC::EQ; |
| CC = AArch64CC::VS; |
| break; |
| case CmpInst::FCMP_ONE: |
| ExtraCC = AArch64CC::MI; |
| CC = AArch64CC::GT; |
| break; |
| } |
| assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
| |
| // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
| if (ExtraCC != AArch64CC::AL) { |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) |
| .addImm(ExtraCC) |
| .addMBB(TBB); |
| } |
| |
| // Emit the branch. |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) |
| .addImm(CC) |
| .addMBB(TBB); |
| |
| finishCondBranch(BI->getParent(), TBB, FBB); |
| return true; |
| } |
| } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { |
| uint64_t Imm = CI->getZExtValue(); |
| MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; |
| BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) |
| .addMBB(Target); |
| |
| // Obtain the branch probability and add the target to the successor list. |
| if (FuncInfo.BPI) { |
| auto BranchProbability = FuncInfo.BPI->
|