blob: ce1c1f7d403ad23eefe6a7521baa2e92ac727834 [file] [log] [blame]
//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
namespace {
/// AArch64 implementation of the target-independent FastISel interface.
class AArch64FastISel final : public FastISel {
  /// Describes the pieces of a memory/call address gathered by the address
  /// computation helpers: a base (virtual register or frame index), an
  /// optional offset register with its extend type and shift amount, an
  /// immediate offset, and an optional global value.
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    // Which member is valid is selected by Kind; the accessors assert on it.
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    /// Starts out as a register base with register number 0 (i.e. "unset").
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    /// Base register accessors; only valid while the kind is RegBase.
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    /// Frame index accessors; only valid while the kind is FrameIndexBase.
    /// NOTE(review): the index is stored as int but passed as unsigned here.
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    // The read accessors below are const so they can be used on const
    // Address objects, consistent with the other getters of this class.
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() const { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() const { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() const { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  // Constant materialization helpers; each returns the result register, or 0
  // when the constant is not handled.
  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  /// Caches the AArch64 subtarget of the machine function and the LLVMContext
  /// of the IR function being lowered.
  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};
} // end anonymous namespace
/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
"Unexpected integer extend instruction.");
assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
"Unexpected value type.");
bool IsZExt = isa<ZExtInst>(I);
if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
if (LI->hasOneUse())
return true;
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
return true;
return false;
}
/// Map a value type to the scale factor (in bytes) that a memory operation of
/// that type implicitly applies. Types not listed here yield 0, which callers
/// treat as invalid.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32:
  case MVT::f32:
    return 4;
  case MVT::i64:
  case MVT::f64:
    return 8;
  default:
    break;
  }
  return 0; // invalid
}
/// Pick the calling-convention assignment function for \p CC. WebKit_JS, GHC
/// and CFGuard_Check have dedicated handlers; every other convention uses the
/// Darwin PCS on Darwin targets and AAPCS elsewhere.
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::CFGuard_Check:
    return CC_AArch64_Win64_CFGuard_Check;
  default:
    break;
  }

  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  return CC_AArch64_AAPCS;
}
/// Materialize the address of the static alloca \p AI in a new register by
/// emitting an ADDXri on its frame index.
///
/// \returns the result register, or 0 if \p AI is not in the static alloca
/// map (i.e. it is a dynamic alloca, which is not handled here).
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas. A single find() both answers "is this a
  // static alloca?" and yields the frame index, avoiding a second lookup.
  auto SI = FuncInfo.StaticAllocaMap.find(AI);
  if (SI == FuncInfo.StaticAllocaMap.end())
    return 0;

  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          ResultReg)
      .addFrameIndex(SI->second)
      .addImm(0)
      .addImm(0);
  return ResultReg;
}
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
if (VT > MVT::i64)
return 0;
if (!CI->isZero())
return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
// Create a copy from the zero register to materialize a "0" value.
const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
: &AArch64::GPR32RegClass;
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
ResultReg).addReg(ZeroReg, getKillRegState(true));
return ResultReg;
}
/// Materialize the floating-point constant \p CFP of type \p VT in a register.
///
/// The strategies are tried in this order:
///  1. +0.0 is delegated to fastMaterializeFloatZero.
///  2. Values encodable as an FMOV immediate use FMOVSi/FMOVDi.
///  3. In the large code model the bit pattern is moved into a GPR and copied
///     to a register of the FP class.
///  4. Otherwise the value is loaded from the constant pool (ADRP + LDR).
///
/// \returns the result register, or 0 if \p VT is neither f32 nor f64 (or the
/// zero case could not be materialized).
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  // Bind by reference; the value is only read, so no APFloat copy is needed.
  const APFloat &Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    // Move the raw bit pattern into a GPR first ...
    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(Val.bitcastToAPInt().getZExtValue());

    // ... then copy it over to a register of the class used for VT.
    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet.
if (GV->isThreadLocal())
return 0;
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
return 0;
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
if (!DestEVT.isSimple())
return 0;
Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
unsigned ResultReg;
if (OpFlags & AArch64II::MO_GOT) {
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
unsigned LdrOpc;
if (Subtarget->isTargetILP32()) {
ResultReg = createResultReg(&AArch64::GPR32RegClass);
LdrOpc = AArch64::LDRWui;
} else {
ResultReg = createResultReg(&AArch64::GPR64RegClass);
LdrOpc = AArch64::LDRXui;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC | OpFlags);
if (!Subtarget->isTargetILP32())
return ResultReg;
// LDRWui produces a 32-bit register, but pointers in-register are 64-bits
// so we must extend the result on ILP32.
Register Result64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(Result64)
.addImm(0)
.addReg(ResultReg, RegState::Kill)
.addImm(AArch64::sub_32);
return Result64;
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
if (OpFlags & AArch64II::MO_TAGGED) {
// MO_TAGGED on the page indicates a tagged address. Set the tag now.
// We do so by creating a MOVK that sets bits 48-63 of the register to
// (global address + 0x100000000 - PC) >> 48. This assumes that we're in
// the small code model so we can assume a binary size of <= 4GB, which
// makes the untagged PC relative offset positive. The binary must also be
// loaded into address range [0, 2^48). Both of these properties need to
// be ensured at runtime when using tagged addresses.
//
// TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
// also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
// are not exactly 1:1 with FastISel so we cannot easily abstract this
// out. At some point, it would be nice to find a way to not have this
// duplciate code.
unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
DstReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, /*Offset=*/0x100000000,
AArch64II::MO_PREL | AArch64II::MO_G3)
.addImm(48);
ADRPReg = DstReg;
}
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
.addImm(0);
}
return ResultReg;
}
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
return 0;
MVT VT = CEVT.getSimpleVT();
// arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
// 'null' pointers need to have a somewhat special treatment.
if (isa<ConstantPointerNull>(C)) {
assert(VT == MVT::i64 && "Expected 64-bit pointers");
return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
}
if (const auto *CI = dyn_cast<ConstantInt>(C))
return materializeInt(CI, VT);
else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return materializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return materializeGV(GV);
return 0;
}
/// Materialize +0.0 of type f32 or f64 with an FMOV from the integer zero
/// register of the matching width.
/// \returns the result register, or 0 if the type is not legal or is neither
/// f32 nor f64.
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");

  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  unsigned ZReg;
  unsigned Opc;
  switch (VT.SimpleTy) {
  case MVT::f32:
    ZReg = AArch64::WZR;
    Opc = AArch64::FMOVWSr;
    break;
  case MVT::f64:
    ZReg = AArch64::XZR;
    Opc = AArch64::FMOVXDr;
    break;
  default:
    return 0;
  }

  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}
/// Return true if \p I is a multiply in which at least one of the two
/// operands is a constant integer that is a power of two.
static bool isMulPowOf2(const Value *I) {
  const auto *Mul = dyn_cast<MulOperator>(I);
  if (!Mul)
    return false;

  // Inspect operand 0 first, then operand 1.
  for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx)
    if (const auto *C = dyn_cast<ConstantInt>(Mul->getOperand(OpIdx)))
      if (C->getValue().isPowerOf2())
        return true;

  return false;
}
// Computes the address to get to an object.
//
// Walks through the instruction or constant expression defining Obj and folds
// what it can into Addr: bitcasts, no-op inttoptr/ptrtoint, GEPs with constant
// indices, static allocas, add/sub of constants, and shl/mul/and/sext/zext
// patterns (as an offset register with shift/extend). Whatever is not folded
// is materialized with getRegForValue() as the base register or, if a base is
// already set, as the offset register.
//
// Obj  - the value whose address is computed.
// Addr - in/out address description, updated in place. Some paths modify Addr
//        (e.g. shift/extend type) before a later step fails, so Addr should
//        not be relied upon when false is returned.
// Ty   - optional type; the shl/mul folds require it (its size in bytes must
//        equal the scale), and the and fold requires its size to be 8 bits.
//
// Returns true if Addr now describes Obj, false otherwise.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
const User *U = nullptr;
// UserOp1 acts as the "no foldable opcode" value; it hits the switch default.
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
// NOTE(review): the static_cast result is only used as a map lookup key here;
// Obj is not known to be an AllocaInst at this point.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
// Note: this local Ty shadows the Ty parameter within the if statement only.
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
switch (Opcode) {
default:
break;
case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr, Ty);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
case Instruction::GetElementPtr: {
// Keep a copy so Addr can be restored if folding the base operand fails.
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Iterate through the GEP folding the constants into offsets where
// we can.
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
if (StructType *STy = GTI.getStructTypeOrNull()) {
// Struct field: add the field's offset taken from the struct layout.
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
// Sequential index: scale by the alloc size of the indexed type.
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (computeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
// A static alloca becomes a frame-index base.
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
case Instruction::Add: {
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize a constant operand to the RHS.
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
return computeAddress(LHS, Addr, Ty);
}
// No constant operand: try to fold both operands, restoring Addr on failure.
Address Backup = Addr;
if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Sub: {
// Subs of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
return computeAddress(LHS, Addr, Ty);
}
break;
}
case Instruction::Shl: {
// Fold a left shift by a constant as a scaled offset register. Only possible
// when no offset register has been set yet.
if (Addr.getOffsetReg())
break;
const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
if (!CI)
break;
// Only shift amounts 1..3 are considered.
unsigned Val = CI->getZExtValue();
if (Val < 1 || Val > 3)
break;
// The shift must match the access size in bytes; NumBytes stays 0 (so the
// fold is rejected) when Ty is absent, unsized, or its bit size is not a
// power of two.
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1ULL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = U->getOperand(0);
if (const auto *I = dyn_cast<Instruction>(Src)) {
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
if (!isIntExtFree(ZE) &&
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
if (!isIntExtFree(SE) &&
SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
}
}
// An 'and' with 0xffffffff is folded as a UXTW extend of the sub_32
// subregister of the other operand.
if (const auto *AI = dyn_cast<BinaryOperator>(Src))
if (AI->getOpcode() == Instruction::And) {
const Value *LHS = AI->getOperand(0);
const Value *RHS = AI->getOperand(1);
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue() == 0xffffffff)
std::swap(LHS, RHS);
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setExtendType(AArch64_AM::UXTW);
Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
}
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
case Instruction::Mul: {
// Same as the Shl case, for a multiply by a power-of-2 constant.
if (Addr.getOffsetReg())
break;
if (!isMulPowOf2(U))
break;
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize power-of-2 value to the RHS.
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue().isPowerOf2())
std::swap(LHS, RHS);
assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
const auto *C = cast<ConstantInt>(RHS);
// Convert the multiplier into the equivalent shift amount.
unsigned Val = C->getValue().logBase2();
if (Val < 1 || Val > 3)
break;
// As for Shl: the scale must equal the access size in bytes.
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1ULL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = LHS;
if (const auto *I = dyn_cast<Instruction>(Src)) {
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
if (!isIntExtFree(ZE) &&
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
if (!isIntExtFree(SE) &&
SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
}
}
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
case Instruction::And: {
// Fold 'x & 0xffffffff' as an unshifted UXTW offset register. Only done when
// Ty is given and is 8 bits wide.
if (Addr.getOffsetReg())
break;
if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
break;
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize the mask constant to the RHS.
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue() == 0xffffffff)
std::swap(LHS, RHS);
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setShift(0);
// NOTE(review): the LSL extend type set here is immediately overwritten by
// the UXTW on the next line.
Addr.setExtendType(AArch64_AM::LSL);
Addr.setExtendType(AArch64_AM::UXTW);
Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
break;
}
case Instruction::SExt:
case Instruction::ZExt: {
// Fold an i32 -> wider extend as an unshifted SXTW/UXTW offset register.
// Requires a base register to be set already and no offset register yet.
if (!Addr.getReg() || Addr.getOffsetReg())
break;
const Value *Src = nullptr;
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(U)) {
if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
// Src stays null when the extend was not folded above.
if (!Src)
break;
Addr.setShift(0);
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
} // end switch
// Nothing was folded. If no base register is set yet, Obj becomes the base.
if (Addr.isRegBase() && !Addr.getReg()) {
Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setReg(Reg);
return true;
}
// Otherwise, if the offset register slot is still free, Obj goes there.
if (!Addr.getOffsetReg()) {
Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
// Both the base and the offset register are already taken.
return false;
}
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
bool InMBB = true;
if (const auto *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default: break;
case Instruction::BitCast:
// Look past bitcasts if its operand is in the same BB.
if (InMBB)
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
}
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
Addr.setGlobalValue(GV);
return true;
}
// If all else fails, try to materialize the value in a register.
if (!Addr.getGlobalValue()) {
Addr.setReg(getRegForValue(V));
return Addr.getReg() != 0;
}
return false;
}
/// Determine whether \p Ty maps to a simple value type that is legal for the
/// target and handled by this fast-isel implementation.
///
/// \p VT is written as soon as the type is known to be simple, i.e. it may be
/// set even when false is returned (for f128 or a non-legal simple type).
/// Pointer types on ILP32 targets are always rejected, without touching \p VT.
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  const EVT ValueVT = TLI.getValueType(DL, Ty, true);

  const bool IsILP32Pointer = Subtarget->isTargetILP32() && Ty->isPointerTy();

  // Only handle simple types.
  if (IsILP32Pointer || ValueVT == MVT::Other || !ValueVT.isSimple())
    return false;

  VT = ValueVT.getSimpleVT();

  // f128 is a legal type, but it's not something we handle in fast-isel.
  // Everything else is accepted when a register can directly hold the value.
  return VT != MVT::f128 && TLI.isTypeLegal(VT);
}
/// Determine if the value type is supported by FastISel.
///
/// Besides the legal types accepted by isTypeLegal, the small integer types
/// i1, i8 and i16 are accepted as well. Vector types are rejected unless
/// \p IsVectorAllowed is set.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type than can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  switch (VT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    return true;
  default:
    return false;
  }
}
bool AArch64FastISel::isValueAvailable(const Value *V) const {
if (!isa<Instruction>(V))
return true;
const auto *I = cast<Instruction>(V);
return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}
/// Rewrite \p Addr so that it can be encoded by a single load/store of type
/// \p VT, emitting helper instructions where required.
///
/// \returns false when targeting ILP32, when getImplicitScaleFactor yields 0
/// for \p VT, or when a required helper instruction could not be emitted;
/// true otherwise. \p Addr is updated in place.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
if (Subtarget->isTargetILP32())
return false;
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
return false;
bool ImmediateOffsetNeedsLowering = false;
bool RegisterOffsetNeedsLowering = false;
int64_t Offset = Addr.getOffset();
// An offset that is negative or has any of the low (ScaleFactor - 1) bits set
// must fit a signed 9-bit immediate. A positive offset with none of those bits
// set must fit an unsigned 12-bit immediate once divided by the scale factor.
// Offsets failing their check are lowered into a register further down.
if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
ImmediateOffsetNeedsLowering = true;
else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
!isUInt<12>(Offset / ScaleFactor))
ImmediateOffsetNeedsLowering = true;
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
// emit an additional add to take care of the offset register.
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
// Cannot encode zero register as base.
if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
RegisterOffsetNeedsLowering = true;
// If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
{
// ADDXri of the frame index with a zero immediate and zero shift.
Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(Addr.getFI())
.addImm(0)
.addImm(0);
Addr.setKind(Address::RegBase);
Addr.setReg(ResultReg);
}
// Fold the offset register (with its extend/shift) into the base register.
if (RegisterOffsetNeedsLowering) {
unsigned ResultReg = 0;
if (Addr.getReg()) {
// There is a base register: add the offset register to it, using the
// extended-register form for SXTW/UXTW and the LSL shifted form otherwise.
if (Addr.getExtendType() == AArch64_AM::SXTW ||
Addr.getExtendType() == AArch64_AM::UXTW )
ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
Addr.getOffsetReg(), Addr.getExtendType(),
Addr.getShift());
else
ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
Addr.getOffsetReg(), AArch64_AM::LSL,
Addr.getShift());
} else {
// No base register: the shifted (and possibly extended) offset register
// becomes the new base.
if (Addr.getExtendType() == AArch64_AM::UXTW)
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
Addr.getShift(), /*IsZExt=*/true);
else if (Addr.getExtendType() == AArch64_AM::SXTW)
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
Addr.getShift(), /*IsZExt=*/false);
else
ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
Addr.getShift());
}
if (!ResultReg)
return false;
// The address is now a plain base register with no offset register, shift
// or extend.
Addr.setReg(ResultReg);
Addr.setOffsetReg(0);
Addr.setShift(0);
Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
}
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
if (ImmediateOffsetNeedsLowering) {
unsigned ResultReg;
if (Addr.getReg())
// Try to fold the immediate into the add instruction.
ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
else
// No base register at all: the offset itself is materialized as a constant.
ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
}
return true;
}
/// Append the address operands described by \p Addr to the load/store being
/// built in \p MIB, followed by a memory operand when one is available.
///
/// The byte offset of \p Addr is divided by \p ScaleFactor to form the
/// immediate operand. For a frame-index base a new memory operand is created
/// from \p Flags and replaces the incoming \p MMO. For a register base,
/// \p Flags is only used to tell stores from loads when choosing which operand
/// indices to constrain; the register fields of \p Addr are overwritten with
/// the constrained registers.
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
MachineMemOperand::Flags Flags,
unsigned ScaleFactor,
MachineMemOperand *MMO) {
int64_t Offset = Addr.getOffset() / ScaleFactor;
// Frame base works a bit differently. Handle it separately.
if (Addr.isFIBase()) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset);
} else {
assert(Addr.isRegBase() && "Unexpected address kind.");
const MCInstrDesc &II = MIB->getDesc();
// Stores skip one extra operand before the address operands; emitStore adds
// the stored register to the instruction before calling this function.
unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
Addr.setReg(
constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
Addr.setOffsetReg(
constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
if (Addr.getOffsetReg()) {
// Register-offset form: base register, offset register, a flag that is set
// for the signed extends (SXTW/SXTX), and a flag that is set when the shift
// amount is non-zero.
assert(Addr.getOffset() == 0 && "Unexpected offset");
bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
Addr.getExtendType() == AArch64_AM::SXTX;
MIB.addReg(Addr.getReg());
MIB.addReg(Addr.getOffsetReg());
MIB.addImm(IsSigned);
MIB.addImm(Addr.getShift() != 0);
} else
// Immediate-offset form: base register plus the scaled offset.
MIB.addReg(Addr.getReg()).addImm(Offset);
}
if (MMO)
MIB.addMemOperand(MMO);
}
/// Emit an add (\p UseAdd true) or a subtract (\p UseAdd false) of \p LHS and
/// \p RHS, trying the immediate, extended-register and shifted-register forms
/// before falling back to the plain register-register form.
///
/// i1, i8 and i16 operations are performed as i32: the operands are widened
/// with emitIntExt, which receives \p IsZExt, or (for i8/i16) through the
/// extended-register form. \p SetFlags and \p WantResult are forwarded to the
/// emitAddSub_* helpers.
///
/// \returns the register produced by the selected helper, or 0 when the type
/// is unsupported or an operand could not be placed in a register.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
const Value *RHS, bool SetFlags,
bool WantResult, bool IsZExt) {
AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
bool NeedExtend = false;
// Decide whether the operands need widening and, for i8/i16, which extend
// kind the extended-register form should use. i1 gets no extend kind.
switch (RetVT.SimpleTy) {
default:
return 0;
case MVT::i1:
NeedExtend = true;
break;
case MVT::i8:
NeedExtend = true;
ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
break;
case MVT::i16:
NeedExtend = true;
ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
break;
case MVT::i32: // fall-through
case MVT::i64:
break;
}
// From here on the operation itself is at least 32 bits wide; SrcVT keeps the
// original type for the extends.
MVT SrcVT = RetVT;
RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
// The operand swaps below are only performed for adds.
// Canonicalize immediates to the RHS first.
if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
// Canonicalize mul by power of 2 to the RHS.
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
if (isMulPowOf2(LHS))
std::swap(LHS, RHS);
// Canonicalize shift immediate to the RHS.
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
if (isa<ConstantInt>(SI->getOperand(1)))
if (SI->getOpcode() == Instruction::Shl ||
SI->getOpcode() == Instruction::LShr ||
SI->getOpcode() == Instruction::AShr )
std::swap(LHS, RHS);
Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
// NOTE(review): the result of emitIntExt is not checked for 0 here - confirm
// it cannot fail for the i1/i8/i16 -> i32 cases that reach this point.
if (NeedExtend)
LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
unsigned ResultReg = 0;
// First choice: fold a constant RHS into the immediate form. A negative
// constant flips add <-> sub and uses the negated value.
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
if (C->isNegative())
ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
WantResult);
else
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
WantResult);
} else if (const auto *C = dyn_cast<Constant>(RHS))
// Any other constant that is a null value is treated as immediate 0.
if (C->isNullValue())
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
// Only extend the RHS within the instruction if there is a valid extend type.
if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
isValueAvailable(RHS)) {
// A left shift by a constant below 4 is folded into the extended-register
// form together with the extend.
if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
C->getZExtValue(), SetFlags, WantResult);
}
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
SetFlags, WantResult);
}
// Check if the mul can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
// Make sure the power-of-2 constant ends up in MulRHS.
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
if (C->getValue().isPowerOf2())
std::swap(MulLHS, MulRHS);
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
// The multiply becomes a left shift by log2 of the constant.
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
Register RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
}
// Check if the shift can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
switch (SI->getOpcode()) {
default: break;
case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
}
uint64_t ShiftVal = C->getZExtValue();
if (ShiftType != AArch64_AM::InvalidShiftExtend) {
Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
}
}
}
// Nothing could be folded: use the register-register form, widening the RHS
// first when the original type was narrower than i32.
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
if (NeedExtend)
RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
/// Emit the register-register form of an add/subtract of \p LHSReg and
/// \p RHSReg.
///
/// \p UseAdd selects add over subtract and \p SetFlags selects the
/// flag-setting opcode variant. When \p WantResult is false the destination
/// is the zero register (XZR/WZR) instead of a fresh virtual register.
///
/// \returns the destination register, or 0 if either operand is SP/WSP or
/// \p RetVT is neither i32 nor i64.
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  // This form is not used with the stack pointer as an operand.
  const auto IsStackPointer = [](unsigned Reg) {
    return Reg == AArch64::SP || Reg == AArch64::WSP;
  };
  if (IsStackPointer(LHSReg) || IsStackPointer(RHSReg))
    return 0;

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  const MCInstrDesc &II = TII.get(OpcTable[SetFlags][UseAdd][Is64Bit]);

  unsigned DstReg;
  if (WantResult) {
    DstReg = createResultReg(Is64Bit ? &AArch64::GPR64RegClass
                                     : &AArch64::GPR32RegClass);
  } else {
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  }

  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, DstReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return DstReg;
}
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
uint64_t Imm, bool SetFlags,
bool WantResult) {
assert(LHSReg && "Invalid register number.");
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
unsigned ShiftImm;
if (isUInt<12>(Imm))
ShiftImm = 0;
else if ((Imm & 0xfff000) == Imm) {
ShiftImm = 12;
Imm >>= 12;
} else
return 0;
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWri, AArch64::SUBXri },
{ AArch64::ADDWri, AArch64::ADDXri } },
{ { AArch64::SUBSWri, AArch64::SUBSXri },
{ AArch64::ADDSWri, AArch64::ADDSXri } }
};
bool Is64Bit = RetVT == MVT::i64;
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
const TargetRegisterClass *RC;
if (SetFlags)
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
else
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
unsigned ResultReg;
if (WantResult)
ResultReg = createResultReg(RC);
else
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
const MCInstrDesc &II = TII.get(Opc);
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(LHSReg)
.addImm(Imm)
.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
return ResultReg;
}
/// Emit the shifted-register form of an add/subtract: \p RHSReg is shifted by
/// \p ShiftImm using \p ShiftType before being combined with \p LHSReg.
///
/// \p UseAdd selects add over subtract and \p SetFlags selects the
/// flag-setting opcode variant. When \p WantResult is false the destination
/// is the zero register (XZR/WZR) instead of a fresh virtual register.
///
/// \returns the destination register, or 0 if \p RetVT is neither i32 nor i64
/// or \p ShiftImm is not smaller than the bit width of \p RetVT.
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  const MCInstrDesc &II = TII.get(OpcTable[SetFlags][UseAdd][Is64Bit]);

  unsigned DstReg;
  if (WantResult) {
    DstReg = createResultReg(Is64Bit ? &AArch64::GPR64RegClass
                                     : &AArch64::GPR32RegClass);
  } else {
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  }

  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, DstReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return DstReg;
}
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
unsigned RHSReg,
AArch64_AM::ShiftExtendType ExtType,
uint64_t ShiftImm, bool SetFlags,
bool WantResult) {
assert(LHSReg && RHSReg && "Invalid register number.");
assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
if (ShiftImm >= 4)
return 0;
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWrx, AArch64::SUBXrx },
{ AArch64::ADDWrx, AArch64::ADDXrx } },
{ { AArch64::SUBSWrx, AArch64::SUBSXrx },
{ AArch64::ADDSWrx, AArch64::ADDSXrx } }
};
bool Is64Bit = RetVT == MVT::i64;
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
const TargetRegisterClass *RC = nullptr;
if (SetFlags)
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
else
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
unsigned ResultReg;
if (WantResult)
ResultReg = createResultReg(RC);
else
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
const MCInstrDesc &II = TII.get(Opc);
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(LHSReg)
.addReg(RHSReg)
.addImm(getArithExtendImm(ExtType, ShiftImm));
return ResultReg;
}
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
Type *Ty = LHS->getType();
EVT EVT = TLI.getValueType(DL, Ty, true);
if (!EVT.isSimple())
return false;
MVT VT = EVT.getSimpleVT();
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
return emitICmp(VT, LHS, RHS, IsZExt);
case MVT::f32:
case MVT::f64:
return emitFCmp(VT, LHS, RHS);
}
}
/// Emit an integer compare of \p LHS and \p RHS as a flag-setting subtract
/// that does not request a result register.
///
/// \returns true if emitSub reported a non-zero register.
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  const unsigned CmpReg = emitSub(RetVT, LHS, RHS, /*SetFlags=*/true,
                                  /*WantResult=*/false, IsZExt);
  return CmpReg != 0;
}
/// Emit an integer compare of \p LHSReg against the immediate \p Imm as a
/// flag-setting subtract-immediate that does not request a result register.
///
/// \returns true if emitAddSub_ri reported a non-zero register.
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  const unsigned CmpReg =
      emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                    /*SetFlags=*/true, /*WantResult=*/false);
  return CmpReg != 0;
}
/// Emit a floating-point compare of \p LHS and \p RHS for f32 or f64.
///
/// When \p RHS is a non-negative zero constant, the single-register FCMP*ri
/// opcode is used and \p RHS is never placed in a register.
///
/// \returns false for any other type or when an operand cannot be placed in a
/// register; true once the compare has been emitted.
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  const bool IsDouble = RetVT == MVT::f64;
  if (!IsDouble && RetVT != MVT::f32)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  const auto *RHSConst = dyn_cast<ConstantFP>(RHS);
  const bool CompareWithZero =
      RHSConst && RHSConst->isZero() && !RHSConst->isNegative();

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (CompareWithZero) {
    const unsigned ImmOpc = IsDouble ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(ImmOpc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  const unsigned RegOpc = IsDouble ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(RegOpc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}
/// Emit an add of \p LHS and \p RHS by forwarding to emitAddSub with
/// UseAdd set to true. All other arguments are passed through unchanged.
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  const bool UseAdd = true;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri; a
/// negative \p Imm is emitted as a subtract of its magnitude. If that fails,
/// then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
///
/// \returns the register holding the sum, or 0 if neither form could be
/// emitted.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0) {
    // Negate in the unsigned domain: "-Imm" on an int64_t overflows, which is
    // undefined behavior, when Imm is the minimum int64_t value. The unsigned
    // result is the same magnitude for every other negative value.
    const uint64_t Magnitude = uint64_t(0) - static_cast<uint64_t>(Imm);
    ResultReg = emitAddSub_ri(false, VT, Op0, Magnitude);
  } else {
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
  }

  if (ResultReg)
    return ResultReg;

  // The immediate could not be encoded; put it in a register and add that.
  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}
/// Emit a subtract of \p RHS from \p LHS by forwarding to emitAddSub with
/// UseAdd set to false. All other arguments are passed through unchanged.
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  const bool UseAdd = false;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
/// Emit a flag-setting register-register subtract by forwarding to
/// emitAddSub_rr with UseAdd false and SetFlags true.
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  const bool UseAdd = false;
  const bool SetFlags = true;
  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
/// Emit a flag-setting shifted-register subtract by forwarding to
/// emitAddSub_rs with UseAdd false and SetFlags true.
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  const bool UseAdd = false;
  const bool SetFlags = true;
  return emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, ShiftImm,
                       SetFlags, WantResult);
}
/// Emit the logical operation \p ISDOpc on \p LHS and \p RHS, preferring the
/// immediate form and the shifted-register form (for a multiply by a power of
/// two or a left shift by a constant) over the generic register-register form.
///
/// \returns the result register, or 0 when an operand cannot be placed in a
/// register or no form could be emitted.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
const Value *LHS, const Value *RHS) {
// Canonicalize immediates to the RHS first.
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
std::swap(LHS, RHS);
// Canonicalize mul by power-of-2 to the RHS.
if (LHS->hasOneUse() && isValueAvailable(LHS))
if (isMulPowOf2(LHS))
std::swap(LHS, RHS);
// Canonicalize shift immediate to the RHS.
if (LHS->hasOneUse() && isValueAvailable(LHS))
if (const auto *SI = dyn_cast<ShlOperator>(LHS))
if (isa<ConstantInt>(SI->getOperand(1)))
std::swap(LHS, RHS);
Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
unsigned ResultReg = 0;
// First choice: a constant RHS as an immediate operand.
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = C->getZExtValue();
ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
}
if (ResultReg)
return ResultReg;
// Check if the mul can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
// Make sure the power-of-2 constant ends up in MulRHS.
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
if (C->getValue().isPowerOf2())
std::swap(MulLHS, MulRHS);
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
// The multiply becomes a left shift by log2 of the constant.
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
Register RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
if (ResultReg)
return ResultReg;
}
}
// Check if the shift can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<ShlOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
uint64_t ShiftVal = C->getZExtValue();
Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
if (ResultReg)
return ResultReg;
}
}
// Nothing could be folded: use the generic register-register form, performed
// on at least 32 bits.
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
// For i8/i16 the 32-bit result is masked down to the width of the type.
// NOTE(review): ResultReg is not checked for 0 before being passed to
// emitAnd_ri - confirm fastEmit_rr cannot fail here.
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
}
return ResultReg;
}
/// Emit the register-immediate form of the logical operation \p ISDOpc
/// (ISD::AND, ISD::OR or ISD::XOR) on \p LHSReg and \p Imm.
///
/// i1, i8, i16 and i32 use the 32-bit opcodes, i64 the 64-bit ones. For i8 and
/// i16 the result of an OR or XOR is additionally masked to the width of the
/// type; AND results are returned unmasked.
///
/// \returns the result register, or 0 if \p RetVT is unsupported or \p Imm is
/// not a valid logical immediate for the register size.
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Indexed as [ISDOpc - ISD::AND][Is64Bit].
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };

  bool Is64Bit;
  switch (RetVT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Is64Bit = false;
    break;
  case MVT::i64:
    Is64Bit = true;
    break;
  default:
    return 0;
  }

  const unsigned RegSize = Is64Bit ? 64 : 32;
  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  const unsigned Opc = OpcTable[ISDOpc - ISD::AND][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  Register ResultReg = fastEmitInst_ri(
      Opc, RC, LHSReg, AArch64_AM::encodeLogicalImmediate(Imm, RegSize));

  const bool IsNarrow = RetVT >= MVT::i8 && RetVT <= MVT::i16;
  if (IsNarrow && ISDOpc != ISD::AND) {
    const uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
/// Emit the shifted-register form of the logical operation \p ISDOpc
/// (ISD::AND, ISD::OR or ISD::XOR): \p RHSReg is shifted left by \p ShiftImm
/// before being combined with \p LHSReg.
///
/// i1, i8, i16 and i32 use the 32-bit opcodes, i64 the 64-bit ones. For i8 and
/// i16 the result is additionally masked to the width of the type.
///
/// \returns the result register, or 0 if \p ShiftImm is not smaller than the
/// bit width of \p RetVT or \p RetVT is unsupported.
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Indexed as [ISDOpc - ISD::AND][Is64Bit].
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  bool Is64Bit;
  switch (RetVT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Is64Bit = false;
    break;
  case MVT::i64:
    Is64Bit = true;
    break;
  default:
    return 0;
  }

  const unsigned Opc = OpcTable[ISDOpc - ISD::AND][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register ResultReg = fastEmitInst_rri(
      Opc, RC, LHSReg, RHSReg,
      AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));

  const bool IsNarrow = RetVT >= MVT::i8 && RetVT <= MVT::i16;
  if (IsNarrow) {
    const uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
/// Emit an AND of \p LHSReg with the immediate \p Imm by forwarding to
/// emitLogicalOp_ri with ISD::AND.
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
                                     uint64_t Imm) {
  const unsigned AndReg = emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
  return AndReg;
}
/// Emit a load of memory type \p VT from \p Addr, producing a value of type
/// \p RetVT.
///
/// \p WantZExt selects the zero-extending half of the integer opcode table
/// over the sign-extending half. \p Addr is taken by value, so the
/// simplification performed here is not visible to the caller. \p MMO is
/// forwarded to addLoadStoreOperands.
///
/// \returns the result register, or 0 when
/// TLI.allowsMisalignedMemoryAccesses(VT) is false or the address cannot be
/// simplified.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
bool WantZExt, MachineMemOperand *MMO) {
if (!TLI.allowsMisalignedMemoryAccesses(VT))
return 0;
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return 0;
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
llvm_unreachable("Unexpected value type.");
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
bool UseScaled = true;
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
UseScaled = false;
ScaleFactor = 1;
}
// Integer opcodes, indexed as [WantZExt][2 * Idx + IsRet64Bit][size column].
// The size columns are used for i1/i8, i16, i32 and i64 in that order. Rows
// come in pairs selected by IsRet64Bit; going by the opcode names, the pairs
// are LDUR* (Idx 0), LDR*ui (Idx 1), LDR*roX (Idx 2) and LDR*roW (Idx 3).
static const unsigned GPOpcTable[2][8][4] = {
// Sign-extend.
{ { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
AArch64::LDURXi },
{ AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
AArch64::LDRXui },
{ AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
AArch64::LDRXroX },
{ AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
AArch64::LDRXroW },
{ AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
AArch64::LDRXroW }
},
// Zero-extend.
{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
AArch64::LDRXroW },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
AArch64::LDRXroW }
}
};
// Floating-point opcodes, indexed as [Idx][0 for f32, 1 for f64].
static const unsigned FPOpcTable[4][2] = {
{ AArch64::LDURSi, AArch64::LDURDi },
{ AArch64::LDRSui, AArch64::LDRDui },
{ AArch64::LDRSroX, AArch64::LDRDroX },
{ AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
// Idx is 2 for a register offset, 1 for a scaled immediate and 0 for an
// unscaled immediate; it is then bumped by one whenever the address carries a
// UXTW or SXTW extend.
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
if (Addr.getExtendType() == AArch64_AM::UXTW ||
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
bool IsRet64Bit = RetVT == MVT::i64;
// For i1 through i32 a 64-bit result register is only used for a
// sign-extending load to i64; every other case loads into a 32-bit register.
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected value type.");
case MVT::i1: // Intentional fall-through.
case MVT::i8:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i16:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i32:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i64:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
RC = &AArch64::GPR64RegClass;
break;
case MVT::f32:
Opc = FPOpcTable[Idx][0];
RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
Opc = FPOpcTable[Idx][1];
RC = &AArch64::FPR64RegClass;
break;
}
// Create the base instruction, then add the operands.
Register ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(Opc), ResultReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
if (VT == MVT::i1) {
// Keep only bit 0 of the loaded value.
unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
// For zero-extending loads to 64bit we emit a 32bit load and then convert
// the 32bit reg to a 64bit reg.
if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
.addReg(ResultReg, getKillRegState(true))
.addImm(AArch64::sub_32);
ResultReg = Reg64;
}
return ResultReg;
}
bool AArch64FastISel::selectAddSub(const Instruction *I) {
MVT VT;
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
return false;
if (VT.isVector())
return selectOperator(I, I->getOpcode());
unsigned ResultReg;
switch (I->getOpcode()) {
default:
llvm_unreachable("Unexpected instruction.");
case Instruction::Add:
ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
break;
case Instruction::Sub:
ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
break;
}
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
/// Select an And, Or or Xor instruction.
///
/// Vector types are handed to selectOperator; scalar types go through
/// emitLogicalOp with the matching ISD opcode. On success the result register
/// is recorded for \p I.
///
/// \returns false if the type is unsupported or no instruction was emitted.
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  // Translate the IR opcode into its ISD counterpart.
  unsigned ISDOpc;
  switch (I->getOpcode()) {
  case Instruction::And:
    ISDOpc = ISD::AND;
    break;
  case Instruction::Or:
    ISDOpc = ISD::OR;
    break;
  case Instruction::Xor:
    ISDOpc = ISD::XOR;
    break;
  default:
    llvm_unreachable("Unexpected instruction.");
  }

  const unsigned ResultReg =
      emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }
  return false;
}
/// Select a non-atomic load instruction, folding a single following zero- or
/// sign-extend user into the load when its type is supported.
///
/// \returns false when the load is not handled here: unsupported type, atomic
/// load, a swifterror pointer operand, an address that cannot be computed, or
/// a failure to emit the load.
bool AArch64FastISel::selectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
cast<LoadInst>(I)->isAtomic())
return false;
const Value *SV = I->getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return false;
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return false;
}
}
// See if we can handle this address.
Address Addr;
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
// Fold the following sign-/zero-extend into the load instruction.
bool WantZExt = true;
MVT RetVT = VT;
const Value *IntExtVal = nullptr;
if (I->hasOneUse()) {
// isTypeSupported writes the extend's type into RetVT; when that type is not
// supported RetVT is reset to the load's own type. WantZExt is cleared for
// any sext user, whether or not the extend is folded.
if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
if (isTypeSupported(ZE->getType(), RetVT))
IntExtVal = ZE;
else
RetVT = VT;
} else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
if (isTypeSupported(SE->getType(), RetVT))
IntExtVal = SE;
else
RetVT = VT;
WantZExt = false;
}
}
unsigned ResultReg =
emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
if (!ResultReg)
return false;
// There are a few different cases we have to handle, because the load or the
// sign-/zero-extend might not be selected by FastISel if we fall-back to
// SelectionDAG. There is also an ordering issue when both instructions are in
// different basic blocks.
// 1.) The load instruction is selected by FastISel, but the integer extend
// not. This usually happens when the integer extend is in a different
// basic block and SelectionDAG took over for that basic block.
// 2.) The load instruction is selected before the integer extend. This only
// happens when the integer extend is in a different basic block.
// 3.) The load instruction is selected by SelectionDAG and the integer extend
// by FastISel. This happens if there are instructions between the load
// and the integer extend that couldn't be selected by FastISel.
if (IntExtVal) {
// The integer extend hasn't been emitted yet. FastISel or SelectionDAG
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
Register Reg = lookUpRegForValue(IntExtVal);
auto *MI = MRI.getUniqueVRegDef(Reg);
if (!MI) {
// The load was emitted with a 64-bit result but the load instruction itself
// must map to a 32-bit (or narrower) value.
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
// Note that this iterator shadows the function parameter I; the register is
// taken from the instruction preceding the one that is removed.
MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
ResultReg = std::prev(I)->getOperand(0).getReg();
removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
AArch64::sub_32);
}
updateValueMap(I, ResultReg);
return true;
}
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
while (MI) {
// Remember the first register use of MI, remove MI, then continue with the
// unique definition of that register, if there is one.
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
Reg = Opnd.getReg();
break;
}
}
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
MI = nullptr;
if (Reg)
MI = MRI.getUniqueVRegDef(Reg);
}
// The load's result now stands in for the extend's value.
updateValueMap(IntExtVal, ResultReg);
return true;
}
updateValueMap(I, ResultReg);
return true;
}
/// Emit an STLR* store of \p SrcReg to the address held in \p AddrReg,
/// attaching \p MMO as the memory operand.
///
/// \returns false if \p VT is not one of i8, i16, i32 or i64; true once the
/// instruction has been emitted.
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
                                       unsigned AddrReg,
                                       MachineMemOperand *MMO) {
  unsigned Opc;
  switch (VT.SimpleTy) {
  case MVT::i8:
    Opc = AArch64::STLRB;
    break;
  case MVT::i16:
    Opc = AArch64::STLRH;
    break;
  case MVT::i32:
    Opc = AArch64::STLRW;
    break;
  case MVT::i64:
    Opc = AArch64::STLRX;
    break;
  default:
    return false;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  SrcReg = constrainOperandRegClass(Desc, SrcReg, 0);
  AddrReg = constrainOperandRegClass(Desc, AddrReg, 1);

  MachineInstrBuilder Store =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  Store.addReg(SrcReg);
  Store.addReg(AddrReg);
  Store.addMemOperand(MMO);
  return true;
}
/// Emit a store of \p SrcReg, with memory type \p VT, to \p Addr.
///
/// \p Addr is taken by value, so the simplification performed here is not
/// visible to the caller. \p MMO is forwarded to addLoadStoreOperands.
///
/// \returns false when TLI.allowsMisalignedMemoryAccesses(VT) is false or the
/// address cannot be simplified; true once the store has been emitted.
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO) {
if (!TLI.allowsMisalignedMemoryAccesses(VT))
return false;
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return false;
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
llvm_unreachable("Unexpected value type.");
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
bool UseScaled = true;
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
UseScaled = false;
ScaleFactor = 1;
}
// Indexed as [Idx][type column]; the columns are used for i1/i8, i16, i32,
// i64, f32 and f64 in that order. Going by the opcode names, the rows are
// STUR* (Idx 0), STR*ui (Idx 1), STR*roX (Idx 2) and STR*roW (Idx 3).
static const unsigned OpcTable[4][6] = {
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
AArch64::STURSi, AArch64::STURDi },
{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
AArch64::STRSui, AArch64::STRDui },
{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
AArch64::STRSroX, AArch64::STRDroX },
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
AArch64::STRSroW, AArch64::STRDroW }
};
unsigned Opc;
bool VTIsi1 = false;
// Idx is 2 for a register offset, 1 for a scaled immediate and 0 for an
// unscaled immediate; it is then bumped by one whenever the address carries a
// UXTW or SXTW extend.
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
if (Addr.getExtendType() == AArch64_AM::UXTW ||
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
case MVT::i1: VTIsi1 = true; [[fallthrough]];
case MVT::i8: Opc = OpcTable[Idx][0]; break;
case MVT::i16: Opc = OpcTable[Idx][1]; break;
case MVT::i32: Opc = OpcTable[Idx][2]; break;
case MVT::i64: Opc = OpcTable[Idx][3]; break;
case MVT::f32: Opc = OpcTable[Idx][4]; break;
case MVT::f64: Opc = OpcTable[Idx][5]; break;
}
// Storing an i1 requires special handling.
if (VTIsi1 && SrcReg != AArch64::WZR) {
// Keep only bit 0 of the value; WZR is stored as is.
unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
const MCInstrDesc &II = TII.get(Opc);
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
// The stored register goes first; the address operands follow it.
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
return true;
}
/// Select a store instruction.
///
/// Operand 0 of \p I is the value to store and operand 1 is the pointer.
/// Release-or-stronger atomic stores go through emitStoreRelease; all other
/// stores go through computeAddress + emitStore.
///
/// \returns false when the store cannot be handled here: the value type is
///          unsupported, the pointer is a swifterror argument/alloca, the
///          value or the address cannot be placed in a register, or the
///          address/opcode cannot be formed. Returns true once the store has
///          been emitted.
bool AArch64FastISel::selectStore(const Instruction *I) {
  MVT VT;
  const Value *Op0 = I->getOperand(0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Get the value to be stored into a register. Use the zero register directly
  // when possible to avoid an unnecessary copy and a wasted register.
  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    if (CI->isZero())
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    // Positive floating-point zero is stored as an integer zero of the same
    // width, so the store type is switched to the integer type as well.
    if (CF->isZero() && !CF->isNegative()) {
      VT = MVT::getIntegerVT(VT.getSizeInBits());
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    }
  }

  if (!SrcReg)
    SrcReg = getRegForValue(Op0);

  if (!SrcReg)
    return false;

  auto *SI = cast<StoreInst>(I);

  // Try to emit a STLR for seq_cst/release.
  if (SI->isAtomic()) {
    AtomicOrdering Ord = SI->getOrdering();
    // The non-atomic instructions are sufficient for relaxed stores.
    if (isReleaseOrStronger(Ord)) {
      // The STLR addressing mode only supports a base reg; pass that directly.
      Register AddrReg = getRegForValue(PtrV);
      // Bail out if the pointer could not be placed in a register, exactly as
      // is done for the stored value above, instead of building an STLR with
      // an invalid address register.
      if (!AddrReg)
        return false;
      return emitStoreRelease(VT, SrcReg, AddrReg,
                              createMachineMemOperandFor(I));
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(PtrV, Addr, Op0->getType()))
    return false;

  return emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I));
}
/// Map an IR compare predicate to the AArch64 condition code used to test it.
///
/// AArch64CC::AL is returned as the "no single condition code" marker: it is
/// the result for FCMP_ONE and FCMP_UEQ, which need more than one compare,
/// and for every predicate not listed below.
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  // Integer predicates.
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;

  // Ordered floating-point predicates.
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;

  // Unordered floating-point predicates.
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
    return AArch64CC::NE;

  // AL is our "false" for now. FCMP_ONE and FCMP_UEQ need more compares.
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    return AArch64CC::AL;
  }
}
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (FuncInfo.MF->getFunction().hasFnAttribute(
Attribute::SpeculativeLoadHardening))
return false;
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
const Value *LHS = CI->getOperand(0);
const Value *RHS = CI->getOperand(1);
MVT VT;
if (!isTypeSupported(LHS->getType(), VT))
return false;
unsigned BW = VT.getSizeInBits();
if (BW > 64)
return false;
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// Try to take advantage of fallthrough opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
int TestBit = -1;
bool IsCmpNE;
switch (Predicate) {
default:
return false;
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_NE:
if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
std::swap(LHS, RHS);
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
const Value *AndLHS = AI->getOperand(0);
const Value *AndRHS = AI->getOperand(1);
if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
if (C->getValue().isPowerOf2())
std::swap(AndLHS, AndRHS);
if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
if (C->getValue().isPowerOf2()) {
TestBit = C->getValue().logBase2();
LHS = AndLHS;
}
}
if (VT == MVT::i1)
TestBit = 0;
IsCmpNE = Predicate == CmpInst::ICMP_NE;
break;
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SGE:
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
TestBit = BW - 1;
IsCmpNE = Predicate == CmpInst::ICMP_SLT;
break;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SLE:
if (!isa<ConstantInt>(RHS))
return false;
if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
return false;
TestBit = BW - 1;
IsCmpNE = Predicate == CmpInst::ICMP_SLE;
break;
} // end switch
static const unsigned OpcTable[2][2][2] = {
{ {AArch64::CBZW, AArch64::CBZX },
{AArch64::CBNZW, AArch64::CBNZX} },
{ {AArch64::TBZW, AArch64::TBZX },
{AArch64::TBNZW, AArch64::TBNZX} }
};
bool IsBitTest = TestBit != -1;
bool Is64Bit = BW == 64;
if (TestBit < 32 && TestBit >= 0)
Is64Bit = false;
unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
const MCInstrDesc &II = TII.get(Opc);
Register SrcReg = getRegForValue(LHS);
if (!SrcReg)
return false;
if (BW == 64 && !Is64Bit)
SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
if ((BW < 32) && !IsBitTest)
SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
// Emit the combined compare and branch instruction.
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
.addReg(SrcReg);
if (IsBitTest)
MIB.addImm(TestBit);
MIB.addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
bool AArch64FastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
if (BI->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
fastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && isValueAvailable(CI)) {
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
switch (Predicate) {
default:
break;
case CmpInst::FCMP_FALSE:
fastEmitBranch(FBB, MIMD.getDL());
return true;
case CmpInst::FCMP_TRUE:
fastEmitBranch(TBB, MIMD.getDL());
return true;
}
// Try to emit a combined compare-and-branch first.
if (emitCompareAndBranch(BI))
return true;
// Try to take advantage of fallthrough opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
// Emit the cmp.
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
// instruction.
AArch64CC::CondCode CC = getCompareCC(Predicate);
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
switch (Predicate) {
default:
break;
case CmpInst::FCMP_UEQ:
ExtraCC = AArch64CC::EQ;
CC = AArch64CC::VS;
break;
case CmpInst::FCMP_ONE:
ExtraCC = AArch64CC::MI;
CC = AArch64CC::GT;
break;
}
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
// Emit the extra branch for FCMP_UEQ and FCMP_ONE.
if (ExtraCC != AArch64CC::AL) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
.addImm(ExtraCC)
.addMBB(TBB);
}
// Emit the branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
.addMBB(Target);
// Obtain the branch probability and add the target to the successor list.
if (FuncInfo.BPI) {
auto BranchProbability = FuncInfo.BPI-&