blob: 563f2b7a07351983123bac2110631ce2a6337c5e [file] [log] [blame]
//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
MIB.setMF(MF);
// hasFnAttribute() is expensive to call on every BRCOND selection, so
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
MFReturnAddr = Register();
processPHIs(MF);
}
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
// A lowering phase that runs before any selection attempts.
// Returns true if the instruction was modified.
bool preISelLower(MachineInstr &I);
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I);
// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
bool contractCrossBankCopyIntoStore(MachineInstr &I,
MachineRegisterInfo &MRI);
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
///@{
/// Helper functions for selectCompareBranch.
bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
MachineIRBuilder &MIB) const;
bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
MachineIRBuilder &MIB) const;
bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
MachineIRBuilder &MIB) const;
bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
///@}
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI);
bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
MachineInstr *emitScalarToVector(unsigned EltSize,
const TargetRegisterClass *DstRC,
Register Scalar,
MachineIRBuilder &MIRBuilder) const;
/// Emit a lane insert into \p DstReg, or a new vector register if None is
/// provided.
///
/// The lane inserted into is defined by \p LaneIdx. The vector source
/// register is given by \p SrcReg. The register containing the element is
/// given by \p EltReg.
MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
/// Emit a sequence of instructions representing a constant \p CV for a
/// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
///
/// \returns the last instruction in the sequence on success, and nullptr
/// otherwise.
MachineInstr *emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI);
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
/// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
/// SUBREG_TO_REG.
bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
/// Helper function to select vector load intrinsics like
/// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
/// \p Opc is the opcode that the selected instruction should use.
/// \p NumVecs is the number of vector destinations for the instruction.
/// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
MachineInstr &I);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
// Emit a vector concat operation.
MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
Register Op2,
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
/// Emit a floating point comparison between \p LHS and \p RHS.
/// \p Pred if given is the intended predicate to use.
MachineInstr *emitFPCompare(Register LHS, Register RHS,
MachineIRBuilder &MIRBuilder,
Optional<CmpInst::Predicate> = None) const;
MachineInstr *emitInstr(unsigned Opcode,
std::initializer_list<llvm::DstOp> DstOps,
std::initializer_list<llvm::SrcOp> SrcOps,
MachineIRBuilder &MIRBuilder,
const ComplexRendererFns &RenderFns = None) const;
/// Helper function to emit an add or sub instruction.
///
/// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
/// in a specific order.
///
/// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
///
/// \code
/// const std::array<std::array<unsigned, 2>, 4> Table {
/// {{AArch64::ADDXri, AArch64::ADDWri},
/// {AArch64::ADDXrs, AArch64::ADDWrs},
/// {AArch64::ADDXrr, AArch64::ADDWrr},
/// {AArch64::SUBXri, AArch64::SUBWri},
/// {AArch64::ADDXrx, AArch64::ADDWrx}}};
/// \endcode
///
/// Each row in the table corresponds to a different addressing mode. Each
/// column corresponds to a different register size.
///
/// \attention Rows must be structured as follows:
/// - Row 0: The ri opcode variants
/// - Row 1: The rs opcode variants
/// - Row 2: The rr opcode variants
/// - Row 3: The ri opcode variants for negative immediates
/// - Row 4: The rx opcode variants
///
/// \attention Columns must be structured as follows:
/// - Column 0: The 64-bit opcode variants
/// - Column 1: The 32-bit opcode variants
///
/// \p Dst is the destination register of the binop to emit.
/// \p LHS is the left-hand operand of the binop to emit.
/// \p RHS is the right-hand operand of the binop to emit.
MachineInstr *emitAddSub(
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
AArch64CC::CondCode CC,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
AArch64CC::CondCode Pred,
MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
MachineIRBuilder &MIRBuilder) const;
/// Emit the overflow op for \p Opcode.
///
/// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
/// G_USUBO, etc.
std::pair<MachineInstr *, AArch64CC::CondCode>
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
/// Emit a CB(N)Z instruction which branches to \p DestMBB.
MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
MachineBasicBlock *DestMBB,
MachineIRBuilder &MIB) const;
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const;
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 16);
}
/// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
/// from complex pattern matchers like selectAddrModeIndexed().
ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
MachineRegisterInfo &MRI) const;
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
template <int Width>
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
return selectAddrModeIndexed(Root, Width / 8);
}
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
/// Returns a \p ComplexRendererFns which contains a base, offset, and whether
/// or not a shift + extend should be folded into an addressing mode. Returns
/// None when this is not profitable or possible.
ComplexRendererFns
selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
MachineOperand &Offset, unsigned SizeInBytes,
bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
return selectAddrModeXRO(Root, Width / 8);
}
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
return selectAddrModeWRO(Root, Width / 8);
}
ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
bool AllowROR = false) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root, true);
}
/// Given an extend instruction, determine the correct shift-extend type for
/// that instruction.
///
/// If the instruction is going to be used in a load or store, pass
/// \p IsLoadStore = true.
AArch64_AM::ShiftExtendType
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
/// Move \p Reg to \p RC if \p Reg is not already on \p RC.
///
/// \returns Either \p Reg if no change was necessary, or the new register
/// created by moving \p Reg.
///
/// Note: This uses emitCopy right now.
Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
// Optimization methods.
bool tryOptSelect(MachineInstr &MI);
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
/// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
/// register zeroed out. In other words, the result of MI has been explicitly
/// zero extended.
bool isDef32(const MachineInstr &MI) const;
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
bool ProduceNonFlagSettingCondBr = false;
// Some cached values used during selection.
// We use LR as a live-in register, and we keep track of it here as it can be
// clobbered by calls.
Register MFReturnAddr;
MachineIRBuilder MIB;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
} // end anonymous namespace
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
: InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
const RegisterBankInfo &RBI,
bool GetAllRegSet = false) {
if (RB.getID() == AArch64::GPRRegBankID) {
if (Ty.getSizeInBits() <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (Ty.getSizeInBits() == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
if (Ty.getSizeInBits() == 128)
return &AArch64::XSeqPairsClassRegClass;
return nullptr;
}
if (RB.getID() == AArch64::FPRRegBankID) {
switch (Ty.getSizeInBits()) {
case 8:
return &AArch64::FPR8RegClass;
case 16:
return &AArch64::FPR16RegClass;
case 32:
return &AArch64::FPR32RegClass;
case 64:
return &AArch64::FPR64RegClass;
case 128:
return &AArch64::FPR128RegClass;
}
return nullptr;
}
return nullptr;
}
/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
bool GetAllRegSet = false) {
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (SizeInBits == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
if (SizeInBits == 128)
return &AArch64::XSeqPairsClassRegClass;
}
if (RegBankID == AArch64::FPRRegBankID) {
switch (SizeInBits) {
default:
return nullptr;
case 8:
return &AArch64::FPR8RegClass;
case 16:
return &AArch64::FPR16RegClass;
case 32:
return &AArch64::FPR32RegClass;
case 64:
return &AArch64::FPR64RegClass;
case 128:
return &AArch64::FPR128RegClass;
}
}
return nullptr;
}
/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
const TargetRegisterInfo &TRI, unsigned &SubReg) {
switch (TRI.getRegSizeInBits(*RC)) {
case 8:
SubReg = AArch64::bsub;
break;
case 16:
SubReg = AArch64::hsub;
break;
case 32:
if (RC != &AArch64::FPR32RegClass)
SubReg = AArch64::sub_32;
else
SubReg = AArch64::ssub;
break;
case 64:
SubReg = AArch64::dsub;
break;
default:
LLVM_DEBUG(
dbgs() << "Couldn't find appropriate subregister for register class.");
return false;
}
return true;
}
/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
switch (RB.getID()) {
case AArch64::GPRRegBankID:
return 32;
case AArch64::FPRRegBankID:
return 8;
default:
llvm_unreachable("Tried to get minimum size for unknown register bank.");
}
}
/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
/// Helper function for functions like createDTuple and createQTuple.
///
/// \p RegClassIDs - The list of register class IDs available for some tuple of
/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
/// expected to contain between 2 and 4 tuple classes.
///
/// \p SubRegs - The list of subregister classes associated with each register
/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
/// subregister class. The index of each subregister class is expected to
/// correspond with the index of each register class.
///
/// \returns Either the destination register of REG_SEQUENCE instruction that
/// was created, or the 0th element of \p Regs if \p Regs contains a single
/// element.
static Register createTuple(ArrayRef<Register> Regs,
const unsigned RegClassIDs[],
const unsigned SubRegs[], MachineIRBuilder &MIB) {
unsigned NumRegs = Regs.size();
if (NumRegs == 1)
return Regs[0];
assert(NumRegs >= 2 && NumRegs <= 4 &&
"Only support between two and 4 registers in a tuple!");
const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
auto RegSequence =
MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
RegSequence.addUse(Regs[I]);
RegSequence.addImm(SubRegs[I]);
}
return RegSequence.getReg(0);
}
/// Create a tuple of D-registers using the registers in \p Regs.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
static const unsigned RegClassIDs[] = {
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
AArch64::dsub2, AArch64::dsub3};
return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}
/// Create a tuple of Q-registers using the registers in \p Regs.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
static const unsigned RegClassIDs[] = {
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
AArch64::qsub2, AArch64::qsub3};
return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
auto &MI = *Root.getParent();
auto &MBB = *MI.getParent();
auto &MF = *MBB.getParent();
auto &MRI = MF.getRegInfo();
uint64_t Immed;
if (Root.isImm())
Immed = Root.getImm();
else if (Root.isCImm())
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value.getSExtValue();
} else
return None;
return Immed;
}
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
const AArch64RegisterBankInfo &RBI,
const MachineRegisterInfo &MRI,
const AArch64RegisterInfo &TRI) {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (!Ty.isValid()) {
LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
return true;
}
const RegisterBank *PrevOpBank = nullptr;
for (auto &MO : I.operands()) {
// FIXME: Support non-register operands.
if (!MO.isReg()) {
LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
return true;
}
// FIXME: Can generic operations have physical registers operands? If
// so, this will need to be taught about that, and we'll need to get the
// bank out of the minimal class for the register.
// Either way, this needs to be documented (and possibly verified).
if (!Register::isVirtualRegister(MO.getReg())) {
LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
return true;
}
const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
if (!OpBank) {
LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
return true;
}
if (PrevOpBank && OpBank != PrevOpBank) {
LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
return true;
}
PrevOpBank = OpBank;
}
return false;
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
if (OpSize == 32) {
switch (GenericOpc) {
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
case TargetOpcode::G_PTR_ADD:
return AArch64::ADDXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
default:
return GenericOpc;
}
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDSrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBSrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULSrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVSrr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDDrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBDrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULDrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVDrr;
case TargetOpcode::G_OR:
return AArch64::ORRv8i8;
default:
return GenericOpc;
}
}
break;
}
return GenericOpc;
}
/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
const bool isStore = GenericOpc == TargetOpcode::G_STORE;
switch (RegBankID) {
case AArch64::GPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
case 16:
return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
case 32:
return isStore ? AArch64::STRWui : AArch64::LDRWui;
case 64:
return isStore ? AArch64::STRXui : AArch64::LDRXui;
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBui : AArch64::LDRBui;
case 16:
return isStore ? AArch64::STRHui : AArch64::LDRHui;
case 32:
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
case 128:
return isStore ? AArch64::STRQui : AArch64::LDRQui;
}
break;
}
return GenericOpc;
}
#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Make sure the size of the source and dest line up.
assert(
(DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
(Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
// Copies are a mean to copy bits around, as long as we are
// on the same register class, that's fine. Otherwise, that
// means we need some SUBREG_TO_REG or AND & co.
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!");
// Check the size of the destination.
assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
"GPRs cannot get more than 64-bit width values");
return true;
}
#endif
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
const RegisterBankInfo &RBI, Register SrcReg,
const TargetRegisterClass *To, unsigned SubReg) {
assert(SrcReg.isValid() && "Expected a valid source register?");
assert(To && "Destination register class cannot be null");
assert(SubReg && "Expected a valid subregister");
MachineIRBuilder MIB(I);
auto SubRegCopy =
MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
// It's possible that the destination register won't be constrained. Make
// sure that happens.
if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
return true;
}
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Special casing for cross-bank copies of s1s. We can technically represent
// a 1-bit value with any size of register. The minimum size for a GPR is 32
// bits. So, we need to put the FPR on 32 bits as well.
//
// FIXME: I'm not sure if this case holds true outside of copies. If it does,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
SrcSize = DstSize = 32;
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
getMinClassForRegBank(DstRegBank, DstSize, true)};
}
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
// Find the correct register classes for the source and destination registers.
const TargetRegisterClass *SrcRC;
const TargetRegisterClass *DstRC;
std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Unexpected dest size "
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
return false;
}
// A couple helpers below, for making sure that the copy we produce is valid.
// Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
// to verify that the src and dst are the same size, since that's handled by
// the SUBREG_TO_REG.
bool KnownValid = false;
// Returns true, or asserts if something we don't expect happens. Instead of
// returning true, we return isValidCopy() to ensure that we verify the
// result.
auto CheckCopy = [&]() {
// If we have a bitcast or something, we can't have physical registers.
assert((I.isCopy() ||
(!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
!Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
"No phys reg on generic operator!");
bool ValidCopy = true;
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
#endif
(void)KnownValid;
return ValidCopy;
};
// Is this a copy? If so, then we may need to insert a subregister copy.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
return false;
}
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
unsigned SubReg;
// If the source bank doesn't support a subregister copy small enough,
// then we first need to copy to the destination bank.
if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
const TargetRegisterClass *DstTempRC =
getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
getSubRegForClass(DstRC, TRI, SubReg);
MachineIRBuilder MIB(I);
auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
} else if (SrcSize > DstSize) {
// If the source register is bigger than the destination we need to
// perform a subregister copy.
const TargetRegisterClass *SubRegRC =
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
getSubRegForClass(SubRegRC, TRI, SubReg);
copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
} else if (DstSize > SrcSize) {
// If the destination register is bigger than the source we need to do
// a promotion using SUBREG_TO_REG.
const TargetRegisterClass *PromotionRC =
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
getSubRegForClass(SrcRC, TRI, SubReg);
Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
.addImm(0)
.addUse(SrcReg)
.addImm(SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(PromoteReg);
// Promise that the copy is implicitly validated by the SUBREG_TO_REG.
KnownValid = true;
}
// If the destination is a physical register, then there's nothing to
// change, so we're done.
if (Register::isPhysicalRegister(DstReg))
return CheckCopy();
}
// No need to constrain SrcReg. It will get constrained when we hit another
// of its use or its defs. Copies do not have constraints.
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
// If this a GPR ZEXT that we want to just reduce down into a copy.
// The sizes will be mismatched with the source < 32b but that's ok.
if (I.getOpcode() == TargetOpcode::G_ZEXT) {
I.setDesc(TII.get(AArch64::COPY));
assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
return selectCopy(I, TII, MRI, TRI, RBI);
}
I.setDesc(TII.get(AArch64::COPY));
return CheckCopy();
}
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
if (!DstTy.isScalar() || !SrcTy.isScalar())
return GenericOpc;
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
switch (DstSize) {
case 32:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
case 64:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
default:
return GenericOpc;
};
return GenericOpc;
}
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
Register False, AArch64CC::CondCode CC,
MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?");
LLT Ty = MRI.getType(True);
if (Ty.isVector())
return nullptr;
const unsigned Size = Ty.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?");
const bool Is32Bit = Size == 32;
if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
return &*FCSel;
}
// By default, we'll try and emit a CSEL.
unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
bool Optimized = false;
auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
&Optimized](Register &Reg, Register &OtherReg,
bool Invert) {
if (Optimized)
return false;
// Attempt to fold:
//
// %sub = G_SUB 0, %x
// %select = G_SELECT cc, %reg, %sub
//
// Into:
// %select = CSNEG %reg, %x, cc
Register MatchReg;
if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
// Attempt to fold:
//
// %xor = G_XOR %x, -1
// %select = G_SELECT cc, %reg, %xor
//
// Into:
// %select = CSINV %reg, %x, cc
if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
// Attempt to fold:
//
// %add = G_ADD %x, 1
// %select = G_SELECT cc, %reg, %add
//
// Into:
// %select = CSINC %reg, %x, cc
if (mi_match(Reg, MRI,
m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
return false;
};
// Helper lambda which tries to use CSINC/CSINV for the instruction when its
// true/false values are constants.
// FIXME: All of these patterns already exist in tablegen. We should be
// able to import these.
auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
&Optimized]() {
if (Optimized)
return false;
auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
if (!TrueCst && !FalseCst)
return false;
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
if (TrueCst && FalseCst) {
int64_t T = TrueCst->Value.getSExtValue();
int64_t F = FalseCst->Value.getSExtValue();
if (T == 0 && F == 1) {
// G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
True = ZReg;
False = ZReg;
return true;
}
if (T == 0 && F == -1) {
// G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
True = ZReg;
False = ZReg;
return true;
}
}
if (TrueCst) {
int64_t T = TrueCst->Value.getSExtValue();
if (T == 1) {
// G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
True = False;
False = ZReg;
CC = AArch64CC::getInvertedCondCode(CC);
return true;
}
if (T == -1) {
// G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
True = False;
False = ZReg;
CC = AArch64CC::getInvertedCondCode(CC);
return true;
}
}
if (FalseCst) {
int64_t F = FalseCst->Value.getSExtValue();
if (F == 1) {
// G_SELECT cc, t, 1 -> CSINC t, zreg, cc
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
False = ZReg;
return true;
}
if (F == -1) {
// G_SELECT cc, t, -1 -> CSINC t, zreg, cc
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
False = ZReg;
return true;
}
}
return false;
};
Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
Optimized |= TryOptSelectCst();
auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
return &*SelectInst;
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
switch (P) {
default:
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
return AArch64CC::NE;
case CmpInst::ICMP_EQ:
return AArch64CC::EQ;
case CmpInst::ICMP_SGT:
return AArch64CC::GT;
case CmpInst::ICMP_SGE:
return AArch64CC::GE;
case CmpInst::ICMP_SLT:
return AArch64CC::LT;
case CmpInst::ICMP_SLE:
return AArch64CC::LE;
case CmpInst::ICMP_UGT:
return AArch64CC::HI;
case CmpInst::ICMP_UGE:
return AArch64CC::HS;
case CmpInst::ICMP_ULT:
return AArch64CC::LO;
case CmpInst::ICMP_ULE:
return AArch64CC::LS;
}
}
/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
bool HasZext = false;
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
unsigned Opc = MI->getOpcode();
if (!MI->getOperand(0).isReg() ||
!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
break;
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
//
// (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
// on the truncated x is the same as the bit number on x.
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
Opc == TargetOpcode::G_TRUNC) {
if (Opc == TargetOpcode::G_ZEXT)
HasZext = true;
Register NextReg = MI->getOperand(1).getReg();
// Did we find something worth folding?
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
break;
// NextReg is worth folding. Keep looking.
Reg = NextReg;
continue;
}
// Attempt to find a suitable operation with a constant on one side.
Optional<uint64_t> C;
Register TestReg;
switch (Opc) {
default:
break;
case TargetOpcode::G_AND:
case TargetOpcode::G_XOR: {
TestReg = MI->getOperand(1).getReg();
Register ConstantReg = MI->getOperand(2).getReg();
auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!VRegAndVal) {
// AND commutes, check the other side for a constant.
// FIXME: Can we canonicalize the constant so that it's always on the
// same side at some point earlier?
std::swap(ConstantReg, TestReg);
VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal) {
if (HasZext)
C = VRegAndVal->Value.getZExtValue();
else
C = VRegAndVal->Value.getSExtValue();
}
break;
}
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_SHL: {
TestReg = MI->getOperand(1).getReg();
auto VRegAndVal =
getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
C = VRegAndVal->Value.getSExtValue();
break;
}
}
// Didn't find a constant or viable register. Bail out of the loop.
if (!C || !TestReg.isValid())
break;
// We found a suitable instruction with a constant. Check to see if we can
// walk through the instruction.
Register NextReg;
unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
switch (Opc) {
default:
break;
case TargetOpcode::G_AND:
// (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
if ((*C >> Bit) & 1)
NextReg = TestReg;
break;
case TargetOpcode::G_SHL:
// (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
// the type of the register.
if (*C <= Bit && (Bit - *C) < TestRegSize) {
NextReg = TestReg;
Bit = Bit - *C;
}
break;
case TargetOpcode::G_ASHR:
// (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
// in x
NextReg = TestReg;
Bit = Bit + *C;
if (Bit >= TestRegSize)
Bit = TestRegSize - 1;
break;
case TargetOpcode::G_LSHR:
// (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
if ((Bit + *C) < TestRegSize) {
NextReg = TestReg;
Bit = Bit + *C;
}
break;
case TargetOpcode::G_XOR:
// We can walk through a G_XOR by inverting whether we use tbz/tbnz when
// appropriate.
//
// e.g. If x' = xor x, c, and the b-th bit is set in c then
//
// tbz x', b -> tbnz x, b
//
// Because x' only has the b-th bit set if x does not.
if ((*C >> Bit) & 1)
Invert = !Invert;
NextReg = TestReg;
break;
}
// Check if we found anything worth folding.
if (!NextReg.isValid())
return Reg;
Reg = NextReg;
}
return Reg;
}
MachineInstr *AArch64InstructionSelector::emitTestBit(
Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const {
assert(TestReg.isValid());
assert(ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!");
MachineRegisterInfo &MRI = *MIB.getMRI();
// Attempt to optimize the test bit by walking over instructions.
TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
LLT Ty = MRI.getType(TestReg);
unsigned Size = Ty.getSizeInBits();
assert(!Ty.isVector() && "Expected a scalar!");
assert(Bit < 64 && "Bit is too large!");
// When the test register is a 64-bit register, we have to narrow to make
// TBNZW work.
bool UseWReg = Bit < 32;
unsigned NecessarySize = UseWReg ? 32 : 64;
if (Size != NecessarySize)
TestReg = moveScalarRegClass(
TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
MIB);
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
{AArch64::TBZW, AArch64::TBNZW}};
unsigned Opc = OpcTable[UseWReg][IsNegative];
auto TestBitMI =
MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
return &*TestBitMI;
}
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const {
assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
// Given something like this:
//
// %x = ...Something...
// %one = G_CONSTANT i64 1
// %zero = G_CONSTANT i64 0
// %and = G_AND %x, %one
// %cmp = G_ICMP intpred(ne), %and, %zero
// %cmp_trunc = G_TRUNC %cmp
// G_BRCOND %cmp_trunc, %bb.3
//
// We want to try and fold the AND into the G_BRCOND and produce either a
// TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
//
// In this case, we'd get
//
// TBNZ %x %bb.3
//
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
auto MaybeBit = getIConstantVRegValWithLookThrough(
AndInst.getOperand(2).getReg(), *MIB.getMRI());
if (!MaybeBit)
return false;
int32_t Bit = MaybeBit->Value.exactLogBase2();
if (Bit < 0)
return false;
Register TestReg = AndInst.getOperand(1).getReg();
// Emit a TB(N)Z.
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
return true;
}
MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
bool IsNegative,
MachineBasicBlock *DestMBB,
MachineIRBuilder &MIB) const {
assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
MachineRegisterInfo &MRI = *MIB.getMRI();
assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
AArch64::GPRRegBankID &&
"Expected GPRs only?");
auto Ty = MRI.getType(CompareReg);
unsigned Width = Ty.getSizeInBits();
assert(!Ty.isVector() && "Expected scalar only?");
assert(Width <= 64 && "Expected width to be at most 64?");
static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
{AArch64::CBNZW, AArch64::CBNZX}};
unsigned Opc = OpcTable[IsNegative][Width == 64];
auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
return &*BranchMI;
}
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
assert(I.getOpcode() == TargetOpcode::G_BRCOND);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two branches to implement.
auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
Pred);
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
if (CC2 != AArch64CC::AL)
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
assert(I.getOpcode() == TargetOpcode::G_BRCOND);
// Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
//
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
// instructions will not be produced, as they are conditional branch
// instructions that do not set flags.
if (!ProduceNonFlagSettingCondBr)
return false;
MachineRegisterInfo &MRI = *MIB.getMRI();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
auto Pred =
static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
Register LHS = ICmp.getOperand(2).getReg();
Register RHS = ICmp.getOperand(3).getReg();
// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
//
// Handle non-commutative condition codes first.
// Note that we don't want to do this when we have a G_AND because it can
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
if (VRegAndVal && !AndInst) {
int64_t C = VRegAndVal->Value.getSExtValue();
// When we have a greater-than comparison, we can just test if the msb is
// zero.
if (C == -1 && Pred == CmpInst::ICMP_SGT) {
uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
I.eraseFromParent();
return true;
}
// When we have a less than comparison, we can just test if the msb is not
// zero.
if (C == 0 && Pred == CmpInst::ICMP_SLT) {
uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
I.eraseFromParent();
return true;
}
}
// Attempt to handle commutative condition codes. Right now, that's only
// eq/ne.
if (ICmpInst::isEquality(Pred)) {
if (!VRegAndVal) {
std::swap(RHS, LHS);
VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
}
if (VRegAndVal && VRegAndVal->Value == 0) {
// If there's a G_AND feeding into this branch, try to fold it away by
// emitting a TB(N)Z instead.
//
// Note: If we have LT, then it *is* possible to fold, but it wouldn't be
// beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
// would be redundant.
if (AndInst &&
tryOptAndIntoCompareBranch(
*AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
I.eraseFromParent();
return true;
}
// Otherwise, try to emit a CB(N)Z instead.
auto LHSTy = MRI.getType(LHS);
if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
I.eraseFromParent();
return true;
}
}
}
return false;
}
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
assert(I.getOpcode() == TargetOpcode::G_BRCOND);
if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
return true;
// Couldn't optimize. Emit a compare + a Bcc.
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
auto PredOp = ICmp.getOperand(1);
emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
Register CondReg = I.getOperand(0).getReg();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
CondReg = CCMI->getOperand(1).getReg();
CCMI = MRI.getVRegDef(CondReg);
}
// Try to select the G_BRCOND using whatever is feeding the condition if
// possible.
unsigned CCMIOpc = CCMI->getOpcode();
if (CCMIOpc == TargetOpcode::G_FCMP)
return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
if (CCMIOpc == TargetOpcode::G_ICMP)
return selectCompareBranchFedByICmp(I, *CCMI, MIB);
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
// instructions will not be produced, as they are conditional branch
// instructions that do not set flags.
if (ProduceNonFlagSettingCondBr) {
emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
I.getOperand(1).getMBB(), MIB);
I.eraseFromParent();
return true;
}
// Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
auto TstMI =
MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
auto Bcc = MIB.buildInstr(AArch64::Bcc)
.addImm(AArch64CC::EQ)
.addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static Optional<int64_t> getVectorShiftImm(Register Reg,
MachineRegisterInfo &MRI) {
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
assert(OpMI && "Expected to find a vreg def for vector shift operand");
return getAArch64VectorSplatScalar(*OpMI, MRI);
}
/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
if (!ShiftImm)
return None;
// Check the immediate is in range for a SHL.
int64_t Imm = *ShiftImm;
if (Imm < 0)
return None;
switch (SrcTy.getElementType().getSizeInBits()) {
default:
LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
return None;
case 8:
if (Imm > 7)
return None;
break;
case 16:
if (Imm > 15)
return None;
break;
case 32:
if (Imm > 31)
return None;
break;
case 64:
if (Imm > 63)
return None;
break;
}
return Imm;
}
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_SHL);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
if (!Ty.isVector())
return false;
// Check if we have a vector of constants on RHS that we can select as the
// immediate form.
Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
unsigned Opc = 0;
if (Ty == LLT::fixed_vector(2, 64)) {
Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
} else if (Ty == LLT::fixed_vector(4, 32)) {
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::fixed_vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
} else if (Ty == LLT::fixed_vector(4, 16)) {
Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
} else if (Ty == LLT::fixed_vector(8, 16)) {
Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
} else if (Ty == LLT::fixed_vector(16, 8)) {
Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
} else if (Ty == LLT::fixed_vector(8, 8)) {
Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
}
auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
if (ImmVal)
Shl.addImm(*ImmVal);
else
Shl.addUse(Src2Reg);
constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_ASHR ||
I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
Register Src2Reg = I.getOperand(2).getReg();
if (!Ty.isVector())
return false;
bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
// We expect the immediate case to be lowered in the PostLegalCombiner to
// AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
const TargetRegisterClass *RC =
getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::fixed_vector(2, 64)) {
Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
} else if (Ty == LLT::fixed_vector(4, 32)) {
Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
} else if (Ty == LLT::fixed_vector(2, 32)) {
Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
} else if (Ty == LLT::fixed_vector(4, 16)) {
Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
NegOpc = AArch64::NEGv4i16;
} else if (Ty == LLT::fixed_vector(8, 16)) {
Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
NegOpc = AArch64::NEGv8i16;
} else if (Ty == LLT::fixed_vector(16, 8)) {
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
NegOpc = AArch64::NEGv16i8;
} else if (Ty == LLT::fixed_vector(8, 8)) {
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
NegOpc = AArch64::NEGv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
}
auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
return false;
}
bool AArch64InstructionSelector::selectVaStartDarwin(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
Register ListReg = I.getOperand(0).getReg();
Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
.addDef(ArgsAddrReg)
.addFrameIndex(FuncInfo->getVarArgsStackIndex())
.addImm(0)
.addImm(0);
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
.addUse(ArgsAddrReg)
.addUse(ListReg)
.addImm(0)
.addMemOperand(*I.memoperands_begin());
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
void AArch64InstructionSelector::materializeLargeCMVal(
MachineInstr &I, const Value *V, unsigned OpFlags) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
Register ForceDstReg) {
Register DstReg = ForceDstReg
? ForceDstReg
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
if (auto *GV = dyn_cast<GlobalValue>(V)) {
MovI->addOperand(MF, MachineOperand::CreateGA(
GV, MovZ->getOperand(1).getOffset(), Flags));
} else {
MovI->addOperand(
MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
MovZ->getOperand(1).getOffset(), Flags));
}
MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
return DstReg;
};
Register DstReg = BuildMovK(MovZ.getReg(0),
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
if (MRI.getType(SrcOp.getReg()).isPointer()) {
// Allow matching with imported patterns for stores of pointers. Unlike
// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
// and constrain.
auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
Register NewSrc = Copy.getReg(0);
SrcOp.setReg(NewSrc);
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
Changed = true;
}
return Changed;
}
case TargetOpcode::G_PTR_ADD:
return convertPtrAddToAdd(I, MRI);
case TargetOpcode::G_LOAD: {
// For scalar loads of pointers, we try to convert the dest type from p0
// to s64 so that our imported patterns can match. Like with the G_PTR_ADD
// conversion, this should be ok because all users should have been
// selected already, so the type doesn't matter for them.
Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
if (!DstTy.isPointer())
return false;
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
case AArch64::G_DUP: {
// Convert the type from p0 to s64 to help selection.
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (!DstTy.getElementType().isPointer())
return false;
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
MRI.setType(I.getOperand(0).getReg(),
DstTy.changeElementType(LLT::scalar(64)));
MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_SITOFP: {
// If both source and destination regbanks are FPR, then convert the opcode
// to G_SITOF so that the importer can select it to an fpr variant.
// Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
// copy.
Register SrcReg = I.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
return false;
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
if (I.getOpcode() == TargetOpcode::G_SITOFP)
I.setDesc(TII.get(AArch64::G_SITOF));
else
I.setDesc(TII.get(AArch64::G_UITOF));
return true;
}
return false;
}
default:
return false;
}
}
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
bool AArch64InstructionSelector::convertPtrAddToAdd(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
Register DstReg = I.getOperand(0).getReg();
Register AddOp1Reg = I.getOperand(1).getReg();
const LLT PtrTy = MRI.getType(DstReg);
if (PtrTy.getAddressSpace() != 0)
return false;
const LLT CastPtrTy =
PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
// Set regbanks on the registers.
if (PtrTy.isVector())
MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
else
MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
// Now turn the %dst(p0) = G_PTR_ADD %base, off into:
// %dst(intty) = G_ADD %intbase, off
I.setDesc(TII.get(TargetOpcode::G_ADD));
MRI.setType(DstReg, CastPtrTy);
I.getOperand(1).setReg(PtrToInt.getReg(0));
if (!select(*PtrToInt)) {
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
}
// Also take the opportunity here to try to do some optimization.
// Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
Register NegatedReg;
if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
return true;
I.getOperand(2).setReg(NegatedReg);
I.setDesc(TII.get(TargetOpcode::G_SUB));
return true;
}
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
MachineRegisterInfo &MRI) {
// We try to match the immediate variant of LSL, which is actually an alias
// for a special case of UBFM. Otherwise, we fall back to the imported
// selector which will match the register variant.
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
const auto &MO = I.getOperand(2);
auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
if (!VRegAndVal)
return false;
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (DstTy.isVector())
return false;
bool Is64Bit = DstTy.getSizeInBits() == 64;
auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
if (!Imm1Fn || !Imm2Fn)
return false;
auto NewI =
MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
{I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
for (auto &RenderFn : *Imm1Fn)
RenderFn(NewI);
for (auto &RenderFn : *Imm2Fn)
RenderFn(NewI);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
//
// So, if we see something like this (with a 32-bit scalar as an example):
//
// %x:gpr(s32) = ... something ...
// %y:fpr(s32) = COPY %x:gpr(s32)
// G_STORE %y:fpr(s32)
//
// We can fix this up into something like this:
//
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
if (!DefDstReg.isValid())
return false;
LLT DefDstTy = MRI.getType(DefDstReg);
Register StoreSrcReg = I.getOperand(0).getReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
// If we get something strange like a physical register, then we shouldn't
// go any further.
if (!DefDstTy.isValid())
return false;
// Are the source and dst types the same size?
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
return false;
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
return false;
// We have a cross-bank copy, which is entering a store. Let's fold it.
I.getOperand(0).setReg(DefDstReg);
return true;
}
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case AArch64::G_DUP: {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
Register Dst = I.getOperand(0).getReg();
auto *CV = ConstantDataVector::getSplat(
MRI.getType(Dst).getNumElements(),
ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
ValAndVReg->Value));
if (!emitConstantVector(Dst, CV, MIB, MRI))
return false;
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SEXT:
// Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
// over a normal extend.
if (selectUSMovFromExtend(I, MRI))
return true;
return false;
case TargetOpcode::G_BR:
return false;
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
bool IsZero = false;
if (I.getOperand(1).isCImm())
IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
else if (I.getOperand(1).isImm())
IsZero = I.getOperand(1).getImm() == 0;
if (!IsZero)
return false;
Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
if (Ty.getSizeInBits() == 64) {
I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
} else if (Ty.getSizeInBits() == 32) {
I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
} else
return false;
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
}
case TargetOpcode::G_ADD: {
// Check if this is being fed by a G_ICMP on either side.
//
// (cmp pred, x, y) + z
//
// In the above case, when the cmp is true, we increment z by 1. So, we can
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
Register AddDst = I.getOperand(0).getReg();
Register AddLHS = I.getOperand(1).getReg();
Register AddRHS = I.getOperand(2).getReg();
// Only handle scalars.
LLT Ty = MRI.getType(AddLHS);
if (Ty.isVector())
return false;
// Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
// bits.
unsigned Size = Ty.getSizeInBits();
if (Size != 32 && Size != 64)
return false;
auto MatchCmp = [&](Register Reg) -> MachineInstr * {
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
// If the LHS of the add is 32 bits, then we want to fold a 32-bit
// compare.
if (Size == 32)
return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
// We model scalar compares using 32-bit destinations right now.
// If it's a 64-bit compare, it'll have 64-bit sources.
Register ZExt;
if (!mi_match(Reg, MRI,
m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
return nullptr;
auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
if (!Cmp ||
MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
return nullptr;
return Cmp;
};
// Try to match
// z + (cmp pred, x, y)
MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
// (cmp pred, x, y) + z
std::swap(AddLHS, AddRHS);
Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
auto &PredOp = Cmp->getOperand(1);
auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
const AArch64CC::CondCode InvCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
MIB.setInstrAndDebugLoc(I);
emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
/*RHS=*/Cmp->getOperand(3), PredOp, MIB);
emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_OR: {
// Look for operations that take the lower `Width=Size-ShiftImm` bits of
// `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
// shifting and masking that we can replace with a BFI (encoded as a BFM).
Register Dst = I.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
if (!Ty.isScalar())
return false;
unsigned Size = Ty.getSizeInBits();
if (Size != 32 && Size != 64)
return false;
Register ShiftSrc;
int64_t ShiftImm;
Register MaskSrc;
int64_t MaskImm;
if (!mi_match(
Dst, MRI,
m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
return false;
if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
return false;
int64_t Immr = Size - ShiftImm;
int64_t Imms = Size - ShiftImm - 1;
unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
I.eraseFromParent();
return true;
}
default:
return false;
}
}
bool AArch64InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64Subtarget *Subtarget =
&static_cast<const AArch64Subtarget &>(MF.getSubtarget());
if (Subtarget->requiresStrictAlign()) {
// We don't support this feature yet.
LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
return false;
}
MIB.setInstrAndDebugLoc(I);
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const RegClassOrRegBank &RegClassOrBank =
MRI.getRegClassOrRegBank(DefReg);
const TargetRegisterClass *DefRC
= RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
if (!DefRC) {
if (!DefTy.isValid()) {
LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
return false;
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
}
}
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
if (I.isCopy())
return selectCopy(I, TII, MRI, TRI, RBI);
return true;
}
if (I.getNumOperands() != I.getNumExplicitOperands()) {
LLVM_DEBUG(
dbgs() << "Generic instruction has unexpected implicit operands\n");
return false;
}
// Try to do some lowering before we start instruction selecting. These
// lowerings are purely transformations on the input G_MIR and so selection
// must continue after any modification of the instruction.
if (preISelLower(I)) {
Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
}
// There may be patterns where the importer can't deal with them optimally,
// but does select it to a suboptimal sequence so our custom C++ selection
// code later never has a chance to work on it. Therefore, we have an early
// selection attempt here to give priority to certain selection routines
// over the imported ones.
if (earlySelect(I))
return true;
if (selectImpl(I, *CoverageInfo))
return true;
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
switch (Opcode) {
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX: {
static const unsigned OpcTable[2][2] = {
{AArch64::UBFMWri, AArch64::UBFMXri},
{AArch64::SBFMWri, AArch64::SBFMXri}};
bool IsSigned = Opcode == TargetOpcode::G_SBFX;
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();