blob: 7410c7677fbb150dec77f0cd70d01f6808245b53 [file] [log] [blame]
//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"
// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"
using namespace llvm;
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
: AArch64GenRegisterBankInfo() {
static llvm::once_flag InitializeRegisterBankFlag;
static auto InitializeRegisterBankOnce = [&]() {
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
// done only once. Indeed the table of all register banks
// (AArch64::RegBanks) is unique in the compiler. At some point, it
// will get tablegen'ed and the whole constructor becomes empty.
const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
(void)RBGPR;
assert(&AArch64::GPRRegBank == &RBGPR &&
"The order in RegBanks is messed up");
const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
(void)RBFPR;
assert(&AArch64::FPRRegBank == &RBFPR &&
"The order in RegBanks is messed up");
const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
(void)RBCCR;
assert(&AArch64::CCRegBank == &RBCCR &&
"The order in RegBanks is messed up");
// The GPR register bank is fully defined by all the registers in
// GR64all + its subclasses.
assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
"Subclass not added?");
assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
// The FPR register bank is fully defined by all the registers in
// GR64all + its subclasses.
assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
"Subclass not added?");
assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
"Subclass not added?");
assert(RBFPR.getSize() == 512 &&
"FPRs should hold up to 512-bit via QQQQ sequence");
assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
"Class not added?");
assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
// Check that the TableGen'ed like file is in sync we our expectations.
// First, the Idx.
assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
{PMI_GPR32, PMI_GPR64}) &&
"PartialMappingIdx's are incorrectly ordered");
assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
{PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
PMI_FPR256, PMI_FPR512}) &&
"PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
do { \
assert( \
checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
#Idx " is incorrectly initialized"); \
} while (false)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
do { \
assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
PartialMappingIdx::PMI_First##RBName, Size, \
Offset) && \
#RBName #Size " " #Offset " is incorrectly initialized"); \
} while (false)
#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
CHECK_VALUEMAP(GPR, 32);
CHECK_VALUEMAP(GPR, 64);
CHECK_VALUEMAP(FPR, 16);
CHECK_VALUEMAP(FPR, 32);
CHECK_VALUEMAP(FPR, 64);
CHECK_VALUEMAP(FPR, 128);
CHECK_VALUEMAP(FPR, 256);
CHECK_VALUEMAP(FPR, 512);
// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size) \
do { \
CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
} while (false)
CHECK_VALUEMAP_3OPS(GPR, 32);
CHECK_VALUEMAP_3OPS(GPR, 64);
CHECK_VALUEMAP_3OPS(FPR, 32);
CHECK_VALUEMAP_3OPS(FPR, 64);
CHECK_VALUEMAP_3OPS(FPR, 128);
CHECK_VALUEMAP_3OPS(FPR, 256);
CHECK_VALUEMAP_3OPS(FPR, 512);
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
do { \
unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
(void)PartialMapDstIdx; \
(void)PartialMapSrcIdx; \
const ValueMapping *Map = getCopyMapping( \
AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \
(void)Map; \
assert(Map[0].BreakDown == \
&AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
" Dst is incorrectly initialized"); \
assert(Map[1].BreakDown == \
&AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
" Src is incorrectly initialized"); \
\
} while (false)
CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
do { \
unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
(void)PartialMapDstIdx; \
(void)PartialMapSrcIdx; \
const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
(void)Map; \
assert(Map[0].BreakDown == \
&AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
Map[0].NumBreakDowns == 1 && "FPR" #DstSize \
" Dst is incorrectly initialized"); \
assert(Map[1].BreakDown == \
&AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \
" Src is incorrectly initialized"); \
\
} while (false)
CHECK_VALUEMAP_FPEXT(32, 16);
CHECK_VALUEMAP_FPEXT(64, 16);
CHECK_VALUEMAP_FPEXT(64, 32);
CHECK_VALUEMAP_FPEXT(128, 64);
assert(verify(TRI) && "Invalid register bank information");
};
llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
const RegisterBank &B,
unsigned Size) const {
// What do we do with different size?
// copy are same size.
// Will introduce other hooks for different size:
// * extract cost.
// * build_sequence cost.
// Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
// FIXME: This should be deduced from the scheduling model.
if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
// FMOVXDr or FMOVWSr.
return 5;
if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
// FMOVDXr or FMOVSWr.
return 4;
return RegisterBankInfo::copyCost(A, B, Size);
}
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT) const {
switch (RC.getID()) {
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR16_loRegClassID:
case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
case AArch64::FPR32RegClassID:
case AArch64::FPR64RegClassID:
case AArch64::FPR64_loRegClassID:
case AArch64::FPR128RegClassID:
case AArch64::FPR128_loRegClassID:
case AArch64::DDRegClassID:
case AArch64::DDDRegClassID:
case AArch64::DDDDRegClassID:
case AArch64::QQRegClassID:
case AArch64::QQQRegClassID:
case AArch64::QQQQRegClassID:
return getRegBank(AArch64::FPRRegBankID);
case AArch64::GPR32commonRegClassID:
case AArch64::GPR32RegClassID:
case AArch64::GPR32spRegClassID:
case AArch64::GPR32sponlyRegClassID:
case AArch64::GPR32argRegClassID:
case AArch64::GPR32allRegClassID:
case AArch64::GPR64commonRegClassID:
case AArch64::GPR64RegClassID:
case AArch64::GPR64spRegClassID:
case AArch64::GPR64sponlyRegClassID:
case AArch64::GPR64argRegClassID:
case AArch64::GPR64allRegClassID:
case AArch64::GPR64noipRegClassID:
case AArch64::GPR64common_and_GPR64noipRegClassID:
case AArch64::GPR64noip_and_tcGPR64RegClassID:
case AArch64::tcGPR64RegClassID:
case AArch64::rtcGPR64RegClassID:
case AArch64::WSeqPairsClassRegClassID:
case AArch64::XSeqPairsClassRegClassID:
return getRegBank(AArch64::GPRRegBankID);
case AArch64::CCRRegClassID:
return getRegBank(AArch64::CCRegBankID);
default:
llvm_unreachable("Register class not supported");
}
}
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
switch (MI.getOpcode()) {
case TargetOpcode::G_OR: {
// 32 and 64-bit or can be mapped on either FPR or
// GPR for the same cost.
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
if (Size != 32 && Size != 64)
break;
// If the instruction has any implicit-defs or uses,
// do not mess with it.
if (MI.getNumOperands() != 3)
break;
InstructionMappings AltMappings;
const InstructionMapping &GPRMapping = getInstructionMapping(
/*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
/*NumOperands*/ 3);
const InstructionMapping &FPRMapping = getInstructionMapping(
/*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
/*NumOperands*/ 3);
AltMappings.push_back(&GPRMapping);
AltMappings.push_back(&FPRMapping);
return AltMappings;
}
case TargetOpcode::G_BITCAST: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
if (Size != 32 && Size != 64)
break;
// If the instruction has any implicit-defs or uses,
// do not mess with it.
if (MI.getNumOperands() != 2)
break;
InstructionMappings AltMappings;
const InstructionMapping &GPRMapping = getInstructionMapping(
/*ID*/ 1, /*Cost*/ 1,
getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
const InstructionMapping &FPRMapping = getInstructionMapping(
/*ID*/ 2, /*Cost*/ 1,
getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
AltMappings.push_back(&GPRMapping);
AltMappings.push_back(&FPRMapping);
AltMappings.push_back(&GPRToFPRMapping);
AltMappings.push_back(&FPRToGPRMapping);
return AltMappings;
}
case TargetOpcode::G_LOAD: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
if (Size != 64)
break;
// If the instruction has any implicit-defs or uses,
// do not mess with it.
if (MI.getNumOperands() != 2)
break;
InstructionMappings AltMappings;
const InstructionMapping &GPRMapping = getInstructionMapping(
/*ID*/ 1, /*Cost*/ 1,
getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
// Addresses are GPR 64-bit.
getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
const InstructionMapping &FPRMapping = getInstructionMapping(
/*ID*/ 2, /*Cost*/ 1,
getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
// Addresses are GPR 64-bit.
getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
AltMappings.push_back(&GPRMapping);
AltMappings.push_back(&FPRMapping);
return AltMappings;
}
default:
break;
}
return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
void AArch64RegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
switch (OpdMapper.getMI().getOpcode()) {
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_LOAD:
// Those ID must match getInstrAlternativeMappings.
assert((OpdMapper.getInstrMapping().getID() >= 1 &&
OpdMapper.getInstrMapping().getID() <= 4) &&
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
default:
llvm_unreachable("Don't know how to handle that operation");
}
}
/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCEIL:
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FNEARBYINT:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
return true;
}
return false;
}
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumOperands = MI.getNumOperands();
assert(NumOperands <= 3 &&
"This code is for instructions with 3 or less operands");
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = Ty.getSizeInBits();
bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
#ifndef NDEBUG
// Make sure all the operands are using similar size and type.
// Should probably be checked by the machine verifier.
// This code won't catch cases where the number of lanes is
// different between the operands.
// If we want to go to that level of details, it is probably
// best to check that the types are the same, period.
// Currently, we just check that the register banks are the same
// for each types.
for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
assert(
AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
RBIdx, OpTy.getSizeInBits()) ==
AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
"Operand has incompatible size");
bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
(void)OpIsFPR;
assert(IsFPR == OpIsFPR && "Operand has incompatible type");
}
#endif // End NDEBUG.
return getInstructionMapping(DefaultMappingID, 1,
getValueMapping(RBIdx, Size), NumOperands);
}
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
// Do we have an explicit floating point instruction?
if (isPreISelGenericFloatingPointOpcode(Op))
return true;
// No. Check if we have a copy-like instruction. If we do, then we could
// still be fed by floating point instructions.
if (Op != TargetOpcode::COPY && !MI.isPHI() &&
!isPreISelGenericOptimizationHint(Op))
return false;
// Check if we already know the register bank.
auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
if (RB == &AArch64::FPRRegBank)
return true;
if (RB == &AArch64::GPRRegBank)
return false;
// We don't know anything.
//
// If we have a phi, we may be able to infer that it will be assigned a FPR
// based off of its inputs.
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
return false;
return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
return Op.isReg() &&
onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
});
}
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FCMP:
return true;
default:
break;
}
return hasFPConstraints(MI, MRI, TRI, Depth);
}
bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
switch (MI.getOpcode()) {
case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_BUILD_VECTOR:
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
return true;
default:
break;
}
return hasFPConstraints(MI, MRI, TRI, Depth);
}
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
// Try the default logic for non-generic instructions that are either copies
// or already have some operands assigned to banks.
if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
Opc == TargetOpcode::G_PHI) {
const RegisterBankInfo::InstructionMapping &Mapping =
getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
}
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
switch (Opc) {
// G_{F|S|U}REM are not listed because they are not legal.
// Arithmetic ops.
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_MUL:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
// Bitwise ops.
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
// Floating point ops.
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
return getSameKindOfOperandsMapping(MI);
case TargetOpcode::G_FPEXT: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
return getInstructionMapping(
DefaultMappingID, /*Cost*/ 1,
getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
/*NumOperands*/ 2);
}
// Shifts.
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR: {
LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
return getInstructionMapping(DefaultMappingID, 1,
&ValMappings[Shift64Imm], 3);
return getSameKindOfOperandsMapping(MI);
}
case TargetOpcode::COPY: {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
// Check if one of the register is not a generic register.
if ((Register::isPhysicalRegister(DstReg) ||
!MRI.getType(DstReg).isValid()) ||
(Register::isPhysicalRegister(SrcReg) ||
!MRI.getType(SrcReg).isValid())) {
const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
if (!DstRB)
DstRB = SrcRB;
else if (!SrcRB)
SrcRB = DstRB;
// If both RB are null that means both registers are generic.
// We shouldn't be here.
assert(DstRB && SrcRB && "Both RegBank were nullptr");
unsigned Size = getSizeInBits(DstReg, MRI, TRI);
return getInstructionMapping(
DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
// We only care about the mapping of the destination.
/*NumOperands*/ 1);
}
// Both registers are generic, use G_BITCAST.
LLVM_FALLTHROUGH;
}
case TargetOpcode::G_BITCAST: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
unsigned Size = DstTy.getSizeInBits();
bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
const RegisterBank &DstRB =
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
const RegisterBank &SrcRB =
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
return getInstructionMapping(
DefaultMappingID, copyCost(DstRB, SrcRB, Size),
getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
// We only care about the mapping of the destination for COPY.
/*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
}
default:
break;
}
unsigned NumOperands = MI.getNumOperands();
// Track the size and bank of each register. We don't do partial mappings.
SmallVector<unsigned, 4> OpSize(NumOperands);
SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
if (!MO.isReg() || !MO.getReg())
continue;
LLT Ty = MRI.getType(MO.getReg());
OpSize[Idx] = Ty.getSizeInBits();
// As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
// For floating-point instructions, scalars go in FPRs.
if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
Ty.getSizeInBits() > 64)
OpRegBankIdx[Idx] = PMI_FirstFPR;
else
OpRegBankIdx[Idx] = PMI_FirstGPR;
}
unsigned Cost = 1;
// Some of the floating-point instructions have mixed GPR and FPR operands:
// fine-tune the computed mapping.
switch (Opc) {
case AArch64::G_DUP: {
Register ScalarReg = MI.getOperand(1).getReg();
LLT ScalarTy = MRI.getType(ScalarReg);
auto ScalarDef = MRI.getVRegDef(ScalarReg);
// s8 is an exception for G_DUP, which we always want on gpr.
if (ScalarTy.getSizeInBits() != 8 &&
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
}
case TargetOpcode::G_TRUNC: {
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
// Integer to FP conversions don't necessarily happen between GPR -> FPR
// regbanks. They can also be done within an FPR register.
Register SrcReg = MI.getOperand(1).getReg();
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
}
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
case TargetOpcode::G_FCMP:
OpRegBankIdx = {PMI_FirstGPR,
/* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
break;
case TargetOpcode::G_BITCAST:
// This is going to be a cross register bank copy and this is expensive.
if (OpRegBankIdx[0] != OpRegBankIdx[1])
Cost = copyCost(
*AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
*AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
OpSize[0]);
break;
case TargetOpcode::G_LOAD:
// Loading in vector unit is slightly more expensive.
// This is actually only true for the LD1R and co instructions,
// but anyway for the fast mode this number does not matter and
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
if (OpRegBankIdx[0] != PMI_FirstGPR)
Cost = 2;
else
// Check if that load feeds fp instructions.
// In that case, we want the default mapping to be on FPR
// instead of blind map every scalar to GPR.
for (const MachineInstr &UseMI :
MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
// If we have at least one direct use in a FP instruction,
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
// Int->FP conversion operations are also captured in onlyDefinesFP().
if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
}
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
if (OpRegBankIdx[0] == PMI_FirstGPR) {
Register VReg = MI.getOperand(0).getReg();
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (onlyDefinesFP(*DefMI, MRI, TRI))
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
break;
case TargetOpcode::G_SELECT: {
// If the destination is FPR, preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
break;
// If we're taking in vectors, we have no choice but to put everything on
// FPRs, except for the condition. The condition must always be on a GPR.
LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
if (SrcTy.isVector()) {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
break;
}
// Try to minimize the number of copies. If we have more floating point
// constrained values than not, then we'll put everything on FPR. Otherwise,
// everything has to be on GPR.
unsigned NumFP = 0;
// Check if the uses of the result always produce floating point values.
//
// For example:
//
// %z = G_SELECT %cond %x %y
// fpr = G_FOO %z ...
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
++NumFP;
// Check if the defs of the source values always produce floating point
// values.
//
// For example:
//
// %x = G_SOMETHING_ALWAYS_FLOAT %a ...
// %z = G_SELECT %cond %x %y
//
// Also check whether or not the sources have already been decided to be
// FPR. Keep track of this.
//
// This doesn't check the condition, since it's just whatever is in NZCV.
// This isn't passed explicitly in a register to fcsel/csel.
for (unsigned Idx = 2; Idx < 4; ++Idx) {
Register VReg = MI.getOperand(Idx).getReg();
MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*DefMI, MRI, TRI))
++NumFP;
}
// If we have more FP constraints than not, then move everything over to
// FPR.
if (NumFP >= 2)
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
// If the first operand belongs to a FPR register bank, then make sure that
// we preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
break;
LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
// Set the register bank of every operand to FPR.
for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
Idx < NumOperands; ++Idx)
OpRegBankIdx[Idx] = PMI_FirstFPR;
}
break;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
// Destination and source need to be FPRs.
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
// Index needs to be a GPR.
OpRegBankIdx[2] = PMI_FirstGPR;
break;
case TargetOpcode::G_INSERT_VECTOR_ELT:
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
// The element may be either a GPR or FPR. Preserve that behaviour.
if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
OpRegBankIdx[2] = PMI_FirstFPR;
else
OpRegBankIdx[2] = PMI_FirstGPR;
// Index needs to be a GPR.
OpRegBankIdx[3] = PMI_FirstGPR;
break;
case TargetOpcode::G_EXTRACT: {
// For s128 sources we have to use fpr.
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
if (SrcTy.getSizeInBits() == 128) {
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
}
break;
}
case TargetOpcode::G_BUILD_VECTOR: {
// If the first source operand belongs to a FPR register bank, then make
// sure that we preserve that.
if (OpRegBankIdx[1] != PMI_FirstGPR)
break;
Register VReg = MI.getOperand(1).getReg();
if (!VReg)
break;
// Get the instruction that defined the source operand reg, and check if
// it's a floating point operation. Or, if it's a type like s16 which
// doesn't have a exact size gpr register class. The exception is if the
// build_vector has all constant operands, which may be better to leave as
// gpr without copies, so it can be matched in imported patterns.
MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned DefOpc = DefMI->getOpcode();
const LLT SrcTy = MRI.getType(VReg);
if (all_of(MI.operands(), [&](const MachineOperand &Op) {
return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
TargetOpcode::G_CONSTANT;
}))
break;
if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
SrcTy.getSizeInBits() < 32 ||
getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
// Have a floating point op.
// Make sure every operand gets mapped to a FPR register class.
unsigned NumOperands = MI.getNumOperands();
for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
OpRegBankIdx[Idx] = PMI_FirstFPR;
}
break;
}
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
case TargetOpcode::G_VECREDUCE_OR:
case TargetOpcode::G_VECREDUCE_XOR:
case TargetOpcode::G_VECREDUCE_SMAX:
case TargetOpcode::G_VECREDUCE_SMIN:
case TargetOpcode::G_VECREDUCE_UMAX:
case TargetOpcode::G_VECREDUCE_UMIN:
// Reductions produce a scalar value from a vector, the scalar should be on
// FPR bank.
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
// These reductions also take a scalar accumulator input.
// Assign them FPR for now.
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
break;
}
// Finally construct the computed mapping.
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
if (!Mapping->isValid())
return getInvalidInstructionMapping();
OpdsMapping[Idx] = Mapping;
}
}
return getInstructionMapping(DefaultMappingID, Cost,
getOperandsMapping(OpdsMapping), NumOperands);
}