blob: 9005f197ea4cdc470640db7bca32b9aa83acaf06 [file] [log] [blame]
//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "legalizer"
using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;
/// Work out how \p OrigTy splits into pieces of type \p NarrowTy.
///
/// \returns a pair {number of \p NarrowTy pieces, number of leftover pieces}.
/// If \p OrigTy is an exact multiple of \p NarrowTy the second element is 0 and
/// \p LeftoverTy is left untouched (invalid). Otherwise \p LeftoverTy is set to
/// the type covering the remaining bits.
///
/// Returns {-1, -1} when \p NarrowTy is a vector and the remaining bits are not
/// a whole number of \p OrigTy scalar elements.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned OrigBits = OrigTy.getSizeInBits();
  unsigned PieceBits = NarrowTy.getSizeInBits();
  unsigned PieceCount = OrigBits / PieceBits;
  unsigned RemainderBits = OrigBits - PieceCount * PieceBits;
  assert(OrigBits > PieceBits);

  // Exact fit: nothing is left over and no leftover type is produced.
  if (RemainderBits == 0)
    return {PieceCount, 0};

  if (!NarrowTy.isVector()) {
    // Scalar pieces: the remainder is simply a narrower scalar.
    LeftoverTy = LLT::scalar(RemainderBits);
  } else {
    // Vector pieces: the remainder must consist of whole elements.
    unsigned EltBits = OrigTy.getScalarSizeInBits();
    if (RemainderBits % EltBits != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(RemainderBits / EltBits, EltBits);
  }

  int LeftoverCount = RemainderBits / LeftoverTy.getSizeInBits();
  return std::make_pair(PieceCount, LeftoverCount);
}
/// Map the scalar \p Ty to the IR floating-point type of the same bit width.
///
/// \returns nullptr if \p Ty is not a scalar or its width is not one of
/// 16, 32, 64, 80 or 128 bits.
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  const unsigned Bits = Ty.getSizeInBits();
  if (Bits == 16)
    return Type::getHalfTy(Ctx);
  if (Bits == 32)
    return Type::getFloatTy(Ctx);
  if (Bits == 64)
    return Type::getDoubleTy(Ctx);
  if (Bits == 80)
    return Type::getX86_FP80Ty(Ctx);
  if (Bits == 128)
    return Type::getFP128Ty(Ctx);
  return nullptr;
}
/// Construct a helper for \p MF that reports changes to \p Observer and emits
/// new instructions through \p Builder. The LegalizerInfo and TargetLowering
/// are both taken from the subtarget of \p MF.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
LI(*MF.getSubtarget().getLegalizerInfo()),
TLI(*MF.getSubtarget().getTargetLowering()) { }
/// Construct a helper for \p MF that uses the caller-supplied LegalizerInfo
/// \p LI instead of the one owned by the subtarget. The TargetLowering is
/// still taken from the subtarget of \p MF.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
: MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
TLI(*MF.getSubtarget().getTargetLowering()) { }
/// Apply a single legalization step to \p MI.
///
/// Intrinsics are handed straight to LegalizerInfo::legalizeIntrinsic. For
/// every other opcode the action reported by LegalizerInfo::getAction selects
/// which transformation of this helper is run.
///
/// \returns AlreadyLegal, Legalized or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  // New instructions are inserted at MI and inherit its debug location.
  MIRBuilder.setInstrAndDebugLoc(MI);

  const unsigned Opc = MI.getOpcode();
  const bool IsIntrinsic = Opc == TargetOpcode::G_INTRINSIC ||
                           Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
  if (IsIntrinsic)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal: {
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  }
  case Libcall: {
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  }
  case NarrowScalar: {
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  }
  case WidenScalar: {
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  }
  case Bitcast: {
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  }
  case Lower: {
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  }
  case FewerElements: {
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  }
  case MoreElements: {
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  }
  case Custom: {
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  }
  default: {
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
  }
}
/// Split \p Reg into \p NumParts registers of type \p Ty with a single unmerge.
/// The newly created registers are appended to \p VRegs, and the unmerge
/// defines every register held in \p VRegs.
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int Remaining = NumParts; Remaining > 0; --Remaining) {
    Register PartReg = MRI.createGenericVirtualRegister(Ty);
    VRegs.push_back(PartReg);
  }

  MIRBuilder.buildUnmerge(VRegs, Reg);
}
/// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit, plus a
/// leftover piece for any remaining bits.
///
/// The \p MainTy pieces are appended to \p VRegs. If there are remaining bits,
/// \p LeftoverTy is set to their type and the leftover register is appended to
/// \p LeftoverRegs; otherwise \p LeftoverTy stays invalid.
///
/// \returns false (emitting nothing) if \p MainTy is a vector and the remaining
/// bits are not a whole number of its elements; true otherwise.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  const unsigned TotalBits = RegTy.getSizeInBits();
  const unsigned MainBits = MainTy.getSizeInBits();
  const unsigned MainCount = TotalBits / MainBits;
  const unsigned TailBits = TotalBits - MainCount * MainBits;

  // Even split: a single unmerge produces all the pieces.
  if (TailBits == 0) {
    for (unsigned Idx = 0; Idx != MainCount; ++Idx) {
      Register PartReg = MRI.createGenericVirtualRegister(MainTy);
      VRegs.push_back(PartReg);
    }
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  // Uneven split: decide the leftover type before emitting anything.
  if (!MainTy.isVector()) {
    LeftoverTy = LLT::scalar(TailBits);
  } else {
    const unsigned EltBits = MainTy.getScalarSizeInBits();
    if (TailBits % EltBits != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(TailBits / EltBits, EltBits);
  }

  // Pull each main piece out at its bit offset.
  for (unsigned Idx = 0; Idx != MainCount; ++Idx) {
    Register PartReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(PartReg);
    MIRBuilder.buildExtract(PartReg, Reg, MainBits * Idx);
  }

  // Then the remaining bits that follow the main pieces.
  unsigned BitOffset = MainBits * MainCount;
  while (BitOffset < TotalBits) {
    Register TailReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(TailReg);
    MIRBuilder.buildExtract(TailReg, Reg, BitOffset);
    BitOffset += TailBits;
  }
  return true;
}
/// Reassemble \p DstReg (of type \p ResultTy) from \p PartRegs, each of type
/// \p PartTy, followed by \p LeftoverRegs, each of type \p LeftoverTy.
///
/// When \p LeftoverTy is invalid there must be no leftover registers and the
/// result is built with one merge, concat-vectors or build-vector instruction.
/// Otherwise the pieces are inserted one by one into an undef value, with the
/// last leftover insert writing \p DstReg directly.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // Pick the combining instruction from the result and part kinds.
    if (!ResultTy.isVector())
      MIRBuilder.buildMerge(DstReg, PartRegs);
    else if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  const unsigned PartBits = PartTy.getSizeInBits();
  const unsigned LeftoverBits = LeftoverTy.getSizeInBits();

  // Start from undef and thread the accumulated value through each insert.
  Register Accum = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(Accum);

  unsigned BitOffset = 0;
  for (Register Part : PartRegs) {
    Register Next = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(Next, Accum, Part, BitOffset);
    Accum = Next;
    BitOffset += PartBits;
  }

  const unsigned NumLeftover = LeftoverRegs.size();
  for (unsigned Idx = 0; Idx != NumLeftover; ++Idx) {
    // The final insert defines the original output register, avoiding a copy.
    const bool IsLast = Idx + 1 == NumLeftover;
    Register Next =
        IsLast ? DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(Next, Accum, LeftoverRegs[Idx], BitOffset);
    Accum = Next;
    BitOffset += LeftoverBits;
  }
}
/// Append the result registers of the G_UNMERGE_VALUES instruction \p MI to
/// \p Regs. All operands except the last one are treated as results.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int NumDefs = MI.getNumOperands() - 1;
  Regs.reserve(Regs.size() + NumDefs);
  for (int DefIdx = 0; DefIdx != NumDefs; ++DefIdx)
    Regs.push_back(MI.getOperand(DefIdx).getReg());
}
/// Append to \p Parts the \p GCDTy-typed pieces of \p SrcReg. If \p SrcReg
/// already has type \p GCDTy it is appended unchanged; otherwise it is
/// unmerged into \p GCDTy pieces and the unmerge results are appended.
void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  // Nothing to split when the source is already a single GCD-typed piece.
  if (MRI.getType(SrcReg) == GCDTy) {
    Parts.push_back(SrcReg);
    return;
  }

  // Break the source into common-type sized pieces.
  auto Pieces = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  getUnmergeResults(Parts, *Pieces);
}
/// Compute the GCD type of \p SrcReg's type, \p NarrowTy and \p DstTy, split
/// \p SrcReg into pieces of that type (appended to \p Parts), and return it.
LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT SrcNarrowGCD = getGCDType(SrcTy, NarrowTy);
  const LLT CommonTy = getGCDType(SrcNarrowGCD, DstTy);

  extractGCDType(Parts, CommonTy, SrcReg);
  return CommonTy;
}
/// Regroup the pieces in \p VRegs (expected to be of type \p GCDTy) into
/// \p NarrowTy registers that together cover the least common multiple type of
/// \p DstTy and \p NarrowTy, padding once the original pieces run out.
///
/// \p PadStrategy selects the padding value: G_ZEXT pads with a zero constant,
/// G_ANYEXT pads with undef, and any other value is asserted to be G_SEXT,
/// which pads with the last original piece arithmetically shifted right by its
/// bit width minus one.
///
/// On return \p VRegs has been replaced by the regrouped registers.
/// \returns the LCM type.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
SmallVectorImpl<Register> &VRegs,
unsigned PadStrategy) {
LLT LCMTy = getLCMType(DstTy, NarrowTy);
// NumParts: how many NarrowTy registers cover LCMTy.
// NumSubParts: how many GCDTy pieces make up one NarrowTy register.
// NumOrigSrc: how many pieces the caller actually supplied.
int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
int NumOrigSrc = VRegs.size();
// GCDTy-sized padding piece; only created when padding is needed.
Register PadReg;
// Get a value we can use to pad the source value if the sources won't evenly
// cover the result type.
if (NumOrigSrc < NumParts * NumSubParts) {
if (PadStrategy == TargetOpcode::G_ZEXT)
PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
else if (PadStrategy == TargetOpcode::G_ANYEXT)
PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
else {
assert(PadStrategy == TargetOpcode::G_SEXT);
// Shift the sign bit of the low register through the high register.
auto ShiftAmt =
MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
}
}
// Registers for the final merge to be produced.
SmallVector<Register, 4> Remerge(NumParts);
// Registers needed for intermediate merges, which will be merged into a
// source for Remerge.
SmallVector<Register, 4> SubMerge(NumSubParts);
// Once we've fully read off the end of the original source bits, we can reuse
// the same high bits for remaining padding elements.
Register AllPadReg;
// Build merges to the LCM type to cover the original result type.
for (int I = 0; I != NumParts; ++I) {
bool AllMergePartsArePadding = true;
// Build the requested merges to the requested type.
for (int J = 0; J != NumSubParts; ++J) {
int Idx = I * NumSubParts + J;
// Past the end of the supplied pieces: fill the slot with padding.
if (Idx >= NumOrigSrc) {
SubMerge[J] = PadReg;
continue;
}
SubMerge[J] = VRegs[Idx];
// There are meaningful bits here we can't reuse later.
AllMergePartsArePadding = false;
}
// If we've filled up a complete piece with padding bits, we can directly
// emit the natural sized constant if applicable, rather than a merge of
// smaller constants.
if (AllMergePartsArePadding && !AllPadReg) {
if (PadStrategy == TargetOpcode::G_ANYEXT)
AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
else if (PadStrategy == TargetOpcode::G_ZEXT)
AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
// If this is a sign extension, we can't materialize a trivial constant
// with the right type and have to produce a merge.
}
if (AllPadReg) {
// Avoid creating additional instructions if we're just adding additional
// copies of padding bits.
Remerge[I] = AllPadReg;
continue;
}
// A single sub-part already has the requested type; no merge is needed.
if (NumSubParts == 1)
Remerge[I] = SubMerge[0];
else
Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
// In the sign extend padding case, re-use the first all-signbit merge.
if (AllMergePartsArePadding && !AllPadReg)
AllPadReg = Remerge[I];
}
// Hand the regrouped registers back through the in/out parameter.
VRegs = std::move(Remerge);
return LCMTy;
}
/// Merge \p RemergeRegs into a value of type \p LCMTy and define \p DstReg
/// from it.
///
/// - If \p DstReg already has type \p LCMTy, the merge defines it directly.
/// - If both types are scalars, \p DstReg is the truncation of the merge.
/// - If \p LCMTy is a vector, the merge is unmerged into pieces of \p DstReg's
///   type; \p DstReg is the first result and the remaining results go to
///   freshly created registers.
///
/// Any other combination is unreachable.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    // Reuse the merge built above rather than emitting a second, identical
    // merge and leaving the first one dead.
    MIRBuilder.buildUnmerge(UnmergeDefs, Remerge);
    return;
  }

  llvm_unreachable("unhandled case");
}
/// Select the RTLIB libcall that implements generic opcode \p Opcode on
/// operands that are \p Size bits wide.
///
/// Integer opcodes are available for sizes 32, 64 and 128; floating-point
/// opcodes for sizes 32, 64, 80 and 128. Any other size, or an opcode not
/// listed in the switch below, reaches llvm_unreachable, so callers must
/// filter unsupported combinations beforehand.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Return the integer libcall named LibcallPrefix<Size> for Size in
// {32, 64, 128}.
#define RTLIBCASE_INT(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
llvm_unreachable("unexpected size"); \
} \
} while (0)
// Return the floating-point libcall named LibcallPrefix<Size> for Size in
// {32, 64, 80, 128}.
#define RTLIBCASE(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
case 80: \
return RTLIB::LibcallPrefix##80; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
llvm_unreachable("unexpected size"); \
} \
} while (0)
// Every case returns (or hits llvm_unreachable) from inside its macro, so no
// case falls through to the next.
switch (Opcode) {
case TargetOpcode::G_SDIV:
RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
RTLIBCASE_INT(CTLZ_I);
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
RTLIBCASE(SUB_F);
case TargetOpcode::G_FMUL:
RTLIBCASE(MUL_F);
case TargetOpcode::G_FDIV:
RTLIBCASE(DIV_F);
case TargetOpcode::G_FEXP:
RTLIBCASE(EXP_F);
case TargetOpcode::G_FEXP2:
RTLIBCASE(EXP2_F);
case TargetOpcode::G_FREM:
RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
RTLIBCASE(POW_F);
case TargetOpcode::G_FMA:
RTLIBCASE(FMA_F);
case TargetOpcode::G_FSIN:
RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
RTLIBCASE(COS_F);
case TargetOpcode::G_FLOG10:
RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
RTLIBCASE(FLOOR_F);
case TargetOpcode::G_FMINNUM:
RTLIBCASE(FMIN_F);
case TargetOpcode::G_FMAXNUM:
RTLIBCASE(FMAX_F);
case TargetOpcode::G_FSQRT:
RTLIBCASE(SQRT_F);
case TargetOpcode::G_FRINT:
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
}
llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
.removeAttribute(Attribute::NonNull)
.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
// Only tail call if the following instruction is a standard return.
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
return true;
}
/// Emit a call to the external symbol \p Name using calling convention \p CC,
/// passing \p Args and writing the return value to \p Result.
///
/// \returns Legalized if the target's call lowering accepted the call,
/// UnableToLegalize otherwise.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  for (const CallLowering::ArgInfo &Arg : Args)
    Info.OrigArgs.push_back(Arg);

  return CLI.lowerCall(MIRBuilder, Info) ? LegalizerHelper::Legalized
                                         : LegalizerHelper::UnableToLegalize;
}
/// Emit a call to the runtime library routine \p Libcall, passing \p Args and
/// writing the return value to \p Result. The routine's name and calling
/// convention are looked up in the target's TargetLowering.
///
/// \returns UnableToLegalize if the target provides no name for \p Libcall or
/// if call lowering fails; Legalized otherwise.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();

  // A null name means the target does not provide this libcall. Bail out
  // instead of creating an external-symbol operand with a null name.
  const char *Name = TLI.getLibcallName(Libcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}
/// Emit the libcall for \p MI when the result and every source operand share
/// the IR type \p OpType. The libcall is chosen from \p MI's opcode and
/// \p Size; operand 0 receives the result and all remaining operands are
/// passed as arguments in order.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  const RTLIB::Libcall Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> CallArgs;
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    Register ArgReg = MI.getOperand(OpIdx).getReg();
    CallArgs.push_back({ArgReg, OpType});
  }

  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       CallArgs);
}
/// Lower the G_MEMCPY / G_MEMMOVE / G_MEMSET instruction \p MI to a call to the
/// matching runtime library routine.
///
/// Every operand except the trailing 'tail' immediate becomes a call argument:
/// pointer operands are passed as i8 pointers in their address space, all
/// others as integers of the operand's bit width. The call is requested as a
/// tail call when the 'tail' immediate is non-zero and \p MI is in tail
/// position; if it is lowered as a tail call, the instructions following
/// \p MI are erased. \p MI itself is left in place for the caller to erase.
///
/// \returns UnableToLegalize for any other opcode, when the target provides no
/// name for the libcall, or when call lowering fails; Legalized otherwise.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }

  // A null name means the target does not provide this libcall. Bail out
  // before anything is emitted rather than building a callee operand with a
  // null symbol name.
  const char *Name = TLI.getLibcallName(RTLibcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MIRBuilder.getTII(), MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());
  }

  return LegalizerHelper::Legalized;
}
/// Select the RTLIB libcall that performs the conversion \p Opcode from IR
/// type \p FromType to IR type \p ToType. Opcodes other than the FP
/// extend/truncate and FP<->integer conversions listed below are unreachable.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  const MVT DstVT = MVT::getVT(ToType);
  const MVT SrcVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT: {
    return RTLIB::getFPEXT(SrcVT, DstVT);
  }
  case TargetOpcode::G_FPTRUNC: {
    return RTLIB::getFPROUND(SrcVT, DstVT);
  }
  case TargetOpcode::G_FPTOSI: {
    return RTLIB::getFPTOSINT(SrcVT, DstVT);
  }
  case TargetOpcode::G_FPTOUI: {
    return RTLIB::getFPTOUINT(SrcVT, DstVT);
  }
  case TargetOpcode::G_SITOFP: {
    return RTLIB::getSINTTOFP(SrcVT, DstVT);
  }
  case TargetOpcode::G_UITOFP: {
    return RTLIB::getUINTTOFP(SrcVT, DstVT);
  }
  }
  llvm_unreachable("Unsupported libcall function");
}
/// Emit the libcall for the conversion instruction \p MI: operand 1 is passed
/// as the single argument of IR type \p FromType and operand 0 receives the
/// result of IR type \p ToType.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  const RTLIB::Libcall ConvCall =
      getConvRTLibDesc(MI.getOpcode(), ToType, FromType);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return createLibcall(MIRBuilder, ConvCall, {DstReg, ToType},
                       {{SrcReg, FromType}});
}
/// Replace \p MI with a call to a runtime library routine.
///
/// Handles integer division/remainder and G_CTLZ_ZERO_UNDEF, the listed
/// floating-point arithmetic and math opcodes, FP extend/truncate, FP<->integer
/// conversions, and G_MEMCPY / G_MEMMOVE / G_MEMSET.
///
/// \p MI is erased only when the libcall was emitted successfully; on every
/// failure path it is left untouched.
///
/// \returns Legalized on success, UnableToLegalize if the opcode or operand
/// types are unsupported or the call could not be lowered.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // getRTLibDesc only has integer libcalls for these sizes and is
    // unreachable for anything else, so reject other sizes up front.
    if (Size != 32 && Size != 64 && Size != 128) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
    // On failure no call replaced MI, so it must stay in the function: erasing
    // it would drop the memory operation and leave the caller's reference
    // pointing at a deleted instruction.
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  // The libcall now computes the result; the original instruction is dead.
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
uint64_t NarrowSize = NarrowTy.getSizeInBits();
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
// If SizeOp0 is not an exact multiple of NarrowSize, emit
// G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
// FIXME: Although this would also be legal for the general case, it causes
// a lot of regressions in the emitted code (superfluous COPYs, artifact
// combines not being hit). This seems to be a problem related to the
// artifact combiner.
if (SizeOp0 % NarrowSize != 0) {
LLT ImplicitTy = NarrowTy;
if (DstTy.isVector())
ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
MI.eraseFromParent();
return Legalized;
}
int NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
if (DstTy.isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
const APInt &Val = MI.getOperand(1).getCImm()->getValue();
unsigned TotalSize = Ty.getSizeInBits();
unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = TotalSize / NarrowSize;
SmallVector<Register, 4> PartRegs;
for (int I = 0; I != NumParts; ++I) {
unsigned Offset = I * NarrowSize;
auto K = MIRBuilder.buildConstant(NarrowTy,
Val.lshr(Offset).trunc(NarrowSize));
PartRegs.push_back(K.getReg(0));
}
LLT LeftoverTy;
unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
SmallVector<Register, 1> LeftoverRegs;
if (LeftoverBits != 0) {
LeftoverTy = LLT::scalar(LeftoverBits);
auto K = MIRBuilder.buildConstant(
LeftoverTy,
Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
LeftoverRegs.push_back(K.getReg(0));
}
insertParts(MI.getOperand(0).getReg(),
Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
return narrowScalarExt(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_TRUNC: {
if (TypeIdx != 1)
return UnableToLegalize;
uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
return UnableToLegalize;
}
auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FREEZE:
return reduceOperationWidth(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_SADDE:
case TargetOpcode::G_SSUBE:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_MUL:
case TargetOpcode::G_UMULH:
return narrowScalarMul(MI, NarrowTy);
case TargetOpcode::G_EXTRACT:
return narrowScalarExtract(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
auto &MMO = **MI.memoperands_begin();
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
return UnableToLegalize;
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
MIRBuilder.buildAnyExt(DstReg, TmpReg);
MI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD: {
bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
Register DstReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
unsigned MemSize = MMO.getSizeInBits();
if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
} else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
} else if (MemSize > NarrowSize) {
// FIXME: Need to split the load.
return UnableToLegalize;
}
if (ZExt)
MIRBuilder.buildZExt(DstReg, TmpReg);
else
MIRBuilder.buildSExt(DstReg, TmpReg);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_STORE: {
const auto &MMO = **MI.memoperands_begin();
Register SrcReg = MI.getOperand(0).getReg();
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildTrunc(TmpReg, SrcReg);
MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
MI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(MI, 0, NarrowTy);
}
case TargetOpcode::G_SELECT:
return narrowScalarSelect(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR: {
// Legalize bitwise operation:
// A = BinOp<Ty> B, C
// into:
// B1, ..., BN = G_UNMERGE_VALUES B
// C1, ..., CN = G_UNMERGE_VALUES C
// A1 = BinOp<Ty/N> B1, C2
// ...
// AN = BinOp<Ty/N> BN, CN
// A = G_MERGE_VALUES A1, ..., AN
return narrowScalarBasic(MI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
return narrowScalarShift(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP:
if (TypeIdx == 1)
switch (MI.getOpcode()) {
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTPOP:
return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTRTOINT:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
Observer.changingInstr(MI);
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
SrcRegs[i / 2]);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, MI);
for (unsigned i = 0; i < NumParts; ++i) {
DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
MachineInstrBuilder MIB =
MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_INSERT_VECTOR_ELT: {
if (TypeIdx != 2)
return UnableToLegalize;
int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, OpIdx);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_ICMP: {
uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
if (NarrowSize * 2 != SrcSize)
return UnableToLegalize;
Observer.changingInstr(MI);
Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
} else {
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
}
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_SEXT_INREG: {
if (TypeIdx != 0)
return UnableToLegalize;
int64_t SizeInBits = MI.getOperand(2).getImm();
// So long as the new type has more bits than the bits we're extending we
// don't need to break it apart.
if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
Observer.changingInstr(MI);
// We don't lose any non-extension bits by truncating the src and
// sign-extending the dst.
MachineOperand &MO1 = MI.getOperand(1);
auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
MO1.setReg(TruncMIB.getReg(0));
MachineOperand &MO2 = MI.getOperand(0);
Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
MIRBuilder.buildSExt(MO2, DstExt);
MO2.setReg(DstExt);
Observer.changedInstr(MI);
return Legalized;
}
// Break it apart. Components below the extension point are unmodified. The
// component containing the extension point becomes a narrower SEXT_INREG.
// Components above it are ashr'd from the component containing the
// extension point.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
// List the registers where the destination will be scattered.
SmallVector<Register, 2> DstRegs;
// List the registers where the source will be split.
SmallVector<Register, 2> SrcRegs;
// Create all the temporary registers.
for (int i = 0; i < NumParts; ++i) {
Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
SrcRegs.push_back(SrcReg);
}
// Explode the big arguments into smaller chunks.
MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
Register AshrCstReg =
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
.getReg(0);
Register FullExtensionReg = 0;
Register PartialExtensionReg = 0;
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i) {
if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
DstRegs.push_back(SrcRegs[i]);
else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
assert(PartialExtensionReg &&
"Expected to visit partial extension before full");
if (FullExtensionReg) {
DstRegs.push_back(FullExtensionReg);
continue;
}
DstRegs.push_back(
MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
.getReg(0));
FullExtensionReg = DstRegs.back();
} else {
DstRegs.push_back(
MIRBuilder
.buildInstr(
TargetOpcode::G_SEXT_INREG, {NarrowTy},
{SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
.getReg(0));
PartialExtensionReg = DstRegs.back();
}
}
// Gather the destination registers into the final destination.
Register DstReg = MI.getOperand(0).getReg();
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_BITREVERSE: {
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
SmallVector<Register, 2> SrcRegs, DstRegs;
unsigned NumParts = SizeOp0 / NarrowSize;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
for (unsigned i = 0; i < NumParts; ++i) {
auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
{SrcRegs[NumParts - 1 - i]});
DstRegs.push_back(DstPart.getReg(0));
}
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, 2);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOUI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOSI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
}
}
/// Reinterpret \p Val as a scalar with the same total size in bits.
///
/// - A scalar is returned unchanged.
/// - A pointer is converted with G_PTRTOINT; if its address space is
///   non-integral an invalid (default-constructed) Register is returned
///   instead.
/// - Anything else is asserted to be a vector and is G_BITCAST to the scalar
///   type, going through a G_PTRTOINT first when the elements are pointers.
///
/// \returns the register holding the scalar value, or an invalid Register.
Register LegalizerHelper::coerceToScalar(Register Val) {
  const LLT ValTy = MRI.getType(Val);
  if (ValTy.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  const LLT ScalarTy = LLT::scalar(ValTy.getSizeInBits());

  if (ValTy.isPointer()) {
    // Give up on pointers in non-integral address spaces.
    if (DL.isNonIntegralAddressSpace(ValTy.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(ScalarTy, Val).getReg(0);
  }

  assert(ValTy.isVector());
  Register Src = Val;
  // NOTE(review): this builds a G_PTRTOINT from a pointer vector directly to
  // the full-width scalar type — confirm that this is the intended form.
  if (ValTy.getElementType().isPointer())
    Src = MIRBuilder.buildPtrToInt(ScalarTy, Src).getReg(0);
  return MIRBuilder.buildBitcast(ScalarTy, Src).getReg(0);
}
/// Widen use operand \p OpIdx of \p MI in place.
///
/// Builds an \p ExtOpcode instruction that produces a \p WideTy value from the
/// operand's current register and rewrites the operand to read that result.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &Operand = MI.getOperand(OpIdx);
  const Register Widened =
      MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {Operand}).getReg(0);
  Operand.setReg(Widened);
}
/// Narrow use operand \p OpIdx of \p MI in place.
///
/// Builds a G_TRUNC of the operand's current register to \p NarrowTy and
/// rewrites the operand to read the truncated value.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &Operand = MI.getOperand(OpIdx);
  const Register Narrowed = MIRBuilder.buildTrunc(NarrowTy, Operand).getReg(0);
  Operand.setReg(Narrowed);
}
/// Widen def operand \p OpIdx of \p MI in place.
///
/// A fresh \p WideTy virtual register becomes the new definition of \p MI,
/// and a \p TruncOpcode instruction converting it back into the original
/// destination register is built one position past the builder's current
/// insertion point (presumably right after \p MI — the caller is expected to
/// have positioned the builder there).
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &Def = MI.getOperand(OpIdx);
  const Register WideDef = MRI.createGenericVirtualRegister(WideTy);

  // Advance the insertion point so the conversion is emitted after it.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

  // Def still names the original register here; it is only rewritten below.
  MIRBuilder.buildInstr(TruncOpcode, {Def}, {WideDef});
  Def.setReg(WideDef);
}
/// Narrow def operand \p OpIdx of \p MI in place.
///
/// A fresh \p NarrowTy virtual register becomes the new definition of \p MI,
/// and an \p ExtOpcode instruction extending it into the original destination
/// register is built one position past the builder's current insertion point
/// (presumably right after \p MI — the caller is expected to have positioned
/// the builder there).
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &Def = MI.getOperand(OpIdx);
  const Register NarrowDef = MRI.createGenericVirtualRegister(NarrowTy);

  // Advance the insertion point so the extension is emitted after it.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

  // Def still names the original register here; it is only rewritten below.
  MIRBuilder.buildInstr(ExtOpcode, {Def}, {NarrowDef});
  Def.setReg(NarrowDef);
}
/// Make def operand \p OpIdx of \p MI produce the wider type \p WideTy.
///
/// The builder's insertion point is advanced by one position, then
/// widenWithUnmerge() creates the wide register and the instructions that
/// recover the original register from it; the operand is rewritten to define
/// that wide register.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &Def = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  const Register WideDef = widenWithUnmerge(WideTy, Def.getReg());
  Def.setReg(WideDef);
}
/// Pad use operand \p OpIdx of \p MI with undef so that it has type \p MoreTy.
///
/// When the new element count is a whole multiple of the old one, the operand
/// is concatenated with undef vectors of its own type (G_CONCAT_VECTORS).
/// Otherwise it is inserted at bit offset 0 into an undef \p MoreTy value
/// (G_INSERT). In both cases the operand is rewritten to read the padded
/// value.
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &Operand = MI.getOperand(OpIdx);
  const LLT OrigTy = MRI.getType(Operand.getReg());

  const unsigned OrigNumElts = OrigTy.getNumElements();
  const unsigned MoreNumElts = MoreTy.getNumElements();
  const unsigned NumPieces = MoreNumElts / OrigNumElts;

  if (NumPieces * OrigNumElts != MoreNumElts) {
    // Not an even multiple: drop the original value into an undef wide value.
    const Register Padded = MRI.createGenericVirtualRegister(MoreTy);
    const Register WideUndef = MIRBuilder.buildUndef(MoreTy).getReg(0);
    MIRBuilder.buildInsert(Padded, WideUndef, Operand.getReg(), 0);
    Operand.setReg(Padded);
    return;
  }

  // Even multiple: the original value followed by NumPieces - 1 undef pieces.
  const Register PieceUndef = MIRBuilder.buildUndef(OrigTy).getReg(0);
  SmallVector<Register, 8> Pieces;
  Pieces.push_back(Operand.getReg());
  for (unsigned I = 1; I != NumPieces; ++I)
    Pieces.push_back(PieceUndef);

  const Register Padded =
      MIRBuilder.buildConcatVectors(MoreTy, Pieces).getReg(0);
  Operand.setReg(Padded);
}
/// Rewrite use operand \p OpIdx of \p MI to read a G_BITCAST of its current
/// value to \p CastTy.
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Operand = MI.getOperand(OpIdx);
  const Register Casted = MIRBuilder.buildBitcast(CastTy, Operand).getReg(0);
  Operand.setReg(Casted);
}
/// Make def operand \p OpIdx of \p MI produce a value of type \p CastTy.
///
/// A fresh \p CastTy virtual register becomes the new definition, and a
/// G_BITCAST from it into the original destination register is built one
/// position past the builder's current insertion point.
void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Def = MI.getOperand(OpIdx);
  const Register CastDef = MRI.createGenericVirtualRegister(CastTy);

  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

  // Def still names the original register here; it is only rewritten below.
  MIRBuilder.buildBitcast(Def, CastDef);
  Def.setReg(CastDef);
}
/// Widen the source type (type index 1) of a G_MERGE_VALUES to \p WideTy.
///
/// Returns UnableToLegalize for any other type index and for vector
/// destinations. Otherwise \p MI is erased and replaced as follows:
///
///  - If \p WideTy is at least as large as the destination, every source is
///    zero-extended to \p WideTy, shifted to its bit position and OR'd into a
///    single value. That value is truncated to the destination when
///    \p WideTy is strictly larger, or converted with G_INTTOPTR when the
///    sizes match and the destination is a pointer.
///
///  - Otherwise the sources are split into pieces of GCD(source size, wide
///    size) bits, padded with undef pieces up to a whole number of \p WideTy
///    values, regrouped into \p WideTy merges, and merged again. A final
///    G_TRUNC is emitted when the padded size exceeds the destination size.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy values needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The last OR can define the destination directly when no conversion
      // is needed afterwards.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9
  // %2:_(s8) = G_TRUNC %10

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  // Number of GCD-sized pieces in one WideTy value, and in all of them.
  const int PartsPerGCD = WideSize / GCD;
  const int NumPartsNeeded = NumMerge * PartsPerGCD;

  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  // The comparison is made in GCD-sized pieces (not bits) so that an undef is
  // only created when padding is actually required, and only as many pieces
  // are appended as the merges below consume.
  if (static_cast<int>(Unmerges.size()) < NumPartsNeeded) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    Unmerges.append(NumPartsNeeded - Unmerges.size(), UndefReg);
  }

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Create a new \p WideTy register and emit the instructions that define
/// \p OrigReg from it.
///
/// The wide register is (when needed) merged with undef values up to the
/// least-common-multiple type of \p WideTy and the type of \p OrigReg, and
/// that value is then unmerged so that the first result is \p OrigReg and the
/// remaining results are fresh, otherwise unused registers.
///
/// \returns the newly created \p WideTy register; the caller is expected to
/// give it a definition.
Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  const Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  const LLT OrigTy = MRI.getType(OrigReg);
  const LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumWidePieces = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumOrigPieces = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  Register ToUnmerge = WideReg;
  if (NumWidePieces > 1) {
    const Register WideUndef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> Pieces;
    Pieces.push_back(WideReg);
    for (int I = 1; I != NumWidePieces; ++I)
      Pieces.push_back(WideUndef);
    ToUnmerge = MIRBuilder.buildMerge(LCMTy, Pieces).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> Results;
  Results.push_back(OrigReg);
  for (int I = 1; I != NumOrigPieces; ++I)
    Results.push_back(MRI.createGenericVirtualRegister(OrigTy));

  MIRBuilder.buildUnmerge(Results, ToUnmerge);
  return WideReg;
}
/// Widen the result type (type index 0) of a G_UNMERGE_VALUES to \p WideTy.
///
/// Returns UnableToLegalize for any other type index, for vector sources, for
/// non-scalar results, and for pointer sources that cannot be handled (a
/// non-integral address space, or a pointer that would need widening).
///
/// Otherwise \p MI is erased and replaced by either:
///  - a G_TRUNC / G_LSHR + G_TRUNC sequence extracting each result directly
///    from the (possibly any-extended) source, when \p WideTy is at least as
///    large as the source; or
///  - an unmerge of the (possibly any-extended) source into \p WideTy values,
///    which are then unmerged again, or split into GCD-typed pieces and
///    remerged, to form the original results.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // The last operand is the value being split; all earlier ones are results.
  const int NumResults = MI.getNumOperands() - 1;
  Register Src = MI.getOperand(NumResults).getReg();
  LLT SrcTy = MRI.getType(Src);
  if (SrcTy.isVector())
    return UnableToLegalize;

  const Register FirstResult = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(FirstResult);
  if (!ResultTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      // Work on the integer value of the pointer from here on.
      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcTy, Src).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      Src = MIRBuilder.buildAnyExt(WideTy, Src).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type: result 0 is a plain truncate, result I is the source
    // shifted right by I result-widths and then truncated.
    const unsigned ResultBits = ResultTy.getSizeInBits();
    MIRBuilder.buildTrunc(FirstResult, Src);

    unsigned BitOffset = ResultBits;
    for (int I = 1; I != NumResults; ++I, BitOffset += ResultBits) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, BitOffset);
      auto Shifted = MIRBuilder.buildLShr(SrcTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shifted);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  const LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = Src;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto WideUnmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, ResultTy);
  const int NumWideValues = WideUnmerge->getNumOperands() - 1;
  const int PartsPerRemerge =
      ResultTy.getSizeInBits() / GCDTy.getSizeInBits();

  if (PartsPerRemerge != 1) {
    // Split every wide value into GCD-typed pieces, then rebuild each
    // original result from its consecutive run of pieces.
    SmallVector<Register, 16> GCDParts;
    for (int I = 0; I != NumWideValues; ++I)
      extractGCDType(GCDParts, GCDTy, WideUnmerge.getReg(I));

    const ArrayRef<Register> AllParts(GCDParts);
    for (int I = 0; I != NumResults; ++I) {
      MIRBuilder.buildMerge(
          MI.getOperand(I).getReg(),
          AllParts.slice(I * PartsPerRemerge, PartsPerRemerge));
    }
  } else {
    // Directly unmerge to the destination without going through a GCD type.
    const int PartsPerUnmerge =
        WideTy.getSizeInBits() / ResultTy.getSizeInBits();

    int ResultIdx = 0;
    for (int I = 0; I != NumWideValues; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J, ++ResultIdx) {
        // Real results first; excess components get fresh dead defs.
        const Register Def =
            ResultIdx < NumResults
                ? MI.getOperand(ResultIdx).getReg()
                : MRI.createGenericVirtualRegister(ResultTy);
        MIB.addDef(Def);
      }

      MIB.addUse(WideUnmerge.getReg(I));
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Widen one of the types of a G_EXTRACT to \p WideTy.
///
/// For type index 0 (the result) with scalar/pointer source and scalar
/// result, \p MI is erased and replaced by a shift-and-truncate sequence (or
/// just an extend/truncate when the offset is 0). Vector types, pointer
/// results and non-integral pointer sources yield UnableToLegalize.
///
/// For any other type index the source operand is any-extended in place. For
/// a vector source this is only done when the result is the element type and
/// the offset is element-aligned; the offset immediate is rescaled and the
/// result is widened to the scalar type of \p WideTy as well.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx != 0) {
    if (SrcTy.isScalar()) {
      Observer.changingInstr(MI);
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Only element extraction at an element boundary is supported for
    // vector sources.
    if (!SrcTy.isVector() || DstTy != SrcTy.getElementType() ||
        Offset % SrcTy.getScalarSizeInBits() != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    // Rescale the bit offset by the ratio of the new to the old source size.
    MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                            Offset);
    widenScalarDst(MI, WideTy.getScalarType(), 0);
    Observer.changedInstr(MI);
    return Legalized;
  }

  // TypeIdx == 0: widen the result.
  if (SrcTy.isVector() || DstTy.isVector())
    return UnableToLegalize;

  SrcOp Src(SrcReg);
  if (SrcTy.isPointer()) {
    // Extracts from pointers can be handled only if they are really just
    // simple integers.
    const DataLayout &DL = MIRBuilder.getDataLayout();
    if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
      return UnableToLegalize;

    const LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
    Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
    SrcTy = SrcAsIntTy;
  }

  if (DstTy.isPointer())
    return UnableToLegalize;

  if (Offset == 0) {
    // Avoid a shift in the degenerate case.
    auto Resized = MIRBuilder.buildAnyExtOrTrunc(WideTy, Src);
    MIRBuilder.buildTrunc(DstReg, Resized);
    MI.eraseFromParent();
    return Legalized;
  }

  // Do a shift in the source type, or in WideTy if that is larger.
  const bool ShiftInWideTy = WideTy.getSizeInBits() > SrcTy.getSizeInBits();
  if (ShiftInWideTy)
    Src = MIRBuilder.buildAnyExt(WideTy, Src);
  const LLT ShiftTy = ShiftInWideTy ? WideTy : SrcTy;

  auto ShiftAmt = MIRBuilder.buildConstant(ShiftTy, Offset);
  auto Shifted = MIRBuilder.buildLShr(ShiftTy, Src, ShiftAmt);
  MIRBuilder.buildTrunc(DstReg, Shifted);
  MI.eraseFromParent();
  return Legalized;
}
/// Widen type index 0 of a G_INSERT to the scalar type \p WideTy.
///
/// Operand 1 is any-extended to \p WideTy and the result is widened through
/// widenScalarDst() using its default operand index and conversion opcode.
/// Any other type index, or a vector \p WideTy, yields UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;
  if (WideTy.isVector())
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
/// Widen the arithmetic type (type index 0) of an add/sub-with-overflow
/// instruction (G_[SU]ADDO, G_[SU]SUBO, G_[SU]ADDE, G_[SU]SUBE) to \p WideTy.
///
/// Both inputs are sign- or zero-extended (matching the signedness of the
/// opcode) and the operation is redone in \p WideTy: as a plain G_ADD/G_SUB
/// for the *O forms, or as G_UADDE/G_USUBE with the original carry-in for the
/// *E forms. Overflow is reported when the wide result differs from the
/// re-extension of its own truncation to the original type. \p MI is erased.
///
/// Widening type index 1 is not implemented and yields UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  const unsigned OrigOpc = MI.getOpcode();

  // Pick the wide operation; the carry-consuming forms also need operand 4.
  unsigned WideOpc;
  Optional<Register> CarryIn = None;
  switch (OrigOpc) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
    WideOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    WideOpc = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
    WideOpc = TargetOpcode::G_UADDE;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
    WideOpc = TargetOpcode::G_USUBE;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  const bool IsSigned = OrigOpc == TargetOpcode::G_SADDO ||
                        OrigOpc == TargetOpcode::G_SSUBO ||
                        OrigOpc == TargetOpcode::G_SADDE ||
                        OrigOpc == TargetOpcode::G_SSUBE;
  const unsigned ExtOpc =
      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  auto WideLHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(2)});
  auto WideRHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(3)});

  // Do the arithmetic in the larger type.
  Register WideResult;
  if (!CarryIn) {
    WideResult =
        MIRBuilder.buildInstr(WideOpc, {WideTy}, {WideLHS, WideRHS}).getReg(0);
  } else {
    const LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    WideResult = MIRBuilder
                     .buildInstr(WideOpc, {WideTy, CarryOutTy},
                                 {WideLHS, WideRHS, *CarryIn})
                     .getReg(0);
  }

  // Round-trip the wide result through the original type.
  const LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto Narrowed = MIRBuilder.buildTrunc(OrigTy, WideResult);
  auto RoundTrip = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {Narrowed});

  // There is no overflow if the round-tripped value equals the wide result.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), WideResult,
                       RoundTrip);

  // Now trunc the wide result to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), WideResult);
  MI.eraseFromParent();
  return Legalized;
}
/// Widen a saturating add/sub/shift (G_[SU]ADDSAT, G_[SU]SUBSAT,
/// G_[SU]SHLSAT) to \p WideTy.
///
/// With N the original and M the wide scalar width, this emits:
///   1. an any-extend of the inputs from N to M bits (the shift amount of a
///      saturating shift is zero-extended instead),
///   2. a left shift by M-N (not applied to a shift amount),
///   3. the same saturating opcode in the wide type,
///   4. an arithmetic (signed) or logical (unsigned) right shift by M-N,
///   5. a truncate into the original destination.
/// \p MI is erased. \p TypeIdx is not consulted.
///
/// It may be more efficient to lower this to a min and a max operation in
/// the higher precision arithmetic if the promoted operation isn't legal,
/// but this decision is up to the target's lowering request.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  const unsigned Opc = MI.getOpcode();
  const bool IsShift =
      Opc == TargetOpcode::G_SSHLSAT || Opc == TargetOpcode::G_USHLSAT;
  const bool IsSigned = Opc == TargetOpcode::G_SADDSAT ||
                        Opc == TargetOpcode::G_SSUBSAT ||
                        Opc == TargetOpcode::G_SSHLSAT;

  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned WideBits = WideTy.getScalarSizeInBits();
  const unsigned ExtraBits =
      WideBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto WideLHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  MachineInstrBuilder WideRHS;
  if (IsShift)
    WideRHS = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
  else
    WideRHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));

  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, ExtraBits);
  auto ShiftedLHS = MIRBuilder.buildShl(WideTy, WideLHS, ShiftAmt);
  MachineInstrBuilder ShiftedRHS = WideRHS;
  if (!IsShift)
    ShiftedRHS = MIRBuilder.buildShl(WideTy, WideRHS, ShiftAmt);

  auto WideSat = MIRBuilder.buildInstr(Opc, {WideTy},
                                       {ShiftedLHS, ShiftedRHS}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  MachineInstrBuilder ShiftedBack;
  if (IsSigned)
    ShiftedBack = MIRBuilder.buildAShr(WideTy, WideSat, ShiftAmt);
  else
    ShiftedBack = MIRBuilder.buildLShr(WideTy, WideSat, ShiftAmt);

  MIRBuilder.buildTrunc(DstReg, ShiftedBack);
  MI.eraseFromParent();
  return Legalized;
}
/// Widen the arithmetic type (type index 0) of G_SMULO / G_UMULO to
/// \p WideTy.
///
/// The inputs are sign- or zero-extended and the same opcode is redone in
/// \p WideTy; its value result is truncated into the original result.
/// Overflow is reported when the wide product differs from the sign/zero
/// extension (in register) of its low original-width bits and, only when
/// \p WideTy is narrower than twice the original width, also when the wide
/// multiply itself reports overflow. \p MI is erased.
///
/// Widening type index 1 yields UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize;

  const bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  const Register ResultReg = MI.getOperand(0).getReg();
  const Register OverflowReg = MI.getOperand(1).getReg();
  const Register LHS = MI.getOperand(2).getReg();
  const Register RHS = MI.getOperand(3).getReg();
  const LLT OverflowTy = MRI.getType(OverflowReg);
  const unsigned OrigBits = MRI.getType(LHS).getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
  const unsigned ExtOpc =
      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto WideLHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {LHS});
  auto WideRHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {RHS});
  auto WideMulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                        {WideLHS, WideRHS});

  const Register WideProduct = WideMulo.getReg(0);
  MIRBuilder.buildTrunc(ResultReg, WideProduct);

  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first: signed overflow means the high part does not
  // sign-extend the low part, unsigned overflow means it does not zero-extend
  // it.
  MachineInstrBuilder LowExtended;
  if (IsSigned)
    LowExtended = MIRBuilder.buildSExtInReg(WideTy, WideProduct, OrigBits);
  else
    LowExtended = MIRBuilder.buildZExtInReg(WideTy, WideProduct, OrigBits);

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  const bool WideMulMayOverflow = WideTy.getScalarSizeInBits() < 2 * OrigBits;
  if (!WideMulMayOverflow) {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowReg, WideProduct,
                         LowExtended);
  } else {
    auto HighBitsOverflow = MIRBuilder.buildICmp(
        CmpInst::ICMP_NE, OverflowTy, WideProduct, LowExtended);
    // Finally check if the multiplication in the larger type itself overflowed.
    MIRBuilder.buildOr(OverflowReg, WideMulo.getReg(1), HighBitsOverflow);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_EXTRACT:
return widenScalarExtract(MI, TypeIdx, WideTy);
case TargetOpcode::G_INSERT:
return widenScalarInsert(MI, TypeIdx, WideTy);
case TargetOpcode::G_MERGE_VALUES:
return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
case TargetOpcode::G_SADDE:
case TargetOpcode::G_SSUBE:
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
case TargetOpcode::G_UMULO:
case TargetOpcode::G_SMULO:
return widenScalarMulo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_USHLSAT:
return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP: {
if (TypeIdx == 0) {
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
Register SrcReg = MI.getOperand(1).getReg();
// First ZEXT the input.
auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
LLT CurTy = MRI.getType(SrcReg);
if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
}
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
// The correct result is NewOp - (Difference in widety and current ty).
unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
MIBNewOp = MIRBuilder.buildSub(
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
}
MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_BSWAP: {
Observer.changingInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
MI.getOperand(0).setReg(DstExt);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
LLT Ty = MRI.getType(DstReg);
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
MIRBuilder.buildTrunc(DstReg, ShrReg);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BITREVERSE: {
Observer.changingInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(DstReg);
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
MI.getOperand(0).setReg(DstExt);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
MIRBuilder.buildTrunc(DstReg, Shift);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FREEZE:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SHL:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
assert(TypeIdx == 1);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
widenScalarSrc(MI, WideTy, 1, CvtOp);
widenScalarDst(MI, WideTy);
} else {
assert(TypeIdx == 1);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_UMIN:
case TargetOpcode::G_UMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
// Perform operation at larger width (any extension is fine here, high
// bits don't affect the result) and then truncate the result back to the
// original type.
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
// Explicit extension is required here since high bits affect the result.
widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_STORE: {
if (TypeIdx != 0)
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
if (!Ty.isScalar())
return UnableToLegalize;
Observer.changingInstr(MI);
unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
widenScalarSrc(MI, WideTy, 0, ExtType);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
MRI.getType(MI.getOperand(0).getReg()));
assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
ExtOpc == TargetOpcode::G_ANYEXT) &&
"Illegal Extend");
const APInt &SrcVal = SrcMO.getCImm()->getValue();
const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
? SrcVal.sext(WideTy.getSizeInBits())
: SrcVal.zext(WideTy.getSizeInBits());
Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
APFloat Val = SrcMO.getFPImm()->getValueAPF();
bool LosesInfo;
switch (WideTy.getSizeInBits()) {
case 32:
Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&LosesInfo);
break;
case 64:
Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&LosesInfo);
break;
default:
return UnableToLegalize;
}
assert(!LosesInfo && "extend should always be lossless");
Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_IMPLICIT_DEF: {
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BRCOND:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FCMP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ICMP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
MI.getOperand(1).getPredicate()))
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTR_ADD:
assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
if (TypeIdx == 0) {
Register VecReg = MI.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
Observer.changingInstr(MI);
widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
WideTy.getSizeInBits()),
1, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
// TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
if (TypeIdx == 1) {
Observer.changingInstr(MI);
Register VecReg = MI.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideVecTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
if (TypeIdx == 2) {
Observer.changingInstr(MI);
// TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
return UnableToLegalize;
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FDIV: