| //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // This file contains some helper functions which try to cleanup artifacts |
| // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make |
| // the types match. This file also contains some combines of merges that happens |
| // at the end of the legalization. |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
| #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
| |
| #include "llvm/ADT/SmallBitVector.h" |
| #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
| #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
| #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
| #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
| #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| #include "llvm/CodeGen/GlobalISel/Utils.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/Register.h" |
| #include "llvm/CodeGen/TargetOpcodes.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/Support/Debug.h" |
| |
| #define DEBUG_TYPE "legalizer" |
| |
| namespace llvm { |
| class LegalizationArtifactCombiner { |
| MachineIRBuilder &Builder; |
| MachineRegisterInfo &MRI; |
| const LegalizerInfo &LI; |
| GISelKnownBits *KB; |
| |
| static bool isArtifactCast(unsigned Opc) { |
| switch (Opc) { |
| case TargetOpcode::G_TRUNC: |
| case TargetOpcode::G_SEXT: |
| case TargetOpcode::G_ZEXT: |
| case TargetOpcode::G_ANYEXT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| public: |
| LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, |
| const LegalizerInfo &LI, |
| GISelKnownBits *KB = nullptr) |
| : Builder(B), MRI(MRI), LI(LI), KB(KB) {} |
| |
| bool tryCombineAnyExt(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelObserverWrapper &Observer) { |
| using namespace llvm::MIPatternMatch; |
| assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); |
| |
| Builder.setInstrAndDebugLoc(MI); |
| Register DstReg = MI.getOperand(0).getReg(); |
| Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
| |
| // aext(trunc x) - > aext/copy/trunc x |
| Register TruncSrc; |
| if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
| LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
| if (MRI.getType(DstReg) == MRI.getType(TruncSrc)) |
| replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs, |
| Observer); |
| else |
| Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| |
| // aext([asz]ext x) -> [asz]ext x |
| Register ExtSrc; |
| MachineInstr *ExtMI; |
| if (mi_match(SrcReg, MRI, |
| m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)), |
| m_GSExt(m_Reg(ExtSrc)), |
| m_GZExt(m_Reg(ExtSrc)))))) { |
| Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *ExtMI, DeadInsts); |
| return true; |
| } |
| |
| // Try to fold aext(g_constant) when the larger constant type is legal. |
| auto *SrcMI = MRI.getVRegDef(SrcReg); |
| if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
| const LLT DstTy = MRI.getType(DstReg); |
| if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
| auto &CstVal = SrcMI->getOperand(1); |
| Builder.buildConstant( |
| DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits())); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *SrcMI, DeadInsts); |
| return true; |
| } |
| } |
| return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
| } |
| |
| bool tryCombineZExt(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelObserverWrapper &Observer) { |
| using namespace llvm::MIPatternMatch; |
| assert(MI.getOpcode() == TargetOpcode::G_ZEXT); |
| |
| Builder.setInstrAndDebugLoc(MI); |
| Register DstReg = MI.getOperand(0).getReg(); |
| Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
| |
| // zext(trunc x) - > and (aext/copy/trunc x), mask |
| // zext(sext x) -> and (sext x), mask |
| Register TruncSrc; |
| Register SextSrc; |
| if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) || |
| mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) { |
| LLT DstTy = MRI.getType(DstReg); |
| if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || |
| isConstantUnsupported(DstTy)) |
| return false; |
| LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
| LLT SrcTy = MRI.getType(SrcReg); |
| APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); |
| if (SextSrc && (DstTy != MRI.getType(SextSrc))) |
| SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0); |
| if (TruncSrc && (DstTy != MRI.getType(TruncSrc))) |
| TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); |
| APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits()); |
| Register AndSrc = SextSrc ? SextSrc : TruncSrc; |
| // Elide G_AND and mask constant if possible. |
| // The G_AND would also be removed by the post-legalize redundant_and |
| // combine, but in this very common case, eliding early and regardless of |
| // OptLevel results in significant compile-time and O0 code-size |
| // improvements. Inserting unnecessary instructions between a boolean def |
| // and use can also hinder ISel to detect e.g. that reloading a flags |
| // register is unnecessary. |
| if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) { |
| replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs, |
| Observer); |
| } else { |
| auto Mask = Builder.buildConstant(DstTy, ExtMaskVal); |
| Builder.buildAnd(DstReg, AndSrc, Mask); |
| } |
| markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| |
| // zext(zext x) -> (zext x) |
| Register ZextSrc; |
| if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) { |
| LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
| Observer.changingInstr(MI); |
| MI.getOperand(1).setReg(ZextSrc); |
| Observer.changedInstr(MI); |
| UpdatedDefs.push_back(DstReg); |
| markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| |
| // Try to fold zext(g_constant) when the larger constant type is legal. |
| auto *SrcMI = MRI.getVRegDef(SrcReg); |
| if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
| const LLT DstTy = MRI.getType(DstReg); |
| if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
| auto &CstVal = SrcMI->getOperand(1); |
| Builder.buildConstant( |
| DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits())); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *SrcMI, DeadInsts); |
| return true; |
| } |
| } |
| return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
| } |
| |
| bool tryCombineSExt(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs) { |
| using namespace llvm::MIPatternMatch; |
| assert(MI.getOpcode() == TargetOpcode::G_SEXT); |
| |
| Builder.setInstrAndDebugLoc(MI); |
| Register DstReg = MI.getOperand(0).getReg(); |
| Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
| |
| // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c) |
| Register TruncSrc; |
| if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
| LLT DstTy = MRI.getType(DstReg); |
| if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) |
| return false; |
| LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
| LLT SrcTy = MRI.getType(SrcReg); |
| uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); |
| if (DstTy != MRI.getType(TruncSrc)) |
| TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); |
| Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits); |
| markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| |
| // sext(zext x) -> (zext x) |
| // sext(sext x) -> (sext x) |
| Register ExtSrc; |
| MachineInstr *ExtMI; |
| if (mi_match(SrcReg, MRI, |
| m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)), |
| m_GSExt(m_Reg(ExtSrc)))))) { |
| LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
| Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| |
| // Try to fold sext(g_constant) when the larger constant type is legal. |
| auto *SrcMI = MRI.getVRegDef(SrcReg); |
| if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
| const LLT DstTy = MRI.getType(DstReg); |
| if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
| auto &CstVal = SrcMI->getOperand(1); |
| Builder.buildConstant( |
| DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits())); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *SrcMI, DeadInsts); |
| return true; |
| } |
| } |
| |
| return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
| } |
| |
| bool tryCombineTrunc(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelObserverWrapper &Observer) { |
| using namespace llvm::MIPatternMatch; |
| assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
| |
| Builder.setInstr(MI); |
| Register DstReg = MI.getOperand(0).getReg(); |
| const LLT DstTy = MRI.getType(DstReg); |
| Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
| |
| // Try to fold trunc(g_constant) when the smaller constant type is legal. |
| auto *SrcMI = MRI.getVRegDef(SrcReg); |
| if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
| if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
| auto &CstVal = SrcMI->getOperand(1); |
| Builder.buildConstant( |
| DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits())); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *SrcMI, DeadInsts); |
| return true; |
| } |
| } |
| |
| // Try to fold trunc(merge) to directly use the source of the merge. |
| // This gets rid of large, difficult to legalize, merges |
| if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) { |
| const Register MergeSrcReg = SrcMerge->getSourceReg(0); |
| const LLT MergeSrcTy = MRI.getType(MergeSrcReg); |
| |
| // We can only fold if the types are scalar |
| const unsigned DstSize = DstTy.getSizeInBits(); |
| const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits(); |
| if (!DstTy.isScalar() || !MergeSrcTy.isScalar()) |
| return false; |
| |
| if (DstSize < MergeSrcSize) { |
| // When the merge source is larger than the destination, we can just |
| // truncate the merge source directly |
| if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}})) |
| return false; |
| |
| LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: " |
| << MI); |
| |
| Builder.buildTrunc(DstReg, MergeSrcReg); |
| UpdatedDefs.push_back(DstReg); |
| } else if (DstSize == MergeSrcSize) { |
| // If the sizes match we can simply try to replace the register |
| LLVM_DEBUG( |
| dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: " |
| << MI); |
| replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs, |
| Observer); |
| } else if (DstSize % MergeSrcSize == 0) { |
| // If the trunc size is a multiple of the merge source size we can use |
| // a smaller merge instead |
| if (isInstUnsupported( |
| {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}})) |
| return false; |
| |
| LLVM_DEBUG( |
| dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: " |
| << MI); |
| |
| const unsigned NumSrcs = DstSize / MergeSrcSize; |
| assert(NumSrcs < SrcMI->getNumOperands() - 1 && |
| "trunc(merge) should require less inputs than merge"); |
| SmallVector<Register, 8> SrcRegs(NumSrcs); |
| for (unsigned i = 0; i < NumSrcs; ++i) |
| SrcRegs[i] = SrcMerge->getSourceReg(i); |
| |
| Builder.buildMergeValues(DstReg, SrcRegs); |
| UpdatedDefs.push_back(DstReg); |
| } else { |
| // Unable to combine |
| return false; |
| } |
| |
| markInstAndDefDead(MI, *SrcMerge, DeadInsts); |
| return true; |
| } |
| |
| // trunc(trunc) -> trunc |
| Register TruncSrc; |
| if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
| // Always combine trunc(trunc) since the eventual resulting trunc must be |
| // legal anyway as it must be legal for all outputs of the consumer type |
| // set. |
| LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI); |
| |
| Builder.buildTrunc(DstReg, TruncSrc); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts); |
| return true; |
| } |
| |
| // trunc(ext x) -> x |
| ArtifactValueFinder Finder(MRI, Builder, LI); |
| if (Register FoundReg = |
| Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) { |
| LLT FoundRegTy = MRI.getType(FoundReg); |
| if (DstTy == FoundRegTy) { |
| LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): " |
| << MI;); |
| |
| replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs, |
| Observer); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). |
| bool tryFoldImplicitDef(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs) { |
| unsigned Opcode = MI.getOpcode(); |
| assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || |
| Opcode == TargetOpcode::G_SEXT); |
| |
| if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, |
| MI.getOperand(1).getReg(), MRI)) { |
| Builder.setInstr(MI); |
| Register DstReg = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(DstReg); |
| |
| if (Opcode == TargetOpcode::G_ANYEXT) { |
| // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF |
| if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) |
| return false; |
| LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); |
| Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {}); |
| UpdatedDefs.push_back(DstReg); |
| } else { |
| // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top |
| // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. |
| if (isConstantUnsupported(DstTy)) |
| return false; |
| LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); |
| Builder.buildConstant(DstReg, 0); |
| UpdatedDefs.push_back(DstReg); |
| } |
| |
| markInstAndDefDead(MI, *DefMI, DeadInsts); |
| return true; |
| } |
| return false; |
| } |
| |
| bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs) { |
| |
| assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); |
| |
| const unsigned CastOpc = CastMI.getOpcode(); |
| |
| if (!isArtifactCast(CastOpc)) |
| return false; |
| |
| const unsigned NumDefs = MI.getNumOperands() - 1; |
| |
| const Register CastSrcReg = CastMI.getOperand(1).getReg(); |
| const LLT CastSrcTy = MRI.getType(CastSrcReg); |
| const LLT DestTy = MRI.getType(MI.getOperand(0).getReg()); |
| const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg()); |
| |
| const unsigned CastSrcSize = CastSrcTy.getSizeInBits(); |
| const unsigned DestSize = DestTy.getSizeInBits(); |
| |
| if (CastOpc == TargetOpcode::G_TRUNC) { |
| if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) { |
| // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>) |
| // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 |
| // => |
| // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0 |
| // %2:_(s8) = G_TRUNC %6 |
| // %3:_(s8) = G_TRUNC %7 |
| // %4:_(s8) = G_TRUNC %8 |
| // %5:_(s8) = G_TRUNC %9 |
| |
| unsigned UnmergeNumElts = |
| DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1; |
| LLT UnmergeTy = CastSrcTy.changeElementCount( |
| ElementCount::getFixed(UnmergeNumElts)); |
| |
| if (isInstUnsupported( |
| {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}})) |
| return false; |
| |
| Builder.setInstr(MI); |
| auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg); |
| |
| for (unsigned I = 0; I != NumDefs; ++I) { |
| Register DefReg = MI.getOperand(I).getReg(); |
| UpdatedDefs.push_back(DefReg); |
| Builder.buildTrunc(DefReg, NewUnmerge.getReg(I)); |
| } |
| |
| markInstAndDefDead(MI, CastMI, DeadInsts); |
| return true; |
| } |
| |
| if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) { |
| // %1:_(s16) = G_TRUNC %0(s32) |
| // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1 |
| // => |
| // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 |
| |
| // Unmerge(trunc) can be combined if the trunc source size is a multiple |
| // of the unmerge destination size |
| if (CastSrcSize % DestSize != 0) |
| return false; |
| |
| // Check if the new unmerge is supported |
| if (isInstUnsupported( |
| {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}})) |
| return false; |
| |
| // Gather the original destination registers and create new ones for the |
| // unused bits |
| const unsigned NewNumDefs = CastSrcSize / DestSize; |
| SmallVector<Register, 8> DstRegs(NewNumDefs); |
| for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) { |
| if (Idx < NumDefs) |
| DstRegs[Idx] = MI.getOperand(Idx).getReg(); |
| else |
| DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy); |
| } |
| |
| // Build new unmerge |
| Builder.setInstr(MI); |
| Builder.buildUnmerge(DstRegs, CastSrcReg); |
| UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs); |
| markInstAndDefDead(MI, CastMI, DeadInsts); |
| return true; |
| } |
| } |
| |
| // TODO: support combines with other casts as well |
| return false; |
| } |
| |
| static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, |
| LLT OpTy, LLT DestTy) { |
| // Check if we found a definition that is like G_MERGE_VALUES. |
| switch (MergeOp) { |
| default: |
| return false; |
| case TargetOpcode::G_BUILD_VECTOR: |
| case TargetOpcode::G_MERGE_VALUES: |
| // The convert operation that we will need to insert is |
| // going to convert the input of that type of instruction (scalar) |
| // to the destination type (DestTy). |
| // The conversion needs to stay in the same domain (scalar to scalar |
| // and vector to vector), so if we were to allow to fold the merge |
| // we would need to insert some bitcasts. |
| // E.g., |
| // <2 x s16> = build_vector s16, s16 |
| // <2 x s32> = zext <2 x s16> |
| // <2 x s16>, <2 x s16> = unmerge <2 x s32> |
| // |
| // As is the folding would produce: |
| // <2 x s16> = zext s16 <-- scalar to vector |
| // <2 x s16> = zext s16 <-- scalar to vector |
| // Which is invalid. |
| // Instead we would want to generate: |
| // s32 = zext s16 |
| // <2 x s16> = bitcast s32 |
| // s32 = zext s16 |
| // <2 x s16> = bitcast s32 |
| // |
| // That is not done yet. |
| if (ConvertOp == 0) |
| return true; |
| return !DestTy.isVector() && OpTy.isVector() && |
| DestTy == OpTy.getElementType(); |
| case TargetOpcode::G_CONCAT_VECTORS: { |
| if (ConvertOp == 0) |
| return true; |
| if (!DestTy.isVector()) |
| return false; |
| |
| const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); |
| |
| // Don't handle scalarization with a cast that isn't in the same |
| // direction as the vector cast. This could be handled, but it would |
| // require more intermediate unmerges. |
| if (ConvertOp == TargetOpcode::G_TRUNC) |
| return DestTy.getSizeInBits() <= OpEltSize; |
| return DestTy.getSizeInBits() >= OpEltSize; |
| } |
| } |
| } |
| |
| /// Try to replace DstReg with SrcReg or build a COPY instruction |
| /// depending on the register constraints. |
| static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg, |
| MachineRegisterInfo &MRI, |
| MachineIRBuilder &Builder, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelChangeObserver &Observer) { |
| if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) { |
| Builder.buildCopy(DstReg, SrcReg); |
| UpdatedDefs.push_back(DstReg); |
| return; |
| } |
| SmallVector<MachineInstr *, 4> UseMIs; |
| // Get the users and notify the observer before replacing. |
| for (auto &UseMI : MRI.use_instructions(DstReg)) { |
| UseMIs.push_back(&UseMI); |
| Observer.changingInstr(UseMI); |
| } |
| // Replace the registers. |
| MRI.replaceRegWith(DstReg, SrcReg); |
| UpdatedDefs.push_back(SrcReg); |
| // Notify the observer that we changed the instructions. |
| for (auto *UseMI : UseMIs) |
| Observer.changedInstr(*UseMI); |
| } |
| |
| /// Return the operand index in \p MI that defines \p Def |
| static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { |
| unsigned DefIdx = 0; |
| for (const MachineOperand &Def : MI.defs()) { |
| if (Def.getReg() == SearchDef) |
| break; |
| ++DefIdx; |
| } |
| |
| return DefIdx; |
| } |
| |
| /// This class provides utilities for finding source registers of specific |
| /// bit ranges in an artifact. The routines can look through the source |
| /// registers if they're other artifacts to try to find a non-artifact source |
| /// of a value. |
| class ArtifactValueFinder { |
| MachineRegisterInfo &MRI; |
| MachineIRBuilder &MIB; |
| const LegalizerInfo &LI; |
| |
| // Stores the best register found in the current query so far. |
| Register CurrentBest = Register(); |
| |
| /// Given an concat_vector op \p Concat and a start bit and size, try to |
| /// find the origin of the value defined by that start position and size. |
| /// |
| /// \returns a register with the requested size, or the current best |
| /// register found during the current query. |
| Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit, |
| unsigned Size) { |
| assert(Size > 0); |
| |
| // Find the source operand that provides the bits requested. |
| Register Src1Reg = Concat.getSourceReg(0); |
| unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); |
| |
| // Operand index of the source that provides the start of the bit range. |
| unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
| // Offset into the source at which the bit range starts. |
| unsigned InRegOffset = StartBit % SrcSize; |
| // Check that the bits don't span multiple sources. |
| // FIXME: we might be able return multiple sources? Or create an |
| // appropriate concat to make it fit. |
| if (InRegOffset + Size > SrcSize) |
| return CurrentBest; |
| |
| Register SrcReg = Concat.getReg(StartSrcIdx); |
| if (InRegOffset == 0 && Size == SrcSize) { |
| CurrentBest = SrcReg; |
| return findValueFromDefImpl(SrcReg, 0, Size); |
| } |
| |
| return findValueFromDefImpl(SrcReg, InRegOffset, Size); |
| } |
| |
| /// Given an build_vector op \p BV and a start bit and size, try to find |
| /// the origin of the value defined by that start position and size. |
| /// |
| /// \returns a register with the requested size, or the current best |
| /// register found during the current query. |
| Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit, |
| unsigned Size) { |
| assert(Size > 0); |
| |
| // Find the source operand that provides the bits requested. |
| Register Src1Reg = BV.getSourceReg(0); |
| unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); |
| |
| // Operand index of the source that provides the start of the bit range. |
| unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
| // Offset into the source at which the bit range starts. |
| unsigned InRegOffset = StartBit % SrcSize; |
| |
| if (InRegOffset != 0) |
| return CurrentBest; // Give up, bits don't start at a scalar source. |
| if (Size < SrcSize) |
| return CurrentBest; // Scalar source is too large for requested bits. |
| |
| // If the bits cover multiple sources evenly, then create a new |
| // build_vector to synthesize the required size, if that's been requested. |
| if (Size > SrcSize) { |
| if (Size % SrcSize > 0) |
| return CurrentBest; // Isn't covered exactly by sources. |
| |
| unsigned NumSrcsUsed = Size / SrcSize; |
| // If we're requesting all of the sources, just return this def. |
| if (NumSrcsUsed == BV.getNumSources()) |
| return BV.getReg(0); |
| |
| LLT SrcTy = MRI.getType(Src1Reg); |
| LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy); |
| |
| // Check if the resulting build vector would be legal. |
| LegalizeActionStep ActionStep = |
| LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); |
| if (ActionStep.Action != LegalizeActions::Legal) |
| return CurrentBest; |
| |
| SmallVector<Register> NewSrcs; |
| for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; |
| ++SrcIdx) |
| NewSrcs.push_back(BV.getReg(SrcIdx)); |
| MIB.setInstrAndDebugLoc(BV); |
| return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0); |
| } |
| // A single source is requested, just return it. |
| return BV.getReg(StartSrcIdx); |
| } |
| |
| /// Given an G_INSERT op \p MI and a start bit and size, try to find |
| /// the origin of the value defined by that start position and size. |
| /// |
| /// \returns a register with the requested size, or the current best |
| /// register found during the current query. |
| Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, |
| unsigned Size) { |
| assert(MI.getOpcode() == TargetOpcode::G_INSERT); |
| assert(Size > 0); |
| |
| Register ContainerSrcReg = MI.getOperand(1).getReg(); |
| Register InsertedReg = MI.getOperand(2).getReg(); |
| LLT InsertedRegTy = MRI.getType(InsertedReg); |
| unsigned InsertOffset = MI.getOperand(3).getImm(); |
| |
| // There are 4 possible container/insertreg + requested bit-range layouts |
| // that the instruction and query could be representing. |
| // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO') |
| // and a start bit 'SB', with size S, giving an end bit 'EB', we could |
| // have... |
| // Scenario A: |
| // -------------------------- |
| // | INS | CONTAINER | |
| // -------------------------- |
| // | | |
| // SB EB |
| // |
| // Scenario B: |
| // -------------------------- |
| // | INS | CONTAINER | |
| // -------------------------- |
| // | | |
| // SB EB |
| // |
| // Scenario C: |
| // -------------------------- |
| // | CONTAINER | INS | |
| // -------------------------- |
| // | | |
| // SB EB |
| // |
| // Scenario D: |
| // -------------------------- |
| // | CONTAINER | INS | |
| // -------------------------- |
| // | | |
| // SB EB |
| // |
| // So therefore, A and D are requesting data from the INS operand, while |
| // B and C are requesting from the container operand. |
| |
| unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits(); |
| unsigned EndBit = StartBit + Size; |
| unsigned NewStartBit; |
| Register SrcRegToUse; |
| if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) { |
| SrcRegToUse = ContainerSrcReg; |
| NewStartBit = StartBit; |
| return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); |
| } |
| if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) { |
| SrcRegToUse = InsertedReg; |
| NewStartBit = StartBit - InsertOffset; |
| if (NewStartBit == 0 && |
| Size == MRI.getType(SrcRegToUse).getSizeInBits()) |
| CurrentBest = SrcRegToUse; |
| return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); |
| } |
| // The bit range spans both the inserted and container regions. |
| return Register(); |
| } |
| |
| /// Given an G_SEXT, G_ZEXT, G_ANYEXT op \p MI and a start bit and |
| /// size, try to find the origin of the value defined by that start |
| /// position and size. |
| /// |
| /// \returns a register with the requested size, or the current best |
| /// register found during the current query. |
| Register findValueFromExt(MachineInstr &MI, unsigned StartBit, |
| unsigned Size) { |
| assert(MI.getOpcode() == TargetOpcode::G_SEXT || |
| MI.getOpcode() == TargetOpcode::G_ZEXT || |
| MI.getOpcode() == TargetOpcode::G_ANYEXT); |
| assert(Size > 0); |
| |
| Register SrcReg = MI.getOperand(1).getReg(); |
| LLT SrcType = MRI.getType(SrcReg); |
| unsigned SrcSize = SrcType.getSizeInBits(); |
| |
| // Currently we don't go into vectors. |
| if (!SrcType.isScalar()) |
| return CurrentBest; |
| |
| if (StartBit + Size > SrcSize) |
| return CurrentBest; |
| |
| if (StartBit == 0 && SrcType.getSizeInBits() == Size) |
| CurrentBest = SrcReg; |
| return findValueFromDefImpl(SrcReg, StartBit, Size); |
| } |
| |
| /// Given an G_TRUNC op \p MI and a start bit and size, try to find |
| /// the origin of the value defined by that start position and size. |
| /// |
| /// \returns a register with the requested size, or the current best |
| /// register found during the current query. |
| Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit, |
| unsigned Size) { |
| assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
| assert(Size > 0); |
| |
| Register SrcReg = MI.getOperand(1).getReg(); |
| LLT SrcType = MRI.getType(SrcReg); |
| |
| // Currently we don't go into vectors. |
| if (!SrcType.isScalar()) |
| return CurrentBest; |
| |
| return findValueFromDefImpl(SrcReg, StartBit, Size); |
| } |
| |
| /// Internal implementation for findValueFromDef(). findValueFromDef() |
| /// initializes some data like the CurrentBest register, which this method |
| /// and its callees rely upon. |
| Register findValueFromDefImpl(Register DefReg, unsigned StartBit, |
| unsigned Size) { |
| std::optional<DefinitionAndSourceRegister> DefSrcReg = |
| getDefSrcRegIgnoringCopies(DefReg, MRI); |
| MachineInstr *Def = DefSrcReg->MI; |
| DefReg = DefSrcReg->Reg; |
| // If the instruction has a single def, then simply delegate the search. |
| // For unmerge however with multiple defs, we need to compute the offset |
| // into the source of the unmerge. |
| switch (Def->getOpcode()) { |
| case TargetOpcode::G_CONCAT_VECTORS: |
| return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size); |
| case TargetOpcode::G_UNMERGE_VALUES: { |
| unsigned DefStartBit = 0; |
| unsigned DefSize = MRI.getType(DefReg).getSizeInBits(); |
| for (const auto &MO : Def->defs()) { |
| if (MO.getReg() == DefReg) |
| break; |
| DefStartBit += DefSize; |
| } |
| Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg(); |
| Register SrcOriginReg = |
| findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size); |
| if (SrcOriginReg) |
| return SrcOriginReg; |
| // Failed to find a further value. If the StartBit and Size perfectly |
| // covered the requested DefReg, return that since it's better than |
| // nothing. |
| if (StartBit == 0 && Size == DefSize) |
| return DefReg; |
| return CurrentBest; |
| } |
| case TargetOpcode::G_BUILD_VECTOR: |
| return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit, |
| Size); |
| case TargetOpcode::G_INSERT: |
| return findValueFromInsert(*Def, StartBit, Size); |
| case TargetOpcode::G_TRUNC: |
| return findValueFromTrunc(*Def, StartBit, Size); |
| case TargetOpcode::G_SEXT: |
| case TargetOpcode::G_ZEXT: |
| case TargetOpcode::G_ANYEXT: |
| return findValueFromExt(*Def, StartBit, Size); |
| default: |
| return CurrentBest; |
| } |
| } |
| |
| public: |
| ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, |
| const LegalizerInfo &Info) |
| : MRI(Mri), MIB(Builder), LI(Info) {} |
| |
| /// Try to find a source of the value defined in the def \p DefReg, starting |
| /// at position \p StartBit with size \p Size. |
| /// \returns a register with the requested size, or an empty Register if no |
| /// better value could be found. |
| Register findValueFromDef(Register DefReg, unsigned StartBit, |
| unsigned Size) { |
| CurrentBest = Register(); |
| Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); |
| return FoundReg != DefReg ? FoundReg : Register(); |
| } |
| |
| /// Try to combine the defs of an unmerge \p MI by attempting to find |
| /// values that provides the bits for each def reg. |
| /// \returns true if all the defs of the unmerge have been made dead. |
| bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, |
| SmallVectorImpl<Register> &UpdatedDefs) { |
| unsigned NumDefs = MI.getNumDefs(); |
| LLT DestTy = MRI.getType(MI.getReg(0)); |
| |
| SmallBitVector DeadDefs(NumDefs); |
| for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
| Register DefReg = MI.getReg(DefIdx); |
| if (MRI.use_nodbg_empty(DefReg)) { |
| DeadDefs[DefIdx] = true; |
| continue; |
| } |
| Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); |
| if (!FoundVal) |
| continue; |
| if (MRI.getType(FoundVal) != DestTy) |
| continue; |
| |
| replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs, |
| Observer); |
| // We only want to replace the uses, not the def of the old reg. |
| Observer.changingInstr(MI); |
| MI.getOperand(DefIdx).setReg(DefReg); |
| Observer.changedInstr(MI); |
| DeadDefs[DefIdx] = true; |
| } |
| return DeadDefs.all(); |
| } |
| |
| GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, |
| unsigned &DefOperandIdx) { |
| if (Register Def = findValueFromDefImpl(Reg, 0, Size)) { |
| if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) { |
| DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def); |
| return Unmerge; |
| } |
| } |
| return nullptr; |
| } |
| |
| // Check if sequence of elements from merge-like instruction is defined by |
| // another sequence of elements defined by unmerge. Most often this is the |
| // same sequence. Search for elements using findValueFromDefImpl. |
| bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, |
| GUnmerge *Unmerge, unsigned UnmergeIdxStart, |
| unsigned NumElts, unsigned EltSize, |
| bool AllowUndef) { |
| assert(MergeStartIdx + NumElts <= MI.getNumSources()); |
| for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { |
| unsigned EltUnmergeIdx; |
| GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( |
| MI.getSourceReg(i), EltSize, EltUnmergeIdx); |
| // Check if source i comes from the same Unmerge. |
| if (EltUnmerge == Unmerge) { |
| // Check that source i's def has same index in sequence in Unmerge. |
| if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) |
| return false; |
| } else if (!AllowUndef || |
| MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() != |
| TargetOpcode::G_IMPLICIT_DEF) |
| return false; |
| } |
| return true; |
| } |
| |
| bool tryCombineMergeLike(GMergeLikeInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelChangeObserver &Observer) { |
| Register Elt0 = MI.getSourceReg(0); |
| LLT EltTy = MRI.getType(Elt0); |
| unsigned EltSize = EltTy.getSizeInBits(); |
| |
| unsigned Elt0UnmergeIdx; |
| // Search for unmerge that will be candidate for combine. |
| auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx); |
| if (!Unmerge) |
| return false; |
| |
| unsigned NumMIElts = MI.getNumSources(); |
| Register Dst = MI.getReg(0); |
| LLT DstTy = MRI.getType(Dst); |
| Register UnmergeSrc = Unmerge->getSourceReg(); |
| LLT UnmergeSrcTy = MRI.getType(UnmergeSrc); |
| |
| // Recognize copy of UnmergeSrc to Dst. |
| // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst. |
| // |
| // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty) |
| // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ... |
| // |
| // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty) |
| if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) { |
| if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize, |
| /*AllowUndef=*/DstTy.isVector())) |
| return false; |
| |
| replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer); |
| DeadInsts.push_back(&MI); |
| return true; |
| } |
| |
| // Recognize UnmergeSrc that can be unmerged to DstTy directly. |
| // Types have to be either both vector or both non-vector types. |
| // Merge-like opcodes are combined one at the time. First one creates new |
| // unmerge, following should use the same unmerge (builder performs CSE). |
| // |
| // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
| // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1 |
| // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 |
| // |
| // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc |
| if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
| (Elt0UnmergeIdx % NumMIElts == 0) && |
| getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { |
| if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, |
| EltSize, false)) |
| return false; |
| MIB.setInstrAndDebugLoc(MI); |
| auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg()); |
| unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits(); |
| replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB, |
| UpdatedDefs, Observer); |
| DeadInsts.push_back(&MI); |
| return true; |
| } |
| |
| // Recognize when multiple unmerged sources with UnmergeSrcTy type |
| // can be merged into Dst with DstTy type directly. |
| // Types have to be either both vector or both non-vector types. |
| |
| // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
| // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy) |
| // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3 |
| // |
| // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc |
| |
| if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
| getCoverTy(DstTy, UnmergeSrcTy) == DstTy) { |
| SmallVector<Register, 4> ConcatSources; |
| unsigned NumElts = Unmerge->getNumDefs(); |
| for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) { |
| unsigned EltUnmergeIdx; |
| auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i), |
| EltSize, EltUnmergeIdx); |
| // All unmerges have to be the same size. |
| if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) || |
| (EltUnmergeIdx != 0)) |
| return false; |
| if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize, |
| false)) |
| return false; |
| ConcatSources.push_back(UnmergeI->getSourceReg()); |
| } |
| |
| MIB.setInstrAndDebugLoc(MI); |
| MIB.buildMergeLikeInstr(Dst, ConcatSources); |
| DeadInsts.push_back(&MI); |
| return true; |
| } |
| |
| return false; |
| } |
| }; |
| |
| bool tryCombineUnmergeValues(GUnmerge &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs, |
| GISelChangeObserver &Observer) { |
| unsigned NumDefs = MI.getNumDefs(); |
| Register SrcReg = MI.getSourceReg(); |
| MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); |
| if (!SrcDef) |
| return false; |
| |
| LLT OpTy = MRI.getType(SrcReg); |
| LLT DestTy = MRI.getType(MI.getReg(0)); |
| unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); |
| |
| Builder.setInstrAndDebugLoc(MI); |
| |
| ArtifactValueFinder Finder(MRI, Builder, LI); |
| if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) { |
| markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); |
| return true; |
| } |
| |
| if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) { |
| // %0:_(<4 x s16>) = G_FOO |
| // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0 |
| // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 |
| // |
| // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 |
| Register SrcUnmergeSrc = SrcUnmerge->getSourceReg(); |
| LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc); |
| |
| // If we need to decrease the number of vector elements in the result type |
| // of an unmerge, this would involve the creation of an equivalent unmerge |
| // to copy back to the original result registers. |
| LegalizeActionStep ActionStep = LI.getAction( |
| {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); |
| switch (ActionStep.Action) { |
| case LegalizeActions::Lower: |
| case LegalizeActions::Unsupported: |
| break; |
| case LegalizeActions::FewerElements: |
| case LegalizeActions::NarrowScalar: |
| if (ActionStep.TypeIdx == 1) |
| return false; |
| break; |
| default: |
| return false; |
| } |
| |
| auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); |
| |
| // TODO: Should we try to process out the other defs now? If the other |
| // defs of the source unmerge are also unmerged, we end up with a separate |
| // unmerge for each one. |
| for (unsigned I = 0; I != NumDefs; ++I) { |
| Register Def = MI.getReg(I); |
| replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I), |
| MRI, Builder, UpdatedDefs, Observer); |
| } |
| |
| markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx); |
| return true; |
| } |
| |
| MachineInstr *MergeI = SrcDef; |
| unsigned ConvertOp = 0; |
| |
| // Handle intermediate conversions |
| unsigned SrcOp = SrcDef->getOpcode(); |
| if (isArtifactCast(SrcOp)) { |
| ConvertOp = SrcOp; |
| MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); |
| } |
| |
| if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), |
| ConvertOp, OpTy, DestTy)) { |
| // We might have a chance to combine later by trying to combine |
| // unmerge(cast) first |
| return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); |
| } |
| |
| const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; |
| |
| if (NumMergeRegs < NumDefs) { |
| if (NumDefs % NumMergeRegs != 0) |
| return false; |
| |
| Builder.setInstr(MI); |
| // Transform to UNMERGEs, for example |
| // %1 = G_MERGE_VALUES %4, %5 |
| // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 |
| // to |
| // %9, %10 = G_UNMERGE_VALUES %4 |
| // %11, %12 = G_UNMERGE_VALUES %5 |
| |
| const unsigned NewNumDefs = NumDefs / NumMergeRegs; |
| for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { |
| SmallVector<Register, 8> DstRegs; |
| for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; |
| ++j, ++DefIdx) |
| DstRegs.push_back(MI.getReg(DefIdx)); |
| |
| if (ConvertOp) { |
| LLT MergeDstTy = MRI.getType(SrcDef->getOperand(0).getReg()); |
| |
| // This is a vector that is being split and casted. Extract to the |
| // element type, and do the conversion on the scalars (or smaller |
| // vectors). |
| LLT MergeEltTy = MergeDstTy.divide(NumMergeRegs); |
| |
| // Handle split to smaller vectors, with conversions. |
| // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>) |
| // %3(<8 x s16>) = G_SEXT %2 |
| // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = |
| // G_UNMERGE_VALUES %3 |
| // |
| // => |
| // |
| // %8(<4 x s16>) = G_SEXT %0 |
| // %9(<4 x s16>) = G_SEXT %1 |
| // %4(<2 x s16>), %5(<2 x s16>) = G_UNMERGE_VALUES %8 |
| // %7(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9 |
| |
| Register TmpReg = MRI.createGenericVirtualRegister(MergeEltTy); |
| Builder.buildInstr(ConvertOp, {TmpReg}, |
| {MergeI->getOperand(Idx + 1).getReg()}); |
| Builder.buildUnmerge(DstRegs, TmpReg); |
| } else { |
| Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); |
| } |
| UpdatedDefs.append(DstRegs.begin(), DstRegs.end()); |
| } |
| |
| } else if (NumMergeRegs > NumDefs) { |
| if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0) |
| return false; |
| |
| Builder.setInstr(MI); |
| // Transform to MERGEs |
| // %6 = G_MERGE_VALUES %17, %18, %19, %20 |
| // %7, %8 = G_UNMERGE_VALUES %6 |
| // to |
| // %7 = G_MERGE_VALUES %17, %18 |
| // %8 = G_MERGE_VALUES %19, %20 |
| |
| const unsigned NumRegs = NumMergeRegs / NumDefs; |
| for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
| SmallVector<Register, 8> Regs; |
| for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; |
| ++j, ++Idx) |
| Regs.push_back(MergeI->getOperand(Idx).getReg()); |
| |
| Register DefReg = MI.getReg(DefIdx); |
| Builder.buildMergeLikeInstr(DefReg, Regs); |
| UpdatedDefs.push_back(DefReg); |
| } |
| |
| } else { |
| LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg()); |
| |
| if (!ConvertOp && DestTy != MergeSrcTy) |
| ConvertOp = TargetOpcode::G_BITCAST; |
| |
| if (ConvertOp) { |
| Builder.setInstr(MI); |
| |
| for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
| Register DefReg = MI.getOperand(Idx).getReg(); |
| Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); |
| |
| if (!MRI.use_empty(DefReg)) { |
| Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); |
| UpdatedDefs.push_back(DefReg); |
| } |
| } |
| |
| markInstAndDefDead(MI, *MergeI, DeadInsts); |
| return true; |
| } |
| |
| assert(DestTy == MergeSrcTy && |
| "Bitcast and the other kinds of conversions should " |
| "have happened earlier"); |
| |
| Builder.setInstr(MI); |
| for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
| Register DstReg = MI.getOperand(Idx).getReg(); |
| Register SrcReg = MergeI->getOperand(Idx + 1).getReg(); |
| replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs, |
| Observer); |
| } |
| } |
| |
| markInstAndDefDead(MI, *MergeI, DeadInsts); |
| return true; |
| } |
| |
| bool tryCombineExtract(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| SmallVectorImpl<Register> &UpdatedDefs) { |
| assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); |
| |
| // Try to use the source registers from a G_MERGE_VALUES |
| // |
| // %2 = G_MERGE_VALUES %0, %1 |
| // %3 = G_EXTRACT %2, N |
| // => |
| // |
| // for N < %2.getSizeInBits() / 2 |
| // %3 = G_EXTRACT %0, N |
| // |
| // for N >= %2.getSizeInBits() / 2 |
| // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() |
| |
| Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
| MachineInstr *MergeI = MRI.getVRegDef(SrcReg); |
| if (!MergeI || !isa<GMergeLikeInstr>(MergeI)) |
| return false; |
| |
| Register DstReg = MI.getOperand(0).getReg(); |
| LLT DstTy = MRI.getType(DstReg); |
| LLT SrcTy = MRI.getType(SrcReg); |
| |
| // TODO: Do we need to check if the resulting extract is supported? |
| unsigned ExtractDstSize = DstTy.getSizeInBits(); |
| unsigned Offset = MI.getOperand(2).getImm(); |
| unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; |
| unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; |
| unsigned MergeSrcIdx = Offset / MergeSrcSize; |
| |
| // Compute the offset of the last bit the extract needs. |
| unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; |
| |
| // Can't handle the case where the extract spans multiple inputs. |
| if (MergeSrcIdx != EndMergeSrcIdx) |
| return false; |
| |
| // TODO: We could modify MI in place in most cases. |
| Builder.setInstr(MI); |
| Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(), |
| Offset - MergeSrcIdx * MergeSrcSize); |
| UpdatedDefs.push_back(DstReg); |
| markInstAndDefDead(MI, *MergeI, DeadInsts); |
| return true; |
| } |
| |
| /// Try to combine away MI. |
| /// Returns true if it combined away the MI. |
| /// Adds instructions that are dead as a result of the combine |
| /// into DeadInsts, which can include MI. |
| bool tryCombineInstruction(MachineInstr &MI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| GISelObserverWrapper &WrapperObserver) { |
| ArtifactValueFinder Finder(MRI, Builder, LI); |
| |
| // This might be a recursive call, and we might have DeadInsts already |
| // populated. To avoid bad things happening later with multiple vreg defs |
| // etc, process the dead instructions now if any. |
| if (!DeadInsts.empty()) |
| deleteMarkedDeadInsts(DeadInsts, WrapperObserver); |
| |
| // Put here every vreg that was redefined in such a way that it's at least |
| // possible that one (or more) of its users (immediate or COPY-separated) |
| // could become artifact combinable with the new definition (or the |
| // instruction reachable from it through a chain of copies if any). |
| SmallVector<Register, 4> UpdatedDefs; |
| bool Changed = false; |
| switch (MI.getOpcode()) { |
| default: |
| return false; |
| case TargetOpcode::G_ANYEXT: |
| Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
| break; |
| case TargetOpcode::G_ZEXT: |
| Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
| break; |
| case TargetOpcode::G_SEXT: |
| Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); |
| break; |
| case TargetOpcode::G_UNMERGE_VALUES: |
| Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts, |
| UpdatedDefs, WrapperObserver); |
| break; |
| case TargetOpcode::G_MERGE_VALUES: |
| case TargetOpcode::G_BUILD_VECTOR: |
| case TargetOpcode::G_CONCAT_VECTORS: |
| // If any of the users of this merge are an unmerge, then add them to the |
| // artifact worklist in case there's folding that can be done looking up. |
| for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) { |
| if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES || |
| U.getOpcode() == TargetOpcode::G_TRUNC) { |
| UpdatedDefs.push_back(MI.getOperand(0).getReg()); |
| break; |
| } |
| } |
| Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts, |
| UpdatedDefs, WrapperObserver); |
| break; |
| case TargetOpcode::G_EXTRACT: |
| Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); |
| break; |
| case TargetOpcode::G_TRUNC: |
| Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
| if (!Changed) { |
| // Try to combine truncates away even if they are legal. As all artifact |
| // combines at the moment look only "up" the def-use chains, we achieve |
| // that by throwing truncates' users (with look through copies) into the |
| // ArtifactList again. |
| UpdatedDefs.push_back(MI.getOperand(0).getReg()); |
| } |
| break; |
| } |
| // If the main loop through the ArtifactList found at least one combinable |
| // pair of artifacts, not only combine it away (as done above), but also |
| // follow the def-use chain from there to combine everything that can be |
| // combined within this def-use chain of artifacts. |
| while (!UpdatedDefs.empty()) { |
| Register NewDef = UpdatedDefs.pop_back_val(); |
| assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg"); |
| for (MachineInstr &Use : MRI.use_instructions(NewDef)) { |
| switch (Use.getOpcode()) { |
| // Keep this list in sync with the list of all artifact combines. |
| case TargetOpcode::G_ANYEXT: |
| case TargetOpcode::G_ZEXT: |
| case TargetOpcode::G_SEXT: |
| case TargetOpcode::G_UNMERGE_VALUES: |
| case TargetOpcode::G_EXTRACT: |
| case TargetOpcode::G_TRUNC: |
| case TargetOpcode::G_BUILD_VECTOR: |
| // Adding Use to ArtifactList. |
| WrapperObserver.changedInstr(Use); |
| break; |
| case TargetOpcode::COPY: { |
| Register Copy = Use.getOperand(0).getReg(); |
| if (Copy.isVirtual()) |
| UpdatedDefs.push_back(Copy); |
| break; |
| } |
| default: |
| // If we do not have an artifact combine for the opcode, there is no |
| // point in adding it to the ArtifactList as nothing interesting will |
| // be done to it anyway. |
| break; |
| } |
| } |
| } |
| return Changed; |
| } |
| |
| private: |
| static Register getArtifactSrcReg(const MachineInstr &MI) { |
| switch (MI.getOpcode()) { |
| case TargetOpcode::COPY: |
| case TargetOpcode::G_TRUNC: |
| case TargetOpcode::G_ZEXT: |
| case TargetOpcode::G_ANYEXT: |
| case TargetOpcode::G_SEXT: |
| case TargetOpcode::G_EXTRACT: |
| return MI.getOperand(1).getReg(); |
| case TargetOpcode::G_UNMERGE_VALUES: |
| return MI.getOperand(MI.getNumOperands() - 1).getReg(); |
| default: |
| llvm_unreachable("Not a legalization artifact happen"); |
| } |
| } |
| |
| /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI |
| /// (either by killing it or changing operands) results in DefMI being dead |
| /// too. In-between COPYs or artifact-casts are also collected if they are |
| /// dead. |
| /// MI is not marked dead. |
| void markDefDead(MachineInstr &MI, MachineInstr &DefMI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| unsigned DefIdx = 0) { |
| // Collect all the copy instructions that are made dead, due to deleting |
| // this instruction. Collect all of them until the Trunc(DefMI). |
| // Eg, |
| // %1(s1) = G_TRUNC %0(s32) |
| // %2(s1) = COPY %1(s1) |
| // %3(s1) = COPY %2(s1) |
| // %4(s32) = G_ANYEXT %3(s1) |
| // In this case, we would have replaced %4 with a copy of %0, |
| // and as a result, %3, %2, %1 are dead. |
| MachineInstr *PrevMI = &MI; |
| while (PrevMI != &DefMI) { |
| Register PrevRegSrc = getArtifactSrcReg(*PrevMI); |
| |
| MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); |
| if (MRI.hasOneUse(PrevRegSrc)) { |
| if (TmpDef != &DefMI) { |
| assert((TmpDef->getOpcode() == TargetOpcode::COPY || |
| isArtifactCast(TmpDef->getOpcode())) && |
| "Expecting copy or artifact cast here"); |
| |
| DeadInsts.push_back(TmpDef); |
| } |
| } else |
| break; |
| PrevMI = TmpDef; |
| } |
| |
| if (PrevMI == &DefMI) { |
| unsigned I = 0; |
| bool IsDead = true; |
| for (MachineOperand &Def : DefMI.defs()) { |
| if (I != DefIdx) { |
| if (!MRI.use_empty(Def.getReg())) { |
| IsDead = false; |
| break; |
| } |
| } else { |
| if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg())) |
| break; |
| } |
| |
| ++I; |
| } |
| |
| if (IsDead) |
| DeadInsts.push_back(&DefMI); |
| } |
| } |
| |
| /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be |
| /// dead due to MI being killed, then mark DefMI as dead too. |
| /// Some of the combines (extends(trunc)), try to walk through redundant |
| /// copies in between the extends and the truncs, and this attempts to collect |
| /// the in between copies if they're dead. |
| void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, |
| SmallVectorImpl<MachineInstr *> &DeadInsts, |
| unsigned DefIdx = 0) { |
| DeadInsts.push_back(&MI); |
| markDefDead(MI, DefMI, DeadInsts, DefIdx); |
| } |
| |
| /// Erase the dead instructions in the list and call the observer hooks. |
| /// Normally the Legalizer will deal with erasing instructions that have been |
| /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to |
| /// process instructions which have been marked dead, but otherwise break the |
| /// MIR by introducing multiple vreg defs. For those cases, allow the combines |
| /// to explicitly delete the instructions before we run into trouble. |
| void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, |
| GISelObserverWrapper &WrapperObserver) { |
| for (auto *DeadMI : DeadInsts) { |
| LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n"); |
| WrapperObserver.erasingInstr(*DeadMI); |
| DeadMI->eraseFromParent(); |
| } |
| DeadInsts.clear(); |
| } |
| |
| /// Checks if the target legalizer info has specified anything about the |
| /// instruction, or if unsupported. |
| bool isInstUnsupported(const LegalityQuery &Query) const { |
| using namespace LegalizeActions; |
| auto Step = LI.getAction(Query); |
| return Step.Action == Unsupported || Step.Action == NotFound; |
| } |
| |
| bool isInstLegal(const LegalityQuery &Query) const { |
| return LI.getAction(Query).Action == LegalizeActions::Legal; |
| } |
| |
| bool isConstantUnsupported(LLT Ty) const { |
| if (!Ty.isVector()) |
| return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}}); |
| |
| LLT EltTy = Ty.getElementType(); |
| return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) || |
| isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); |
| } |
| |
| /// Looks through copy instructions and returns the actual |
| /// source register. |
| Register lookThroughCopyInstrs(Register Reg) { |
| using namespace llvm::MIPatternMatch; |
| |
| Register TmpReg; |
| while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { |
| if (MRI.getType(TmpReg).isValid()) |
| Reg = TmpReg; |
| else |
| break; |
| } |
| return Reg; |
| } |
| }; |
| |
| } // namespace llvm |
| |
| #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |