[GlobalISel] Handle more types in narrowScalar for eq/ne G_ICMP
Generalize the existing eq/ne case using `extractParts`. The original code only
handled narrowings where the source type is exactly twice the narrow type
(2n -> n). This generalization allows any type that `extractParts` can break
down.
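As a rough model of the split (a hypothetical helper for illustration only, not
LLVM's `extractParts`, which produces registers for each piece): a source of
SrcSize bits becomes SrcSize / NarrowSize narrow-sized parts plus a leftover of
SrcSize % NarrowSize bits.

    #include <utility>

    // Hypothetical sketch of the size arithmetic behind the split; the real
    // extractParts builds the extraction instructions and returns registers.
    std::pair<unsigned, unsigned> splitSizes(unsigned SrcSize,
                                             unsigned NarrowSize) {
      unsigned NumNarrowParts = SrcSize / NarrowSize; // full narrow-sized parts
      unsigned LeftoverBits = SrcSize % NarrowSize;   // leftover part, if any
      return {NumNarrowParts, LeftoverBits};
    }

    // splitSizes(88, 64) == {1, 24}: one s64 part plus an s24 leftover.
    // splitSizes(128, 64) == {2, 0}: the old 2n -> n case, no leftover.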
The general overview (a scalar sketch of the equivalent computation follows
this list):
- Loop over each narrow-sized part and do exactly what the existing 2-register
  case did.
- Loop over the leftover-sized parts and do the same.
- Widen each leftover-sized XOR result to the desired narrow size.
- OR all of the results together, then compare against 0 (just like the old
  code).
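As a scalar sketch of that sequence (hypothetical types and names, assuming an
s88 source narrowed to s64 with an s24 leftover):

    #include <cstdint>

    // Hypothetical scalar model: an s88 value held as one s64 narrow part plus
    // a 24-bit leftover kept in the low bits of a 32-bit integer.
    struct S88 {
      uint64_t Part0;    // narrow-sized part (s64)
      uint32_t Leftover; // leftover-sized part (s24), upper 8 bits always zero
    };

    // Mirrors the generated G_XOR / G_ZEXT / G_OR / G_ICMP-against-zero chain.
    bool isEqualS88(const S88 &LHS, const S88 &RHS) {
      uint64_t XorPart = LHS.Part0 ^ RHS.Part0;              // XOR narrow parts
      uint32_t XorLeftover = LHS.Leftover ^ RHS.Leftover;    // XOR leftovers
      uint64_t Widened = static_cast<uint64_t>(XorLeftover); // zero-extend to s64
      return (XorPart | Widened) == 0;                       // OR, compare with 0
    }

For ICMP_NE the final comparison is != 0 instead.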
This shows up a lot when building clang for AArch64 using GlobalISel, so it's
worth fixing. For the sake of simplicity, this doesn't handle the non-eq/ne
case yet.
Also remove the code in this case that notifies the observer; we're just going
to delete MI anyway, so notifying the observer shouldn't be necessary.
Differential Revision: https://reviews.llvm.org/D105161
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 40f6f4a..b31cf3f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,38 +1063,81 @@
return Legalized;
}
case TargetOpcode::G_ICMP: {
- uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- if (NarrowSize * 2 != SrcSize)
- return UnableToLegalize;
-
- Observer.changingInstr(MI);
- Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
-
- Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
-
+ Register LHS = MI.getOperand(2).getReg();
+ LLT SrcTy = MRI.getType(LHS);
+ uint64_t SrcSize = SrcTy.getSizeInBits();
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
- MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
- MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
- MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
- MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
- MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+ // TODO: Handle the non-equality case for weird sizes.
+ if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
+ return UnableToLegalize;
+
+ LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+ SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+ if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+ LHSLeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+ SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+ if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+ RHSPartRegs, RHSLeftoverRegs))
+ return UnableToLegalize;
+
+ // We now have the LHS and RHS of the compare split into narrow-type
+ // registers, plus potentially some leftover type.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT ResTy = MRI.getType(Dst);
+ if (ICmpInst::isEquality(Pred)) {
+ // For each part on the LHS and RHS, keep track of the result of XOR-ing
+ // them together. For each equal part, the result should be all 0s. For
+ // each non-equal part, we'll get at least one 1.
+ auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ SmallVector<Register, 4> Xors;
+ for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+ Xors.push_back(Xor);
+ }
+
+ // Build a G_XOR for each leftover register. Each G_XOR must be widened
+ // to the desired narrow type so that we can OR them together later.
+ SmallVector<Register, 4> WidenedXors;
+ for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+ LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+ buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+ /* PadStrategy = */ TargetOpcode::G_ZEXT);
+ Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+ }
+
+ // Now, for each part we broke up, we know if they are equal/not equal
+ // based off the G_XOR. We can OR these all together and compare against
+ // 0 to get the result.
+ assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+ auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+ for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+ Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+ MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
} else {
+ // TODO: Handle non-power-of-two types.
+ assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+ assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+ Register LHSL = LHSPartRegs[0];
+ Register LHSH = LHSPartRegs[1];
+ Register RHSL = RHSPartRegs[0];
+ Register RHSH = RHSPartRegs[1];
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
- MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+ MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
}
- Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}