[LegalizeDAG] Optimize CodeGen for `ISD::CTLZ_ZERO_UNDEF` (#83039)
Previously, the same instructions were generated for `ISD::CTLZ` and `ISD::CTLZ_ZERO_UNDEF`, which did not take advantage of the fact that zero is an invalid input for `ISD::CTLZ_ZERO_UNDEF`. This commit separates the codegen for the two cases so that the latter can be optimized.
The details of the optimization are outlined in #82075.
Fixes #82075
Co-authored-by: Manish Kausik H <hmamishkausik@gmail.com>
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 86de1f3..3f1094e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2461,13 +2461,22 @@
NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // Optimization: the result is the CTLZ of the source after shifting it
+ // left by the size difference between WideTy and CurTy, that is,
+ // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
+ // Result = ctlz MIBSrc
+ MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
+ MIRBuilder.buildConstant(WideTy, SizeDiff));
+ }
+
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
- if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
- MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
// The correct result is NewOp - (Difference in widety and current ty).
- unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
MIBNewOp = MIRBuilder.buildSub(
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
}