[SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP (#86505)
We can avoid libcalls.
Fixes #86205
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4981d7b..c00fe1f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6389,12 +6389,26 @@
// 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
// bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
- auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
// Shift count result from 8 high bits to low bits.
auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
- B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ auto IsMulSupported = [this](const LLT Ty) {
+ auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
+ return Action == Legal || Action == WidenScalar || Action == Custom;
+ };
+ if (IsMulSupported(Ty)) {
+ auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ } else {
+ auto ResTmp = B8Count;
+ for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
+ auto ShiftC = B.buildConstant(Ty, Shift);
+ auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
+ ResTmp = B.buildAdd(Ty, ResTmp, Shl);
+ }
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ }
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b6..962f0d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8710,11 +8710,21 @@
}
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::ADD, dl, VT, V,
+ DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+ }
+ }
+ return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8767,10 +8777,22 @@
return Op;
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::VP_LSHR, dl, VT,
- DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
+ DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
+ Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
}