[MIPS GlobalISel] Select count trailing zeros
llvm.cttz.<type> intrinsic has additional i1 argument is_zero_undef,
it tells whether zero as the first argument produces a defined result.
G_CTTZ is generated from llvm.cttz.<type> (<type> <src>, i1 false)
intrinsics, clang generates these intrinsics from __builtin_ctz and
__builtin_ctzll.
G_CTTZ_ZERO_UNDEF comes from llvm.cttz.<type> (<type> <src>, i1 true).
Clang generates such intrinsics as parts of expansion of builtin_ffs
and builtin_ffsll. It is also traditionally part of and many
algorithms that are now predicated on avoiding zero-value inputs.
Add narrow scalar (algorithm uses G_CTTZ_ZERO_UNDEF) for G_CTTZ.
Lower G_CTTZ and G_CTTZ_ZERO_UNDEF for MIPS32.
Differential Revision: https://reviews.llvm.org/D73215
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4df23ab..c6c5c18 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -981,6 +981,8 @@
switch (MI.getOpcode()) {
case TargetOpcode::G_CTLZ:
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTTZ:
+ return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
@@ -3886,6 +3888,37 @@
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ MachineIRBuilder &B = MIRBuilder;
+ auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1));
+ // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
+ auto C_0 = B.buildConstant(NarrowTy, 0);
+ auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ UnmergeSrc.getReg(0), C_0);
+ auto HiCTTZ = B.buildCTTZ(NarrowTy, UnmergeSrc.getReg(1));
+ auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
+ auto LoIsZeroCTTZ = B.buildAdd(NarrowTy, HiCTTZ, C_NarrowSize);
+ auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(0));
+ auto LoOut = B.buildSelect(NarrowTy, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
+
+ B.buildMerge(MI.getOperand(0), {LoOut.getReg(0), C_0.getReg(0)});
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
unsigned Opc = MI.getOpcode();
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();