IR: Add llvm.ldexp and llvm.experimental.constrained.ldexp intrinsics

AMDGPU has native instructions and target intrinsics for ldexp, but
these really should be subject to ordinary legalization and generic
optimizations. This will also enable legalizing f16 ldexp by promoting
it to f32 on targets without native f16 support.
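
As a rough sketch (illustrative IR, not taken from this patch's tests),
an f16 ldexp call is expected to legalize on such targets by widening
the floating-point operand and truncating the result, with the i32
exponent left as-is:

  declare half @llvm.ldexp.f16.i32(half, i32)
  declare float @llvm.ldexp.f32.i32(float, i32)

  define half @scale_f16(half %x, i32 %n) {
    %r = call half @llvm.ldexp.f16.i32(half %x, i32 %n)
    ret half %r
  }
  ; ...conceptually becomes:
  ;   %ext  = fpext half %x to float
  ;   %wide = call float @llvm.ldexp.f32.i32(float %ext, i32 %n)
  ;   %r    = fptrunc float %wide to half
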
Implement a somewhat horrible inline expansion for targets without
libcall support. This could be better if we could introduce control
flow (the GlobalISel version of this expansion is not yet implemented).
Support for strictfp legalization is less complete, but it works for
the simple cases.
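
For the strictfp path, the constrained form takes the usual
rounding-mode and exception-behavior metadata. A minimal sketch,
following the standard constrained-intrinsic conventions (the names
and metadata below are for illustration only):

  declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32,
                                                             metadata, metadata)

  define float @scale_strict(float %x, i32 %n) strictfp {
    %r = call float @llvm.experimental.constrained.ldexp.f32.i32(
             float %x, i32 %n,
             metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
    ret float %r
  }
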
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 5c5f843..c38951a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -542,6 +542,8 @@
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
+ case TargetOpcode::G_FLDEXP:
+ RTLIBCASE(LDEXP_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
@@ -826,6 +828,7 @@
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
@@ -1413,6 +1416,9 @@
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP:
+ return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
}
}
@@ -2553,14 +2559,30 @@
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
- return Legalized;
+ case TargetOpcode::G_FPOWI:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP: {
+ if (TypeIdx == 0) {
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 1) {
+ // For some reason SelectionDAG tries to promote to a libcall without
+ // actually changing the integer type for promotion.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
}
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
@@ -4136,6 +4158,7 @@
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
+ case G_FLDEXP:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
@@ -4211,6 +4234,7 @@
case G_STRICT_FSUB:
case G_STRICT_FMUL:
case G_STRICT_FMA:
+ case G_STRICT_FLDEXP:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -5593,6 +5617,31 @@
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ MachineIRBuilder &B = MIRBuilder;
+ Register ExpReg = MI.getOperand(2).getReg();
+ LLT ExpTy = MRI.getType(ExpReg);
+
+ unsigned ClampSize = NarrowTy.getScalarSizeInBits();
+
+ // Clamp the exponent to the range of the target type.
+ auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
+ auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
+ auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
+ auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
+
+ auto Trunc = B.buildTrunc(NarrowTy, Clamp);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Trunc.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
const auto &TII = MIRBuilder.getTII();