[GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT (#96297)
This is an implementation of the saturating fp to int conversions for
GlobalISel. On AArch64 the conversion instructions work this way,
producing saturating results. LegalizerHelper::lowerFPTOINT_SAT is
ported from SDAG.
AArch64 has a lot of existing tests for fptosi_sat, covering a wide
range of types. I have tried to make most of them work all at once, but
a few fall back due to other missing features such as f128 handling for
min/max.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3640b77..01e47bd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1880,6 +1880,8 @@
}
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI_SAT:
+ case TargetOpcode::G_FPTOSI_SAT:
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
@@ -2874,6 +2876,47 @@
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ Register OldDst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(OldDst);
+ Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
+ Register NewDst;
+ MI.getOperand(0).setReg(ExtReg);
+ uint64_t ShortBits = Ty.getScalarSizeInBits();
+ uint64_t WideBits = WideTy.getScalarSizeInBits();
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
+ // z = i16 fptosi_sat(a)
+ // ->
+ // x = i32 fptosi_sat(a)
+ // y = smin(x, 32767)
+ // z = smax(y, -32768)
+ auto MaxVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
+ auto MinVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
+ Register MidReg =
+ MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
+ NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
+ } else {
+ // z = i16 fptoui_sat(a)
+ // ->
+ // x = i32 fptoui_sat(a)
+ // y = umin(x, 65535)
+ auto MaxVal = MIRBuilder.buildConstant(
+ WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
+ NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
+ }
+ MIRBuilder.buildTrunc(OldDst, NewDst);
+ } else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
+ Observer.changedInstr(MI);
+ return Legalized;
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
@@ -4170,6 +4213,9 @@
return lowerFPTOUI(MI);
case G_FPTOSI:
return lowerFPTOSI(MI);
+ case G_FPTOUI_SAT:
+ case G_FPTOSI_SAT:
+ return lowerFPTOINT_SAT(MI);
case G_FPTRUNC:
return lowerFPTRUNC(MI);
case G_FPOWI:
@@ -4986,6 +5032,8 @@
case G_UITOFP:
case G_FPTOSI:
case G_FPTOUI:
+ case G_FPTOSI_SAT:
+ case G_FPTOUI_SAT:
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
Observer.changingInstr(MI);
@@ -7285,6 +7335,106 @@
return Legalized;
}
+// Lower G_FPTOSI_SAT / G_FPTOUI_SAT: out-of-range inputs clamp to the integer
+// bounds of the destination type and NaN converts to zero.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
+  unsigned SatWidth = DstTy.getScalarSizeInBits();
+
+  // Integer saturation bounds and their floating-point counterparts.
+  APInt MinInt, MaxInt;
+  if (IsSigned) {
+    MinInt = APInt::getSignedMinValue(SatWidth);
+    MaxInt = APInt::getSignedMaxValue(SatWidth);
+  } else {
+    MinInt = APInt::getMinValue(SatWidth);
+    MaxInt = APInt::getMaxValue(SatWidth);
+  }
+
+  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+  APFloat MinFloat(Semantics);
+  APFloat MaxFloat(Semantics);
+
+  APFloat::opStatus MinStatus =
+      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+  APFloat::opStatus MaxStatus =
+      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+                             !(MaxStatus & APFloat::opStatus::opInexact);
+
+  // If the integer bounds are exactly representable as floats, emit a
+  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
+  // and selects.
+  if (AreExactFloatBounds) {
+    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
+    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
+                                     SrcTy.changeElementSize(1), Src, MaxC);
+    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
+    // Clamp by MaxFloat from above. NaN cannot occur.
+    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
+    auto MinP =
+        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
+                             MinC, MachineInstr::FmNoNans);
+    auto Min =
+        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
+    // Convert clamped value to integer. In the unsigned case we're done,
+    // because we mapped NaN to MinFloat, which will cast to zero.
+    if (!IsSigned) {
+      MIRBuilder.buildFPTOUI(Dst, Min);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    // Otherwise, select 0 if Src is NaN.
+    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
+    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                       DstTy.changeElementSize(1), Src, Src);
+    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
+                           FpToInt);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Direct conversion. Assumes the operation is non-trapping, so applying it to
+  // an out-of-range value is fine as long as we select the result away later.
+  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
+                          : MIRBuilder.buildFPTOUI(DstTy, Src);
+
+  // If Src ULT MinFloat, select MinInt. In particular, this also selects
+  // MinInt if Src is NaN.
+  auto ULT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
+  auto Max = MIRBuilder.buildSelect(
+      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
+  // If Src OGT MaxFloat, select MaxInt.
+  auto OGT =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
+                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));
+
+  // In the unsigned case we are done, because we mapped NaN to MinInt, which
+  // is already zero.
+  if (!IsSigned) {
+    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
+                           Max);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Otherwise, select 0 if Src is NaN.
+  auto Min = MIRBuilder.buildSelect(
+      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
+  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
+                                     DstTy.changeElementSize(1), Src, Src);
+  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {