[X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (#100079)
Also add tests for G_SITOFP and G_FPTOSI
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e64d3f5..c3b6b30 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7169,6 +7169,78 @@
return Legalized;
}
+// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
+// operations and G_SITOFP. Erases MI and returns Legalized.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+ // For Src <= INT64_MAX (sign bit clear) we simply reuse SITOFP.
+ // Otherwise, halve Src with a logical shift, round by ORing in the lowest
+ // bit saved before the shift, convert to float by SITOFP, and double the
+ // result.
+ auto One = MIRBuilder.buildConstant(S64, 1);
+ auto Zero = MIRBuilder.buildConstant(S64, 0);
+ // Result if Src <= INT64_MAX
+ auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
+ // Result if Src > INT64_MAX (sign bit set)
+ auto Halved = MIRBuilder.buildLShr(S64, Src, One);
+ auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
+ auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
+ auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
+ auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
+ // Src exceeds INT64_MAX exactly when it is negative as a signed value, so
+ // a signed compare against zero picks one of the two conversions.
+ auto IsLarge =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
+ MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// Expand s64 = G_UITOFP s64 to an IEEE double representation using bit and
+// float arithmetic operations. Erases MI and returns Legalized.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+ // Place each 32-bit half of Src (Lo, Hi) in the mantissa of a double; the
+ // two exponents differ by 32. The constants below are the IEEE double bit
+ // patterns of 2^52, 2^84 and 2^84 + 2^52.
+ //
+ // LowBitsFP  = bits(2^52) | Lo        = 2^52 + Lo
+ // HighBitsFP = bits(2^84) | Hi        = 2^84 + Hi * 2^32
+ // Scratch    = HighBitsFP - (2^84 + 2^52)
+ //            = Hi * 2^32 - 2^52       (fits in the mantissa, so exact)
+ // Result     = Scratch + LowBitsFP    = Hi * 2^32 + Lo
+ auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
+ auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
+ auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+ auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
+ auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
+
+ auto LowBits = MIRBuilder.buildTrunc(S32, Src);
+ LowBits = MIRBuilder.buildZExt(S64, LowBits);
+ auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
+ auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
+ auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
+ auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
+ MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
@@ -7183,13 +7255,15 @@
if (SrcTy != LLT::scalar(64))
return UnableToLegalize;
- if (DstTy == LLT::scalar(32)) {
+ if (DstTy == LLT::scalar(32))
// TODO: SelectionDAG has several alternative expansions to port which may
- // be more reasonble depending on the available instructions. If a target
- // has sitofp, does not have CTLZ, or can efficiently use f64 as an
- // intermediate type, this is probably worse.
- return lowerU64ToF32BitOps(MI);
- }
+ // be more reasonable depending on the available instructions. We also need
+ // a more advanced mechanism to choose an optimal version depending on
+ // target features such as sitofp or CTLZ availability.
+ return lowerU64ToF32WithSITOFP(MI);
+
+ if (DstTy == LLT::scalar(64))
+ return lowerU64ToF64BitFloatOps(MI);
return UnableToLegalize;
}