[Codegen] Generate fast fp64-to-fp16 conversions in unsafe mode (UnsafeFPMath) by truncating through an intermediate fp32 value.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D154528
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 266fa37..1393563 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6255,15 +6255,29 @@
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
+ const LLT S1 = LLT::scalar(1);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+
auto [Dst, Src] = MI.getFirst2Regs();
+ assert(MRI.getType(Dst).getScalarType() == S16 &&
+ MRI.getType(Src).getScalarType() == S64);
+
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
+ if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ unsigned Flags = MI.getFlags();
+ auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
+ MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);