GlobalISel: Implement lower for G_INTRINSIC_ROUND
Mostly copied from AMDGPU lowering implementation, except used
G_SITOFP instead of directly creating a select on -1.0, 0.0.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index bb3ece3..53bda81 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2053,6 +2053,8 @@
}
case TargetOpcode::G_FMAD:
return lowerFMad(MI);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return lowerIntrinsicRound(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -4130,6 +4132,33 @@
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
+
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
+
+ auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
+ SrcReg, Zero, Flags);
+ auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
+ SrcReg, Trunc, Flags);
+ auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
+ auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
+
+ MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();