AMDGPU: Fix handling of -0 in round lowering (#65761)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a5e8024..0c3f558 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6586,23 +6586,25 @@
// round(x) =>
// t = trunc(x);
// d = fabs(x - t);
- // o = copysign(1.0f, x);
- // return t + (d >= 0.5 ? o : 0.0);
+ // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
+ // return t + o;
auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+
auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
- auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+ auto Cmp =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
- auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
- Flags);
- auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+ // Could emit G_UITOFP instead
+ auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
+ auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
- MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+ MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9698b6e..42479b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2429,18 +2429,16 @@
const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
const SDValue One = DAG.getConstantFP(1.0, SL, VT);
- const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
-
- SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+ SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
- SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
-
- return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
+ SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
+ return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
}
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {