AMDGPU: Fix handling of -0 in round lowering (#65761)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a5e8024..0c3f558 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6586,23 +6586,25 @@
   // round(x) =>
   //  t = trunc(x);
   //  d = fabs(x - t);
-  //  o = copysign(1.0f, x);
-  //  return t + (d >= 0.5 ? o : 0.0);
+  //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
+  //  return t + o;
 
   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
 
   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
-  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
-  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+
   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
-  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+  auto Cmp =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
 
-  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
-                                  Flags);
-  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+  // Could emit G_UITOFP on the compare result instead of this select.
+  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+  auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
+  auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
 
-  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+  MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
 
   MI.eraseFromParent();
   return Legalized;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9698b6e..42479b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2429,18 +2429,16 @@
 
   const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
   const SDValue One = DAG.getConstantFP(1.0, SL, VT);
-  const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
-
-  SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
 
   EVT SetCCVT =
       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 
+  const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
   SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+  SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
 
-  SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
-
-  return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
+  SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
+  return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
 }
 
 SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {