Revert "[flang][cuda] Handle floats in atomiccas (#128970)"

This reverts commit 110b77f32859f39d253623153a37671f5601de65.
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 9a10ce9..537c817 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -2734,20 +2734,6 @@
 
   mlir::Value arg1 = args[1];
   mlir::Value arg2 = args[2];
-
-  auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value {
-    if (mlir::isa<mlir::Float32Type>(arg.getType()))
-      return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI32Type(),
-                                                   arg);
-    if (mlir::isa<mlir::Float64Type>(arg.getType()))
-      return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI64Type(),
-                                                   arg);
-    return arg;
-  };
-
-  arg1 = bitCastFloat(arg1);
-  arg2 = bitCastFloat(arg2);
-
   if (arg1.getType() != arg2.getType()) {
     // arg1 and arg2 need to have the same type in AtomicCmpXchgOp.
     arg2 = builder.createConvert(loc, arg1.getType(), arg2);
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index c651d34..f2b4eb5 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -175,26 +175,3 @@
 ! CHECK: %[[VAL:.*]] = fir.convert %c14{{.*}} : (i32) -> i64
 ! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<i64> to !llvm.ptr
 ! CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %[[VAL]] acq_rel monotonic : !llvm.ptr, i64
-
-attributes(device) subroutine testAtomic3()
-  real :: a, i, istat
-  istat = atomiccas(a, i, 14.0)
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtestatomic3()
-! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f32 to i32
-! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f32 to i32
-! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f32> to !llvm.ptr
-! CHECK: llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i32
-
-attributes(device) subroutine testAtomic4()
-  real(8) :: a, i, istat
-  istat = atomiccas(a, i, 14.0d0)
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtestatomic4()
-! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f64 to i64
-! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f64 to i64
-! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f64> to !llvm.ptr
-! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64
-! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)>