[flang][cuda] Fix memory side effects on cuf.data_transfer op (#92928)

The memory side effects on the `cuf.data_transfer` operation were
swapped between the src and dst operands. The operation reads from the
source and writes to the destination, not the other way around.
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 72157bc..1c98b41 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -154,8 +154,8 @@
     ```
   }];
 
-  let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
-                       Arg<AnyReferenceLike, "", [MemRead]>:$dst,
+  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$src,
+                       Arg<AnyReferenceLike, "", [MemWrite]>:$dst,
                        cuf_DataTransferKindAttr:$transfer_kind);
 
   let assemblyFormat = [{