diff --git a/streamexecutor/include/streamexecutor/Device.h b/streamexecutor/include/streamexecutor/Device.h
index 3de9910..26d0636 100644
--- a/streamexecutor/include/streamexecutor/Device.h
+++ b/streamexecutor/include/streamexecutor/Device.h
@@ -161,19 +161,19 @@
   }
 
   template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
+  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src,
                            llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
     return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
   }
 
   template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
+  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src,
                            llvm::MutableArrayRef<T> Dst) {
     return synchronousCopyD2H(Src.asSlice(), Dst);
   }
 
   template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, T *Dst,
+  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src, T *Dst,
                            size_t ElementCount) {
     return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
   }
@@ -216,18 +216,18 @@
   }
 
   template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
+  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst,
                            size_t ElementCount) {
     return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
   }
 
   template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
+  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst) {
     return synchronousCopyH2D(Src, Dst.asSlice());
   }
 
   template <typename T>
-  Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> Dst,
+  Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> &Dst,
                            size_t ElementCount) {
     return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
   }
@@ -265,39 +265,39 @@
   }
 
   template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
+  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
                            GlobalDeviceMemorySlice<T> Dst,
                            size_t ElementCount) {
     return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount);
   }
 
   template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
+  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
                            GlobalDeviceMemorySlice<T> Dst) {
     return synchronousCopyD2D(Src.asSlice(), Dst);
   }
 
   template <typename T>
   Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemory<T> Dst, size_t ElementCount) {
+                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
     return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount);
   }
 
   template <typename T>
   Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemory<T> Dst) {
+                           GlobalDeviceMemory<T> &Dst) {
     return synchronousCopyD2D(Src, Dst.asSlice());
   }
 
   template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
-                           size_t ElementCount) {
+  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
+                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
     return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
   }
 
   template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
-                           GlobalDeviceMemory<T> Dst) {
+  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
+                           GlobalDeviceMemory<T> &Dst) {
     return synchronousCopyD2D(Src.asSlice(), Dst.asSlice());
   }
 
diff --git a/streamexecutor/include/streamexecutor/DeviceMemory.h b/streamexecutor/include/streamexecutor/DeviceMemory.h
index 45faf7b..d841d26 100644
--- a/streamexecutor/include/streamexecutor/DeviceMemory.h
+++ b/streamexecutor/include/streamexecutor/DeviceMemory.h
@@ -192,7 +192,7 @@
   size_t getElementCount() const { return getByteCount() / sizeof(ElemT); }
 
   /// Converts this memory object into a slice.
-  GlobalDeviceMemorySlice<ElemT> asSlice() {
+  GlobalDeviceMemorySlice<ElemT> asSlice() const {
     return GlobalDeviceMemorySlice<ElemT>(*this);
   }
 
diff --git a/streamexecutor/include/streamexecutor/Stream.h b/streamexecutor/include/streamexecutor/Stream.h
index 054b159..81f9ada 100644
--- a/streamexecutor/include/streamexecutor/Stream.h
+++ b/streamexecutor/include/streamexecutor/Stream.h
@@ -164,20 +164,22 @@
   }
 
   template <typename T>
-  Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, llvm::MutableArrayRef<T> Dst,
-                      size_t ElementCount) {
+  Stream &thenCopyD2H(const GlobalDeviceMemory<T> &Src,
+                      llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
     thenCopyD2H(Src.asSlice(), Dst, ElementCount);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, llvm::MutableArrayRef<T> Dst) {
+  Stream &thenCopyD2H(const GlobalDeviceMemory<T> &Src,
+                      llvm::MutableArrayRef<T> Dst) {
     thenCopyD2H(Src.asSlice(), Dst);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, T *Dst, size_t ElementCount) {
+  Stream &thenCopyD2H(const GlobalDeviceMemory<T> &Src, T *Dst,
+                      size_t ElementCount) {
     thenCopyD2H(Src.asSlice(), Dst, ElementCount);
     return *this;
   }
@@ -221,20 +223,20 @@
   }
 
   template <typename T>
-  Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
+  Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst,
                       size_t ElementCount) {
     thenCopyH2D(Src, Dst.asSlice(), ElementCount);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
+  Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst) {
     thenCopyH2D(Src, Dst.asSlice());
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyH2D(T *Src, GlobalDeviceMemory<T> Dst, size_t ElementCount) {
+  Stream &thenCopyH2D(T *Src, GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
     thenCopyH2D(Src, Dst.asSlice(), ElementCount);
     return *this;
   }
@@ -272,42 +274,43 @@
   }
 
   template <typename T>
-  Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemorySlice<T> Dst,
-                      size_t ElementCount) {
+  Stream &thenCopyD2D(const GlobalDeviceMemory<T> &Src,
+                      GlobalDeviceMemorySlice<T> Dst, size_t ElementCount) {
     thenCopyD2D(Src.asSlice(), Dst, ElementCount);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2D(GlobalDeviceMemory<T> Src,
+  Stream &thenCopyD2D(const GlobalDeviceMemory<T> &Src,
                       GlobalDeviceMemorySlice<T> Dst) {
     thenCopyD2D(Src.asSlice(), Dst);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src, GlobalDeviceMemory<T> Dst,
-                      size_t ElementCount) {
+  Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src,
+                      GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
     thenCopyD2D(Src, Dst.asSlice(), ElementCount);
     return *this;
   }
 
   template <typename T>
   Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                      GlobalDeviceMemory<T> Dst) {
+                      GlobalDeviceMemory<T> &Dst) {
     thenCopyD2D(Src, Dst.asSlice());
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
-                      size_t ElementCount) {
+  Stream &thenCopyD2D(const GlobalDeviceMemory<T> &Src,
+                      GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
     thenCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
     return *this;
   }
 
   template <typename T>
-  Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst) {
+  Stream &thenCopyD2D(const GlobalDeviceMemory<T> &Src,
+                      GlobalDeviceMemory<T> &Dst) {
     thenCopyD2D(Src.asSlice(), Dst.asSlice());
     return *this;
   }
