[StreamExecutor] Fix allocateDeviceMemory

Summary:
The return value from PlatformExecutor::allocateDeviceMemory needs to be
converted from Expected<GlobalDeviceMemoryBase> to
Expected<GlobalDeviceMemory<T>> in Executor::allocateDeviceMemory.

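A similar bug is also fixed for Executor::allocateHostMemory, where the
return value from PlatformExecutor::allocateHostMemory needs to be
converted from Expected<void *> to Expected<T *>.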

Thanks to jprice for identifying this bug.

Reviewers: jprice, jlebar

Subscribers: parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23849

llvm-svn: 279658
GitOrigin-RevId: 3053bbf3b27151b88f7ca87fb61d0610cdc9e4a8
diff --git a/streamexecutor/include/streamexecutor/Executor.h b/streamexecutor/include/streamexecutor/Executor.h
index 1e2f395..6b0bc18 100644
--- a/streamexecutor/include/streamexecutor/Executor.h
+++ b/streamexecutor/include/streamexecutor/Executor.h
@@ -41,7 +41,11 @@
   /// Allocates an array of ElementCount entries of type T in device memory.
   template <typename T>
   Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
-    return PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
+    Expected<GlobalDeviceMemoryBase> MaybeBase =
+        PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
+    if (!MaybeBase)
+      return MaybeBase.takeError();
+    return GlobalDeviceMemory<T>(*MaybeBase);
   }
 
   /// Frees memory previously allocated with allocateDeviceMemory.
@@ -54,7 +58,11 @@
   /// Host memory allocated by this function can be used for asynchronous memory
   /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
   template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
-    return PExecutor->allocateHostMemory(ElementCount * sizeof(T));
+    Expected<void *> MaybeMemory =
+        PExecutor->allocateHostMemory(ElementCount * sizeof(T));
+    if (!MaybeMemory)
+      return MaybeMemory.takeError();
+    return static_cast<T *>(*MaybeMemory);
   }
 
   /// Frees memory previously allocated with allocateHostMemory.
diff --git a/streamexecutor/lib/unittests/ExecutorTest.cpp b/streamexecutor/lib/unittests/ExecutorTest.cpp
index d2d03fb..b6719d3 100644
--- a/streamexecutor/lib/unittests/ExecutorTest.cpp
+++ b/streamexecutor/lib/unittests/ExecutorTest.cpp
@@ -54,6 +54,14 @@
     return se::Error::success();
   }
 
+  se::Error registerHostMemory(void *, size_t) override {
+    return se::Error::success();
+  }
+
+  se::Error unregisterHostMemory(void *) override {
+    return se::Error::success();
+  }
+
   se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
                                size_t SrcByteOffset, void *HostDst,
                                size_t DstByteOffset,
@@ -131,6 +139,25 @@
 using llvm::ArrayRef;
 using llvm::MutableArrayRef;
 
+TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) {
+  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
+      Executor.allocateDeviceMemory<int>(10);
+  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+  EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory));
+}
+
+TEST_F(ExecutorTest, AllocateAndFreeHostMemory) {
+  se::Expected<int *> MaybeMemory = Executor.allocateHostMemory<int>(10);
+  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+  EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory));
+}
+
+TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) {
+  std::vector<int> Data(10);
+  EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10));
+  EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data()));
+}
+
 // D2H tests
 
 TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) {