[StreamExecutor] Rename Executor to Device

Summary: This more clearly describes what the class is.

Reviewers: jlebar

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23851

llvm-svn: 279669
GitOrigin-RevId: bcc77b6249a6f193f7322dbb3fdf77482d67bbee
diff --git a/streamexecutor/include/streamexecutor/Executor.h b/streamexecutor/include/streamexecutor/Device.h
similarity index 89%
rename from streamexecutor/include/streamexecutor/Executor.h
rename to streamexecutor/include/streamexecutor/Device.h
index 6b0bc18..34bba80 100644
--- a/streamexecutor/include/streamexecutor/Executor.h
+++ b/streamexecutor/include/streamexecutor/Device.h
@@ -1,4 +1,4 @@
-//===-- Executor.h - The Executor class -------------------------*- C++ -*-===//
+//===-- Device.h - The Device class -----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// The Executor class which represents a single device of a specific platform.
+/// The Device class which represents a single device of a specific platform.
 ///
 //===----------------------------------------------------------------------===//
 
-#ifndef STREAMEXECUTOR_EXECUTOR_H
-#define STREAMEXECUTOR_EXECUTOR_H
+#ifndef STREAMEXECUTOR_DEVICE_H
+#define STREAMEXECUTOR_DEVICE_H
 
 #include "streamexecutor/KernelSpec.h"
 #include "streamexecutor/PlatformInterfaces.h"
@@ -24,10 +24,10 @@
 class KernelInterface;
 class Stream;
 
-class Executor {
+class Device {
 public:
-  explicit Executor(PlatformExecutor *PExecutor);
-  virtual ~Executor();
+  explicit Device(PlatformDevice *PDevice);
+  virtual ~Device();
 
   /// Gets the kernel implementation for the underlying platform.
   virtual Expected<std::unique_ptr<KernelInterface>>
@@ -42,7 +42,7 @@
   template <typename T>
   Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
     Expected<GlobalDeviceMemoryBase> MaybeBase =
-        PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
+        PDevice->allocateDeviceMemory(ElementCount * sizeof(T));
     if (!MaybeBase)
       return MaybeBase.takeError();
     return GlobalDeviceMemory<T>(*MaybeBase);
@@ -50,7 +50,7 @@
 
   /// Frees memory previously allocated with allocateDeviceMemory.
   template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
-    return PExecutor->freeDeviceMemory(Memory);
+    return PDevice->freeDeviceMemory(Memory);
   }
 
   /// Allocates an array of ElementCount entries of type T in host memory.
@@ -59,7 +59,7 @@
   /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
   template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
     Expected<void *> MaybeMemory =
-        PExecutor->allocateHostMemory(ElementCount * sizeof(T));
+        PDevice->allocateHostMemory(ElementCount * sizeof(T));
     if (!MaybeMemory)
       return MaybeMemory.takeError();
     return static_cast<T *>(*MaybeMemory);
@@ -67,7 +67,7 @@
 
   /// Frees memory previously allocated with allocateHostMemory.
   template <typename T> Error freeHostMemory(T *Memory) {
-    return PExecutor->freeHostMemory(Memory);
+    return PDevice->freeHostMemory(Memory);
   }
 
   /// Registers a previously allocated host array of type T for asynchronous
@@ -77,15 +77,15 @@
   /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
   template <typename T>
   Error registerHostMemory(T *Memory, size_t ElementCount) {
-    return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
+    return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T));
   }
 
   /// Unregisters host memory previously registered by registerHostMemory.
   template <typename T> Error unregisterHostMemory(T *Memory) {
-    return PExecutor->unregisterHostMemory(Memory);
+    return PDevice->unregisterHostMemory(Memory);
   }
 
-  /// \anchor ExecutorHostSyncCopyGroup
+  /// \anchor DeviceHostSyncCopyGroup
   /// \name Host-synchronous device memory copying functions
   ///
   /// These methods block the calling host thread while copying data to or from
@@ -125,9 +125,9 @@
       return make_error(
           "copying too many elements, " + llvm::Twine(ElementCount) +
           ", to a host array of element count " + llvm::Twine(Dst.size()));
-    return PExecutor->synchronousCopyD2H(
-        Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
-        ElementCount * sizeof(T));
+    return PDevice->synchronousCopyD2H(Src.getBaseMemory(),
+                                       Src.getElementOffset() * sizeof(T),
+                                       Dst.data(), 0, ElementCount * sizeof(T));
   }
 
   template <typename T>
@@ -179,9 +179,9 @@
                         llvm::Twine(ElementCount) +
                         ", to a device array of element count " +
                         llvm::Twine(Dst.getElementCount()));
-    return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
-                                         Dst.getElementOffset() * sizeof(T),
-                                         ElementCount * sizeof(T));
+    return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
+                                       Dst.getElementOffset() * sizeof(T),
+                                       ElementCount * sizeof(T));
   }
 
   template <typename T>
@@ -234,7 +234,7 @@
                         llvm::Twine(ElementCount) +
                         ", to a device array of element count " +
                         llvm::Twine(Dst.getElementCount()));
-    return PExecutor->synchronousCopyD2D(
+    return PDevice->synchronousCopyD2D(
         Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
         Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
         ElementCount * sizeof(T));
@@ -292,9 +292,9 @@
   ///@} End host-synchronous device memory copying functions
 
 private:
-  PlatformExecutor *PExecutor;
+  PlatformDevice *PDevice;
 };
 
 } // namespace streamexecutor
 
-#endif // STREAMEXECUTOR_EXECUTOR_H
+#endif // STREAMEXECUTOR_DEVICE_H
diff --git a/streamexecutor/include/streamexecutor/Kernel.h b/streamexecutor/include/streamexecutor/Kernel.h
index b817162..4a2eeb4 100644
--- a/streamexecutor/include/streamexecutor/Kernel.h
+++ b/streamexecutor/include/streamexecutor/Kernel.h
@@ -54,13 +54,13 @@
 /// function as follows:
 /// \code
 ///     namespace ccn = compiler_cuda_namespace;
-///     // Assumes Executor is a pointer to the StreamExecutor on which to
-///     // launch the kernel.
+///     // Assumes Device is a pointer to the Device on which to launch the
+///     // kernel.
 ///     //
 ///     // See KernelSpec.h for details on how the compiler can create a
 ///     // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
 ///     Expected<ccn::SaxpyKernel> MaybeKernel =
-///         ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec);
+///         ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec);
 ///     if (!MaybeKernel) { /* Handle error */ }
 ///     ccn::SaxpyKernel SaxpyKernel = *MaybeKernel;
 ///     Launch(SaxpyKernel, A, X, Y);
@@ -84,7 +84,7 @@
 
 namespace streamexecutor {
 
-class Executor;
+class Device;
 class KernelInterface;
 
 /// The base class for device kernel functions.
@@ -100,13 +100,13 @@
   KernelBase &operator=(KernelBase &&) = default;
   ~KernelBase();
 
-  /// Creates a kernel object from an Executor and a MultiKernelLoaderSpec.
+  /// Creates a kernel object from a Device and a MultiKernelLoaderSpec.
   ///
-  /// The Executor knows which platform it belongs to and the
+  /// The Device knows which platform it belongs to and the
   /// MultiKernelLoaderSpec knows how to find the kernel code for different
   /// platforms, so the combined information is enough to get the kernel code
   /// for the appropriate platform.
-  static Expected<KernelBase> create(Executor *ParentExecutor,
+  static Expected<KernelBase> create(Device *Dev,
                                      const MultiKernelLoaderSpec &Spec);
 
   const std::string &getName() const { return Name; }
@@ -116,11 +116,11 @@
   KernelInterface *getImplementation() { return Implementation.get(); }
 
 private:
-  KernelBase(Executor *ParentExecutor, const std::string &Name,
+  KernelBase(Device *Dev, const std::string &Name,
              const std::string &DemangledName,
              std::unique_ptr<KernelInterface> Implementation);
 
-  Executor *ParentExecutor;
+  Device *TheDevice;
   std::string Name;
   std::string DemangledName;
   std::unique_ptr<KernelInterface> Implementation;
@@ -136,9 +136,9 @@
   TypedKernel &operator=(TypedKernel &&) = default;
 
   /// Parameters here have the same meaning as in KernelBase::create.
-  static Expected<TypedKernel> create(Executor *ParentExecutor,
+  static Expected<TypedKernel> create(Device *Dev,
                                       const MultiKernelLoaderSpec &Spec) {
-    auto MaybeBase = KernelBase::create(ParentExecutor, Spec);
+    auto MaybeBase = KernelBase::create(Dev, Spec);
     if (!MaybeBase) {
       return MaybeBase.takeError();
     }
diff --git a/streamexecutor/include/streamexecutor/PlatformInterfaces.h b/streamexecutor/include/streamexecutor/PlatformInterfaces.h
index 2c8fce3..b7737e8 100644
--- a/streamexecutor/include/streamexecutor/PlatformInterfaces.h
+++ b/streamexecutor/include/streamexecutor/PlatformInterfaces.h
@@ -31,7 +31,7 @@
 
 namespace streamexecutor {
 
-class PlatformExecutor;
+class PlatformDevice;
 
 /// Methods supported by device kernel function objects on all platforms.
 class KernelInterface {
@@ -41,15 +41,14 @@
 /// Platform-specific stream handle.
 class PlatformStreamHandle {
 public:
-  explicit PlatformStreamHandle(PlatformExecutor *PExecutor)
-      : PExecutor(PExecutor) {}
+  explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {}
 
   virtual ~PlatformStreamHandle();
 
-  PlatformExecutor *getExecutor() { return PExecutor; }
+  PlatformDevice *getDevice() { return PDevice; }
 
 private:
-  PlatformExecutor *PExecutor;
+  PlatformDevice *PDevice;
 };
 
 /// Raw executor methods that must be implemented by each platform.
@@ -57,11 +56,11 @@
 /// This class defines the platform interface that supports executing work on a
 /// device.
 ///
-/// The public Executor and Stream classes have the type-safe versions of the
+/// The public Device and Stream classes have the type-safe versions of the
 /// functions in this interface.
-class PlatformExecutor {
+class PlatformDevice {
 public:
-  virtual ~PlatformExecutor();
+  virtual ~PlatformDevice();
 
   virtual std::string getName() const = 0;
 
diff --git a/streamexecutor/include/streamexecutor/Stream.h b/streamexecutor/include/streamexecutor/Stream.h
index 3293053..0e6e898 100644
--- a/streamexecutor/include/streamexecutor/Stream.h
+++ b/streamexecutor/include/streamexecutor/Stream.h
@@ -12,19 +12,18 @@
 /// A Stream instance represents a queue of sequential, host-asynchronous work
 /// to be performed on a device.
 ///
-/// To enqueue work on a device, first create a Executor instance for a
-/// given device and then use that Executor to create a Stream instance.
-/// The Stream instance will perform its work on the device managed by the
-/// Executor that created it.
+/// To enqueue work on a device, first create a Device instance then use that
+/// Device to create a Stream instance. The Stream instance will perform its
+/// work on the device managed by the Device object that created it.
 ///
 /// The various "then" methods of the Stream object, such as thenCopyH2D and
 /// thenLaunch, may be used to enqueue work on the Stream, and the
 /// blockHostUntilDone() method may be used to block the host code until the
 /// Stream has completed all its work.
 ///
-/// Multiple Stream instances can be created for the same Executor. This
-/// allows several independent streams of computation to be performed
-/// simultaneously on a single device.
+/// Multiple Stream instances can be created for the same Device. This allows
+/// several independent streams of computation to be performed simultaneously on
+/// a single device.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -94,8 +93,8 @@
                      const ParameterTs &... Arguments) {
     auto ArgumentArray =
         make_kernel_argument_pack<ParameterTs...>(Arguments...);
-    setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize,
-                               Kernel, ArgumentArray));
+    setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize,
+                             Kernel, ArgumentArray));
     return *this;
   }
 
@@ -105,13 +104,13 @@
   /// return without waiting for the operation to complete.
   ///
   /// Any host memory used as a source or destination for one of these
-  /// operations must be allocated with Executor::allocateHostMemory or
-  /// registered with Executor::registerHostMemory. Otherwise, the enqueuing
-  /// operation may block until the copy operation is fully complete.
+  /// operations must be allocated with Device::allocateHostMemory or registered
+  /// with Device::registerHostMemory. Otherwise, the enqueuing operation may
+  /// block until the copy operation is fully complete.
   ///
   /// The arguments and bounds checking for these methods match the API of the
-  /// \ref ExecutorHostSyncCopyGroup
-  /// "host-synchronous device memory copying functions" of Executor.
+  /// \ref DeviceHostSyncCopyGroup
+  /// "host-synchronous device memory copying functions" of Device.
   ///@{
 
   template <typename T>
@@ -125,9 +124,9 @@
       setError("copying too many elements, " + llvm::Twine(ElementCount) +
                ", to a host array of element count " + llvm::Twine(Dst.size()));
     else
-      setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
-                                  Src.getElementOffset() * sizeof(T),
-                                  Dst.data(), 0, ElementCount * sizeof(T)));
+      setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
+                                Src.getElementOffset() * sizeof(T), Dst.data(),
+                                0, ElementCount * sizeof(T)));
     return *this;
   }
 
@@ -182,7 +181,7 @@
                ", to a device array of element count " +
                llvm::Twine(Dst.getElementCount()));
     else
-      setError(PExecutor->copyH2D(
+      setError(PDevice->copyH2D(
           ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(),
           Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
     return *this;
@@ -238,7 +237,7 @@
                ", to a device array of element count " +
                llvm::Twine(Dst.getElementCount()));
     else
-      setError(PExecutor->copyD2D(
+      setError(PDevice->copyD2D(
           ThePlatformStream.get(), Src.getBaseMemory(),
           Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(),
           Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
@@ -322,8 +321,8 @@
       ErrorMessage = Message.str();
   }
 
-  /// The PlatformExecutor that supports the operations of this stream.
-  PlatformExecutor *PExecutor;
+  /// The PlatformDevice that supports the operations of this stream.
+  PlatformDevice *PDevice;
 
   /// The platform-specific stream handle for this instance.
   std::unique_ptr<PlatformStreamHandle> ThePlatformStream;
diff --git a/streamexecutor/lib/CMakeLists.txt b/streamexecutor/lib/CMakeLists.txt
index 7f5cb20..cf7baf9 100644
--- a/streamexecutor/lib/CMakeLists.txt
+++ b/streamexecutor/lib/CMakeLists.txt
@@ -6,7 +6,7 @@
 add_library(
     streamexecutor
     $<TARGET_OBJECTS:utils>
-    Executor.cpp
+    Device.cpp
     Kernel.cpp
     KernelSpec.cpp
     PackedKernelArgumentArray.cpp
diff --git a/streamexecutor/lib/Executor.cpp b/streamexecutor/lib/Device.cpp
similarity index 67%
rename from streamexecutor/lib/Executor.cpp
rename to streamexecutor/lib/Device.cpp
index f103a76..4a5ec11 100644
--- a/streamexecutor/lib/Executor.cpp
+++ b/streamexecutor/lib/Device.cpp
@@ -1,4 +1,4 @@
-//===-- Executor.cpp - Executor implementation ----------------------------===//
+//===-- Device.cpp - Device implementation --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,11 +8,11 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// Implementation of Executor class internals.
+/// Implementation of Device class internals.
 ///
 //===----------------------------------------------------------------------===//
 
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
 
 #include <cassert>
 
@@ -23,17 +23,17 @@
 
 namespace streamexecutor {
 
-Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {}
+Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
 
-Executor::~Executor() = default;
+Device::~Device() = default;
 
-Expected<std::unique_ptr<Stream>> Executor::createStream() {
+Expected<std::unique_ptr<Stream>> Device::createStream() {
   Expected<std::unique_ptr<PlatformStreamHandle>> MaybePlatformStream =
-      PExecutor->createStream();
+      PDevice->createStream();
   if (!MaybePlatformStream) {
     return MaybePlatformStream.takeError();
   }
-  assert((*MaybePlatformStream)->getExecutor() == PExecutor &&
+  assert((*MaybePlatformStream)->getDevice() == PDevice &&
-         "an executor created a stream with a different stored executor");
+         "a device created a stream with a different stored device");
   return llvm::make_unique<Stream>(std::move(*MaybePlatformStream));
 }
diff --git a/streamexecutor/lib/Kernel.cpp b/streamexecutor/lib/Kernel.cpp
index 9e99e91..fa09920 100644
--- a/streamexecutor/lib/Kernel.cpp
+++ b/streamexecutor/lib/Kernel.cpp
@@ -13,31 +13,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "streamexecutor/Kernel.h"
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
 #include "streamexecutor/PlatformInterfaces.h"
 
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
 
 namespace streamexecutor {
 
-KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name,
+KernelBase::KernelBase(Device *Dev, const std::string &Name,
                        const std::string &DemangledName,
                        std::unique_ptr<KernelInterface> Implementation)
-    : ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName),
+    : TheDevice(Dev), Name(Name), DemangledName(DemangledName),
       Implementation(std::move(Implementation)) {}
 
 KernelBase::~KernelBase() = default;
 
-Expected<KernelBase> KernelBase::create(Executor *ParentExecutor,
+Expected<KernelBase> KernelBase::create(Device *Dev,
                                         const MultiKernelLoaderSpec &Spec) {
-  auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec);
+  auto MaybeImplementation = Dev->getKernelImplementation(Spec);
   if (!MaybeImplementation) {
     return MaybeImplementation.takeError();
   }
   std::string Name = Spec.getKernelName();
   std::string DemangledName =
       llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
-  KernelBase Instance(ParentExecutor, Name, DemangledName,
+  KernelBase Instance(Dev, Name, DemangledName,
                       std::move(*MaybeImplementation));
   return std::move(Instance);
 }
diff --git a/streamexecutor/lib/PlatformInterfaces.cpp b/streamexecutor/lib/PlatformInterfaces.cpp
index e0ae644..770cd17 100644
--- a/streamexecutor/lib/PlatformInterfaces.cpp
+++ b/streamexecutor/lib/PlatformInterfaces.cpp
@@ -18,6 +18,6 @@
 
 PlatformStreamHandle::~PlatformStreamHandle() = default;
 
-PlatformExecutor::~PlatformExecutor() = default;
+PlatformDevice::~PlatformDevice() = default;
 
 } // namespace streamexecutor
diff --git a/streamexecutor/lib/Stream.cpp b/streamexecutor/lib/Stream.cpp
index 40f52f9..20a817c 100644
--- a/streamexecutor/lib/Stream.cpp
+++ b/streamexecutor/lib/Stream.cpp
@@ -17,8 +17,7 @@
 namespace streamexecutor {
 
 Stream::Stream(std::unique_ptr<PlatformStreamHandle> PStream)
-    : PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) {
-}
+    : PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {}
 
 Stream::~Stream() = default;
 
diff --git a/streamexecutor/lib/unittests/CMakeLists.txt b/streamexecutor/lib/unittests/CMakeLists.txt
index 244312f..3b414e3 100644
--- a/streamexecutor/lib/unittests/CMakeLists.txt
+++ b/streamexecutor/lib/unittests/CMakeLists.txt
@@ -1,12 +1,12 @@
 add_executable(
-    executor_test
-    ExecutorTest.cpp)
+    device_test
+    DeviceTest.cpp)
 target_link_libraries(
-    executor_test
+    device_test
     streamexecutor
     ${GTEST_BOTH_LIBRARIES}
     ${CMAKE_THREAD_LIBS_INIT})
-add_test(ExecutorTest executor_test)
+add_test(DeviceTest device_test)
 
 add_executable(
     kernel_test
diff --git a/streamexecutor/lib/unittests/DeviceTest.cpp b/streamexecutor/lib/unittests/DeviceTest.cpp
new file mode 100644
index 0000000..cb34b8b
--- /dev/null
+++ b/streamexecutor/lib/unittests/DeviceTest.cpp
@@ -0,0 +1,476 @@
+//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for Device code.
+///
+//===----------------------------------------------------------------------===//
+
+#include <cstdlib>
+#include <cstring>
+
+#include "streamexecutor/Device.h"
+#include "streamexecutor/PlatformInterfaces.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+class MockPlatformDevice : public se::PlatformDevice {
+public:
+  ~MockPlatformDevice() override {}
+
+  std::string getName() const override { return "MockPlatformDevice"; }
+
+  se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
+  createStream() override {
+    return se::make_error("not implemented");
+  }
+
+  se::Expected<se::GlobalDeviceMemoryBase>
+  allocateDeviceMemory(size_t ByteCount) override {
+    return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
+  }
+
+  se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
+    std::free(const_cast<void *>(Memory.getHandle()));
+    return se::Error::success();
+  }
+
+  se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
+    return std::malloc(ByteCount);
+  }
+
+  se::Error freeHostMemory(void *Memory) override {
+    std::free(Memory);
+    return se::Error::success();
+  }
+
+  se::Error registerHostMemory(void *, size_t) override {
+    return se::Error::success();
+  }
+
+  se::Error unregisterHostMemory(void *) override {
+    return se::Error::success();
+  }
+
+  se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
+                               size_t SrcByteOffset, void *HostDst,
+                               size_t DstByteOffset,
+                               size_t ByteCount) override {
+    std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
+                static_cast<const char *>(DeviceSrc.getHandle()) +
+                    SrcByteOffset,
+                ByteCount);
+    return se::Error::success();
+  }
+
+  se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
+                               se::GlobalDeviceMemoryBase DeviceDst,
+                               size_t DstByteOffset,
+                               size_t ByteCount) override {
+    std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
+                    DstByteOffset,
+                static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
+    return se::Error::success();
+  }
+
+  se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
+                               size_t DstByteOffset,
+                               const se::GlobalDeviceMemoryBase &DeviceSrc,
+                               size_t SrcByteOffset,
+                               size_t ByteCount) override {
+    std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
+                    DstByteOffset,
+                static_cast<const char *>(DeviceSrc.getHandle()) +
+                    SrcByteOffset,
+                ByteCount);
+    return se::Error::success();
+  }
+};
+
+/// Test fixture to hold objects used by tests.
+class DeviceTest : public ::testing::Test {
+public:
+  DeviceTest()
+      : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
+        HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
+        DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
+        DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
+        DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
+        DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
+        Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
+        Device(&PDevice) {}
+
+  // Device memory is backed by host arrays.
+  int HostA5[5];
+  int HostB5[5];
+  int HostA7[7];
+  int HostB7[7];
+  se::GlobalDeviceMemory<int> DeviceA5;
+  se::GlobalDeviceMemory<int> DeviceB5;
+  se::GlobalDeviceMemory<int> DeviceA7;
+  se::GlobalDeviceMemory<int> DeviceB7;
+
+  // Host memory to be used as actual host memory.
+  int Host5[5];
+  int Host7[7];
+
+  MockPlatformDevice PDevice;
+  se::Device Device;
+};
+
+#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
+#define EXPECT_ERROR(E)                                                        \
+  do {                                                                         \
+    se::Error E__ = E;                                                         \
+    EXPECT_TRUE(static_cast<bool>(E__));                                       \
+    consumeError(std::move(E__));                                              \
+  } while (false)
+
+using llvm::ArrayRef;
+using llvm::MutableArrayRef;
+
+TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
+  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
+      Device.allocateDeviceMemory<int>(10);
+  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+  EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory));
+}
+
+TEST_F(DeviceTest, AllocateAndFreeHostMemory) {
+  se::Expected<int *> MaybeMemory = Device.allocateHostMemory<int>(10);
+  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+  EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory));
+}
+
+TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
+  std::vector<int> Data(10);
+  EXPECT_NO_ERROR(Device.registerHostMemory(Data.data(), 10));
+  EXPECT_NO_ERROR(Device.unregisterHostMemory(Data.data()));
+}
+
+// D2H tests
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostB5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToPointer) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(
+      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
+  for (int I = 1; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
+                                            MutableArrayRef<int>(Host5), 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostB5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(),
+                                         MutableArrayRef<int>(Host5), 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host7), 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
+                                            MutableArrayRef<int>(Host5)));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA7[I + 1], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
+                                         MutableArrayRef<int>(Host5)));
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
+                                            Host5 + 1, 4));
+  for (int I = 1; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
+}
+
+// H2D tests
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostB5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToPointer) {
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
+  for (int I = 1; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostB5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], Host5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
+}
+
+// D2D tests
+
+TEST_F(DeviceTest, SyncCopyD2DByCount) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB5[I]);
+  }
+
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostA7[I], HostB7[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2D) {
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB5[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4));
+  for (int I = 0; I < 4; ++I) {
+    EXPECT_EQ(HostA5[I + 1], HostB5[I]);
+  }
+
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostA7[I], HostB7[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2D) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA7[I], HostB5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB7[I + 2]);
+  }
+
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostA7[I], HostB7[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSlice) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB7[I]);
+  }
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
+
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB5[I]);
+  }
+
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(HostA7[I], HostB7[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(HostA5[I], HostB5[I]);
+  }
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
+
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
+}
+
+} // namespace
diff --git a/streamexecutor/lib/unittests/ExecutorTest.cpp b/streamexecutor/lib/unittests/ExecutorTest.cpp
deleted file mode 100644
index b6719d3..0000000
--- a/streamexecutor/lib/unittests/ExecutorTest.cpp
+++ /dev/null
@@ -1,478 +0,0 @@
-//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for Executor code.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cstdlib>
-#include <cstring>
-
-#include "streamexecutor/Executor.h"
-#include "streamexecutor/PlatformInterfaces.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace se = ::streamexecutor;
-
-class MockPlatformExecutor : public se::PlatformExecutor {
-public:
-  ~MockPlatformExecutor() override {}
-
-  std::string getName() const override { return "MockPlatformExecutor"; }
-
-  se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
-  createStream() override {
-    return se::make_error("not implemented");
-  }
-
-  se::Expected<se::GlobalDeviceMemoryBase>
-  allocateDeviceMemory(size_t ByteCount) override {
-    return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
-  }
-
-  se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
-    std::free(const_cast<void *>(Memory.getHandle()));
-    return se::Error::success();
-  }
-
-  se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
-    return std::malloc(ByteCount);
-  }
-
-  se::Error freeHostMemory(void *Memory) override {
-    std::free(Memory);
-    return se::Error::success();
-  }
-
-  se::Error registerHostMemory(void *, size_t) override {
-    return se::Error::success();
-  }
-
-  se::Error unregisterHostMemory(void *) override {
-    return se::Error::success();
-  }
-
-  se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
-                               size_t SrcByteOffset, void *HostDst,
-                               size_t DstByteOffset,
-                               size_t ByteCount) override {
-    std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
-                static_cast<const char *>(DeviceSrc.getHandle()) +
-                    SrcByteOffset,
-                ByteCount);
-    return se::Error::success();
-  }
-
-  se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
-                               se::GlobalDeviceMemoryBase DeviceDst,
-                               size_t DstByteOffset,
-                               size_t ByteCount) override {
-    std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
-                    DstByteOffset,
-                static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
-    return se::Error::success();
-  }
-
-  se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
-                               size_t DstByteOffset,
-                               const se::GlobalDeviceMemoryBase &DeviceSrc,
-                               size_t SrcByteOffset,
-                               size_t ByteCount) override {
-    std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
-                    DstByteOffset,
-                static_cast<const char *>(DeviceSrc.getHandle()) +
-                    SrcByteOffset,
-                ByteCount);
-    return se::Error::success();
-  }
-};
-
-/// Test fixture to hold objects used by tests.
-class ExecutorTest : public ::testing::Test {
-public:
-  ExecutorTest()
-      : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
-        HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
-        DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
-        DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
-        DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
-        DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
-        Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
-        Executor(&PExecutor) {}
-
-  // Device memory is backed by host arrays.
-  int HostA5[5];
-  int HostB5[5];
-  int HostA7[7];
-  int HostB7[7];
-  se::GlobalDeviceMemory<int> DeviceA5;
-  se::GlobalDeviceMemory<int> DeviceB5;
-  se::GlobalDeviceMemory<int> DeviceA7;
-  se::GlobalDeviceMemory<int> DeviceB7;
-
-  // Host memory to be used as actual host memory.
-  int Host5[5];
-  int Host7[7];
-
-  MockPlatformExecutor PExecutor;
-  se::Executor Executor;
-};
-
-#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
-#define EXPECT_ERROR(E)                                                        \
-  do {                                                                         \
-    se::Error E__ = E;                                                         \
-    EXPECT_TRUE(static_cast<bool>(E__));                                       \
-    consumeError(std::move(E__));                                              \
-  } while (false)
-
-using llvm::ArrayRef;
-using llvm::MutableArrayRef;
-
-TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) {
-  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
-      Executor.allocateDeviceMemory<int>(10);
-  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
-  EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory));
-}
-
-TEST_F(ExecutorTest, AllocateAndFreeHostMemory) {
-  se::Expected<int *> MaybeMemory = Executor.allocateHostMemory<int>(10);
-  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
-  EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory));
-}
-
-TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) {
-  std::vector<int> Data(10);
-  EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10));
-  EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data()));
-}
-
-// D2H tests
-
-TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HToPointer) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(
-      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
-  for (int I = 1; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
-                                              MutableArrayRef<int>(Host5), 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(),
-                                           MutableArrayRef<int>(Host5), 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
-                                           MutableArrayRef<int>(Host7), 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
-                                           MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
-                                              MutableArrayRef<int>(Host5)));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA7[I + 1], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
-                                           MutableArrayRef<int>(Host5)));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
-                                           MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
-                                              Host5 + 1, 4));
-  for (int I = 1; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
-}
-
-// H2D tests
-
-TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DToPointer) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
-      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
-  for (int I = 1; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
-      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
-}
-
-// D2D tests
-
-TEST_F(ExecutorTest, SyncCopyD2DByCount) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB5[I]);
-  }
-
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostA7[I], HostB7[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2D) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB5[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DByCount) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1),
-                                              DeviceB5, 4));
-  for (int I = 0; I < 4; ++I) {
-    EXPECT_EQ(HostA5[I + 1], HostB5[I]);
-  }
-
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1),
-                                              DeviceB7, 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostA7[I], HostB7[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2D) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA7[I], HostB5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) {
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
-      DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB7[I + 2]);
-  }
-
-  EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
-      DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostA7[I], HostB7[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2DSlice) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB7[I]);
-  }
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
-
-  EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB5[I]);
-  }
-
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
-  for (int I = 0; I < 2; ++I) {
-    EXPECT_EQ(HostA7[I], HostB7[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DSlice) {
-  EXPECT_NO_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
-  for (int I = 0; I < 5; ++I) {
-    EXPECT_EQ(HostA5[I], HostB5[I]);
-  }
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
-
-  EXPECT_ERROR(
-      Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
-}
-
-} // namespace
diff --git a/streamexecutor/lib/unittests/KernelTest.cpp b/streamexecutor/lib/unittests/KernelTest.cpp
index b5ee8a0..a19ebfb 100644
--- a/streamexecutor/lib/unittests/KernelTest.cpp
+++ b/streamexecutor/lib/unittests/KernelTest.cpp
@@ -14,7 +14,7 @@
 
 #include <cassert>
 
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
 #include "streamexecutor/Kernel.h"
 #include "streamexecutor/KernelSpec.h"
 #include "streamexecutor/PlatformInterfaces.h"
@@ -27,7 +27,7 @@
 
 namespace se = ::streamexecutor;
 
-// An Executor that returns a dummy KernelInterface.
+// A Device that returns a dummy KernelInterface.
 //
 // During construction it creates a unique_ptr to a dummy KernelInterface and it
 // also stores a separate copy of the raw pointer that is stored by that
@@ -39,10 +39,10 @@
 // object. The raw pointer copy can then be used to identify the unique_ptr in
 // its new location (by comparing the raw pointer with unique_ptr::get), to
 // verify that the unique_ptr ended up where it was supposed to be.
-class MockExecutor : public se::Executor {
+class MockDevice : public se::Device {
 public:
-  MockExecutor()
-      : se::Executor(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
+  MockDevice()
+      : se::Device(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
         Raw(Unique.get()) {}
 
   // Moves the unique pointer into the returned se::Expected instance.
@@ -51,7 +51,7 @@
   // out.
   se::Expected<std::unique_ptr<se::KernelInterface>>
   getKernelImplementation(const se::MultiKernelLoaderSpec &) override {
-    assert(Unique && "MockExecutor getKernelImplementation should not be "
+    assert(Unique && "MockDevice getKernelImplementation should not be "
                      "called more than once");
     return std::move(Unique);
   }
@@ -79,15 +79,15 @@
 
 // Tests that the kernel create functions properly fetch the implementation
 // pointers for the kernel objects they construct from the passed-in
-// Executor objects.
+// Device objects.
 TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) {
   se::MultiKernelLoaderSpec Spec;
-  MockExecutor MockExecutor;
+  MockDevice Dev;
 
-  auto MaybeKernel = TypeParam::create(&MockExecutor, Spec);
+  auto MaybeKernel = TypeParam::create(&Dev, Spec);
   EXPECT_TRUE(static_cast<bool>(MaybeKernel));
   se::KernelInterface *Implementation = MaybeKernel->getImplementation();
-  EXPECT_EQ(MockExecutor.getRaw(), Implementation);
+  EXPECT_EQ(Dev.getRaw(), Implementation);
 }
 
 } // namespace
diff --git a/streamexecutor/lib/unittests/StreamTest.cpp b/streamexecutor/lib/unittests/StreamTest.cpp
index 7564670..d05c928 100644
--- a/streamexecutor/lib/unittests/StreamTest.cpp
+++ b/streamexecutor/lib/unittests/StreamTest.cpp
@@ -14,7 +14,7 @@
 
 #include <cstring>
 
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
 #include "streamexecutor/Kernel.h"
 #include "streamexecutor/KernelSpec.h"
 #include "streamexecutor/PlatformInterfaces.h"
@@ -26,14 +26,14 @@
 
 namespace se = ::streamexecutor;
 
-/// Mock PlatformExecutor that performs asynchronous memcpy operations by
+/// Mock PlatformDevice that performs asynchronous memcpy operations by
 /// ignoring the stream argument and calling std::memcpy on device memory
 /// handles.
-class MockPlatformExecutor : public se::PlatformExecutor {
+class MockPlatformDevice : public se::PlatformDevice {
 public:
-  ~MockPlatformExecutor() override {}
+  ~MockPlatformDevice() override {}
 
-  std::string getName() const override { return "MockPlatformExecutor"; }
+  std::string getName() const override { return "MockPlatformDevice"; }
 
   se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
   createStream() override {
@@ -83,7 +83,7 @@
         DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
         DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
         Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
-        Stream(llvm::make_unique<se::PlatformStreamHandle>(&PExecutor)) {}
+        Stream(llvm::make_unique<se::PlatformStreamHandle>(&PDevice)) {}
 
 protected:
   // Device memory is backed by host arrays.
@@ -100,7 +100,7 @@
   int Host5[5];
   int Host7[7];
 
-  MockPlatformExecutor PExecutor;
+  MockPlatformDevice PDevice;
   se::Stream Stream;
 };