blob: ea4224eb30ce3c60ef39504bb3781b4d570fbb43 [file] [log] [blame]
//===-- Executor.h - The Executor class -------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// The Executor class which represents a single device of a specific platform.
///
//===----------------------------------------------------------------------===//
#ifndef STREAMEXECUTOR_EXECUTOR_H
#define STREAMEXECUTOR_EXECUTOR_H
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
#include "streamexecutor/Utils/Error.h"
#include <limits>
namespace streamexecutor {
class KernelInterface;
class Stream;
/// Represents a single device of a specific platform.
///
/// The member function templates defined in this class validate their
/// arguments and then forward to the PlatformExecutor that was passed to the
/// constructor, converting element counts and element offsets to byte counts
/// and byte offsets along the way.
///
/// NOTE(review): only a raw PlatformExecutor pointer is stored; who owns the
/// pointee is not visible from this header.
class Executor {
public:
  explicit Executor(PlatformExecutor *PExecutor);
  virtual ~Executor();

  /// Gets the kernel implementation for the underlying platform.
  ///
  /// This default implementation is a placeholder: it ignores Spec and returns
  /// a null pointer as a success value, so callers must be prepared for a null
  /// result until the TODO below is resolved.
  virtual Expected<std::unique_ptr<KernelInterface>>
  getKernelImplementation(const MultiKernelLoaderSpec &Spec) {
    // TODO(jhen): Implement this.
    return nullptr;
  }

  /// Creates a stream. The implementation is defined outside of this header.
  Expected<std::unique_ptr<Stream>> createStream();

  /// Allocates an array of ElementCount entries of type T in device memory.
  ///
  /// Returns an error, without calling into the platform, if the size of the
  /// array in bytes cannot be represented as a size_t.
  template <typename T>
  Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
    // An unchecked multiplication would wrap around and silently request a
    // much smaller allocation than the caller asked for.
    if (byteCountOverflows<T>(ElementCount))
      return make_error("cannot allocate device memory, byte size of " +
                        llvm::Twine(ElementCount) + " elements of size " +
                        llvm::Twine(sizeof(T)) + " overflows size_t");
    return PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
  }

  /// Frees memory previously allocated with allocateDeviceMemory.
  template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
    return PExecutor->freeDeviceMemory(Memory);
  }

  /// Allocates an array of ElementCount entries of type T in host memory.
  ///
  /// Host memory allocated by this function can be used for asynchronous memory
  /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
  ///
  /// Returns an error, without calling into the platform, if the size of the
  /// array in bytes cannot be represented as a size_t.
  template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
    // Guard against wrap-around in the byte count computed below.
    if (byteCountOverflows<T>(ElementCount))
      return make_error("cannot allocate host memory, byte size of " +
                        llvm::Twine(ElementCount) + " elements of size " +
                        llvm::Twine(sizeof(T)) + " overflows size_t");
    return PExecutor->allocateHostMemory(ElementCount * sizeof(T));
  }

  /// Frees memory previously allocated with allocateHostMemory.
  template <typename T> Error freeHostMemory(T *Memory) {
    return PExecutor->freeHostMemory(Memory);
  }

  /// Registers a previously allocated host array of type T for asynchronous
  /// memory operations.
  ///
  /// Host memory registered by this function can be used for asynchronous
  /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
  ///
  /// Returns an error, without calling into the platform, if the size of the
  /// array in bytes cannot be represented as a size_t.
  template <typename T>
  Error registerHostMemory(T *Memory, size_t ElementCount) {
    // Guard against wrap-around in the byte count computed below; a wrapped
    // value would register only a prefix of the caller's array.
    if (byteCountOverflows<T>(ElementCount))
      return make_error("cannot register host memory, byte size of " +
                        llvm::Twine(ElementCount) + " elements of size " +
                        llvm::Twine(sizeof(T)) + " overflows size_t");
    return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
  }

  /// Unregisters host memory previously registered by registerHostMemory.
  template <typename T> Error unregisterHostMemory(T *Memory) {
    return PExecutor->unregisterHostMemory(Memory);
  }

  /// Host-synchronously copies a slice of an array of elements of type T from
  /// device to host memory.
  ///
  /// Returns an error if ElementCount is too large for the source slice or the
  /// destination.
  ///
  /// The calling host thread is blocked until the copy completes. Can be used
  /// with any host memory, the host memory does not have to be allocated with
  /// allocateHostMemory or registered with registerHostMemory. Does not block
  /// any ongoing device calls.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
    if (ElementCount > Src.getElementCount())
      return make_error("copying too many elements, " +
                        llvm::Twine(ElementCount) +
                        ", from a device array of element count " +
                        llvm::Twine(Src.getElementCount()));
    if (ElementCount > Dst.size())
      return make_error(
          "copying too many elements, " + llvm::Twine(ElementCount) +
          ", to a host array of element count " + llvm::Twine(Dst.size()));
    // The platform layer works in bytes: the slice's element offset becomes a
    // byte offset into the base device memory, and the host side starts at
    // byte offset 0 of Dst.
    return PExecutor->synchronousCopyD2H(
        Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
        ElementCount * sizeof(T));
  }

  /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
  /// llvm::MutableArrayRef<T>, size_t) but does not take an element count
  /// argument because it copies the entire source array.
  ///
  /// Returns an error if the Src and Dst sizes do not match.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
                           llvm::MutableArrayRef<T> Dst) {
    if (Src.getElementCount() != Dst.size())
      return make_error(
          "array size mismatch for D2H, device source has element count " +
          llvm::Twine(Src.getElementCount()) +
          " but host destination has element count " + llvm::Twine(Dst.size()));
    return synchronousCopyD2H(Src, Dst, Src.getElementCount());
  }

  /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
  /// llvm::MutableArrayRef<T>, size_t) but copies to a pointer rather than an
  /// llvm::MutableArrayRef.
  ///
  /// Returns an error if ElementCount is too large for the source slice. The
  /// size of the host buffer cannot be checked: Dst is wrapped in an array
  /// reference of exactly ElementCount elements, so the caller must guarantee
  /// that Dst points to at least that many elements.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst,
                           size_t ElementCount) {
    return synchronousCopyD2H(Src, llvm::MutableArrayRef<T>(Dst, ElementCount),
                              ElementCount);
  }

  /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
  /// llvm::MutableArrayRef<T>, size_t) but the source is a GlobalDeviceMemory
  /// rather than a GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
  }

  /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
  /// llvm::MutableArrayRef<T>) but the source is a GlobalDeviceMemory rather
  /// than a GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
                           llvm::MutableArrayRef<T> Dst) {
    return synchronousCopyD2H(Src.asSlice(), Dst);
  }

  /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, T*, size_t) but
  /// the source is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, T *Dst,
                           size_t ElementCount) {
    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
  }

  /// Host-synchronously copies a slice of an array of elements of type T from
  /// host to device memory.
  ///
  /// Returns an error if ElementCount is too large for the source or the
  /// destination.
  ///
  /// The calling host thread is blocked until the copy completes. Can be used
  /// with any host memory, the host memory does not have to be allocated with
  /// allocateHostMemory or registered with registerHostMemory. Does not block
  /// any ongoing device calls.
  template <typename T>
  Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
                           GlobalDeviceMemorySlice<T> Dst,
                           size_t ElementCount) {
    if (ElementCount > Src.size())
      return make_error(
          "copying too many elements, " + llvm::Twine(ElementCount) +
          ", from a host array of element count " + llvm::Twine(Src.size()));
    if (ElementCount > Dst.getElementCount())
      return make_error("copying too many elements, " +
                        llvm::Twine(ElementCount) +
                        ", to a device array of element count " +
                        llvm::Twine(Dst.getElementCount()));
    // The platform layer works in bytes: the host side starts at byte offset 0
    // of Src, and the slice's element offset becomes a byte offset into the
    // base device memory.
    return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
                                         Dst.getElementOffset() * sizeof(T),
                                         ElementCount * sizeof(T));
  }

  /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
  /// argument because it copies the entire source array.
  ///
  /// Returns an error if the Src and Dst sizes do not match.
  template <typename T>
  Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
                           GlobalDeviceMemorySlice<T> Dst) {
    if (Src.size() != Dst.getElementCount())
      return make_error(
          "array size mismatch for H2D, host source has element count " +
          llvm::Twine(Src.size()) +
          " but device destination has element count " +
          llvm::Twine(Dst.getElementCount()));
    // The sizes are known to be equal at this point, so either count works.
    return synchronousCopyH2D(Src, Dst, Dst.getElementCount());
  }

  /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but copies from a pointer rather than
  /// an llvm::ArrayRef.
  ///
  /// Returns an error if ElementCount is too large for the destination. The
  /// size of the host buffer cannot be checked: Src is wrapped in an array
  /// reference of exactly ElementCount elements, so the caller must guarantee
  /// that Src points to at least that many elements.
  template <typename T>
  Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst,
                           size_t ElementCount) {
    return synchronousCopyH2D(llvm::ArrayRef<T>(Src, ElementCount), Dst,
                              ElementCount);
  }

  /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
  /// GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
                           size_t ElementCount) {
    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
  }

  /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
  /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory
  /// rather than a GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
    return synchronousCopyH2D(Src, Dst.asSlice());
  }

  /// Similar to synchronousCopyH2D(T*, GlobalDeviceMemorySlice<T>, size_t) but
  /// the destination is a GlobalDeviceMemory rather than a
  /// GlobalDeviceMemorySlice.
  template <typename T>
  Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> Dst,
                           size_t ElementCount) {
    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
  }

  /// Host-synchronously copies a slice of an array of elements of type T from
  /// one location in device memory to another.
  ///
  /// Returns an error if ElementCount is too large for the source slice or the
  /// destination.
  ///
  /// The calling host thread is blocked until the copy completes. Does not
  /// block any ongoing device calls.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
                           GlobalDeviceMemorySlice<T> Dst,
                           size_t ElementCount) {
    if (ElementCount > Src.getElementCount())
      return make_error("copying too many elements, " +
                        llvm::Twine(ElementCount) +
                        ", from a device array of element count " +
                        llvm::Twine(Src.getElementCount()));
    if (ElementCount > Dst.getElementCount())
      return make_error("copying too many elements, " +
                        llvm::Twine(ElementCount) +
                        ", to a device array of element count " +
                        llvm::Twine(Dst.getElementCount()));
    // Both slices are passed as base memory plus a byte offset.
    return PExecutor->synchronousCopyD2D(
        Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
        Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
        ElementCount * sizeof(T));
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
  /// argument because it copies the entire source array.
  ///
  /// Returns an error if the Src and Dst sizes do not match.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
                           GlobalDeviceMemorySlice<T> Dst) {
    if (Src.getElementCount() != Dst.getElementCount())
      return make_error(
          "array size mismatch for D2D, device source has element count " +
          llvm::Twine(Src.getElementCount()) +
          " but device destination has element count " +
          llvm::Twine(Dst.getElementCount()));
    return synchronousCopyD2D(Src, Dst, Src.getElementCount());
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but the source is a
  /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
                           GlobalDeviceMemorySlice<T> Dst,
                           size_t ElementCount) {
    return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount);
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>) but the source is a GlobalDeviceMemory<T>
  /// rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
                           GlobalDeviceMemorySlice<T> Dst) {
    return synchronousCopyD2D(Src.asSlice(), Dst);
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
  /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
                           GlobalDeviceMemory<T> Dst, size_t ElementCount) {
    return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount);
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory<T>
  /// rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
                           GlobalDeviceMemory<T> Dst) {
    return synchronousCopyD2D(Src, Dst.asSlice());
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>, size_t) but the source and destination are
  /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
                           size_t ElementCount) {
    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
  }

  /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
  /// GlobalDeviceMemorySlice<T>) but the source and destination are
  /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
  template <typename T>
  Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
                           GlobalDeviceMemory<T> Dst) {
    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice());
  }

private:
  /// Returns true if the size in bytes of ElementCount elements of type T
  /// cannot be represented as a size_t.
  ///
  /// The check divides instead of multiplying so that it cannot itself
  /// overflow; sizeof(T) is never zero.
  template <typename T> static bool byteCountOverflows(size_t ElementCount) {
    return ElementCount > std::numeric_limits<size_t>::max() / sizeof(T);
  }

  /// Platform-specific implementation that the public member functions forward
  /// to.
  PlatformExecutor *PExecutor;
};
} // namespace streamexecutor
#endif // STREAMEXECUTOR_EXECUTOR_H