//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

/// \mainpage Welcome to StreamExecutor
///
/// \section intro Introduction
/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming
/// models (runtimes). This abstraction cleanly permits host code to target
/// either CUDA or OpenCL devices with identically-functioning data parallel
/// kernels. It manages the execution of concurrent work targeting the
/// accelerator, similar to a host-side Executor.
///
/// This version of StreamExecutor can be built either as a sub-project of the
/// LLVM project or as a standalone project depending on LLVM as an external
/// package.
///
/// \subsection ExampleUsage Example Usage
/// Below is an example of the use of the StreamExecutor API:
///
/// \snippet examples/CUDASaxpy.cpp Example saxpy host main
///
/// In the example, a couple of handler functions, \c getOrDie and \c
/// dieIfError, are used to handle error return values in the StreamExecutor
/// API. These functions are provided by StreamExecutor for quick-and-dirty
/// error handling, but real applications will likely want to define their own
/// versions of these handlers so that errors are handled more gracefully than
/// just exiting the program.
///
/// \subsection CompilerGeneratedCode Compiler-Generated Code
///
/// The example also references some symbols from a compiler-generated
/// namespace:
///
/// \snippet examples/CUDASaxpy.cpp Example saxpy compiler-generated
///
/// Instead of depending on the compiler to generate this code, you can
/// technically write the code yourself, but this is not recommended because the
/// code is very error-prone. For example, the template parameters for the
/// Kernel specialization have to match the parameter types for the device
/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid
/// device code for the kernel. Errors in this code will not show up until
/// runtime, and may only show up as garbage output rather than an explicit
/// error, which can be very hard to debug, so again, it is strongly advised not
/// to write this code yourself.
///
/// The example compiler-generated code uses a PTX string in the source code to
/// store the device code, but the device code can also be stored in other
/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be
/// stored for other platforms such as OpenCL, and StreamExecutor will pick the
/// right device code at runtime based on the user's platform selection. See
/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be
/// stored for different platforms, but again, the code to set up the
/// MultiKernelLoaderSpec instance should be generated by the compiler if
/// possible, not by the user.

/// \example examples/CUDASaxpy.cpp
/// Running saxpy on a CUDA device.

#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H
#define STREAMEXECUTOR_STREAMEXECUTOR_H

#include "Device.h"
#include "Kernel.h"
#include "KernelSpec.h"
#include "Platform.h"
#include "PlatformManager.h"
#include "Stream.h"

#endif // STREAMEXECUTOR_STREAMEXECUTOR_H