//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \mainpage Getting Started
///
/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming
/// models (runtimes). This abstraction cleanly permits host code to target
/// either CUDA or OpenCL devices with identically-functioning data parallel
/// kernels. It manages the execution of concurrent work targeting the
/// accelerator, similar to a host-side Executor.
///
/// This version of StreamExecutor can be built either as a sub-project of the
/// LLVM project or as a standalone project depending on LLVM as an external
/// package.
///
/// Below is an example of the use of the StreamExecutor API:
///
/// \snippet examples/Example.cpp Example saxpy host main
///
/// In the example, a couple of handler functions are used to handle error
/// return values in the StreamExecutor API:
///
/// \snippet examples/Example.cpp Example saxpy host helper functions
///
/// These are just example handler functions. A real application will likely
/// want to define similar handlers of its own that log errors in an
/// application-specific way, convert errors to the application's own
/// error-handling framework, or try to recover from errors as appropriate.
///
/// The example also references some symbols from a compiler-generated
/// namespace:
///
/// \snippet examples/Example.cpp Example saxpy compiler-generated
///
/// Instead of depending on the compiler to generate this code, you can
/// technically write the code yourself, but this is not recommended because the
/// code is very error-prone. For example, the template parameters for the
/// Kernel specialization have to match the parameter types for the device
/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid
/// device code for the kernel. Errors in this code will not show up until
/// runtime, and may manifest only as garbage output rather than an explicit
/// error, which can be very hard to debug. For this reason, it is strongly
/// advised not to write this code yourself.
///
/// The example compiler-generated code uses a PTX string in the source code to
/// store the device code, but the device code can also be stored in other
/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be
/// stored for other platforms such as OpenCL, and StreamExecutor will pick the
/// right device code at runtime based on the user's platform selection. See
/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be
/// stored for different platforms. Again, however, the code to set up the
/// MultiKernelLoaderSpec instance should be generated by the compiler if
/// possible, not written by the user.
#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H
#define STREAMEXECUTOR_STREAMEXECUTOR_H
#include "Device.h"
#include "Kernel.h"
#include "KernelSpec.h"
#include "Platform.h"
#include "PlatformManager.h"
#include "Stream.h"
#endif // STREAMEXECUTOR_STREAMEXECUTOR_H