//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \mainpage Getting Started
///
/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming
/// models (runtimes). This abstraction cleanly permits host code to target
/// either CUDA or OpenCL devices with identically-functioning data parallel
/// kernels. It manages the execution of concurrent work targeting the
/// accelerator, similar to a host-side Executor.
///
/// This version of StreamExecutor can be built either as a sub-project of the
/// LLVM project or as a standalone project depending on LLVM as an external
/// package.
///
/// Below is an example of the use of the StreamExecutor API:
///
/// \snippet examples/Example.cpp Example saxpy host main
///
/// In the example, a couple of handler functions are used to handle error
/// return values in the StreamExecutor API:
///
/// \snippet examples/Example.cpp Example saxpy host helper functions
///
/// These are just example handler functions. A real application will likely
/// want to define similar handlers of its own that log errors in an
/// application-specific way, convert errors to the application's own
/// error-handling framework, or try to recover from errors as appropriate.
///
/// The example also references some symbols from a compiler-generated
/// namespace:
///
/// \snippet examples/Example.cpp Example saxpy compiler-generated
///
/// Instead of depending on the compiler to generate this code, you can
/// technically write the code yourself, but this is not recommended because the
/// code is very error-prone. For example, the template parameters for the
/// Kernel specialization have to match the parameter types for the device
/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid
/// device code for the kernel. Errors in this code will not show up until
/// runtime, and may manifest only as garbage output rather than an explicit
/// error, which can be very hard to debug. For this reason, it is strongly
/// advised not to write this code yourself.
///
/// The example compiler-generated code uses a PTX string in the source code to
/// store the device code, but the device code can also be stored in other
/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be
/// stored for other platforms such as OpenCL, and StreamExecutor will pick the
/// right device code at runtime based on the user's platform selection. See
/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be
/// stored for different platforms. Again, however, the code to set up the
/// MultiKernelLoaderSpec instance should be generated by the compiler if
/// possible, not written by the user.
#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H
#define STREAMEXECUTOR_STREAMEXECUTOR_H
#include "Device.h"
#include "Kernel.h"
#include "KernelSpec.h"
#include "Platform.h"
#include "PlatformManager.h"
#include "Stream.h"
#endif // STREAMEXECUTOR_STREAMEXECUTOR_H