blob: a6943143086a551a75c2128318b57cfb8b7c6126 [file] [log] [blame]
//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// KernelLoaderSpec is the base class for types that know where to find the
/// code for a data-parallel kernel in a particular format on a particular
/// platform. So, for example, there will be one subclass that deals with CUDA
/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
/// subclass that deals with OpenCL text code.
///
/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
/// instances. This is useful when code is available for the same kernel in
/// several different formats or targeted for several different platforms. All
/// the various KernelLoaderSpec instances for this kernel can be combined
/// together in one MultiKernelLoaderSpec and the specific platform consumer can
/// decide which instance of the code it wants to use.
///
/// MultiKernelLoaderSpec provides several helper functions to build and
/// register KernelLoaderSpec instances all in a single operation. For example,
/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
///
/// The loader spec classes declared here are designed primarily to be
/// instantiated by the compiler, but they can also be instantiated directly by
/// the user. A simplified example workflow which a compiler might follow in the
/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
///
/// 1. The user defines a kernel function called UserKernel.
/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
/// that data into the host code at address __UserKernelFatbinAddress.
/// 3. The compiler adds code at the beginning of the host code to instantiate a
/// MultiKernelLoaderSpec:
/// \code
/// namespace compiler_cuda_namespace {
/// MultiKernelLoaderSpec UserKernelLoaderSpec;
/// } // namespace compiler_cuda_namespace
/// \endcode
/// 4. The compiler then adds code to the host code to add the fatbin data to
/// the new MultiKernelLoaderSpec, and to associate that data with the kernel
/// name "UserKernel":
/// \code
/// namespace compiler_cuda_namespace {
/// UserKernelLoaderSpec.addCUDAFatbinInMemory(
/// "UserKernel", __UserKernelFatbinAddress);
/// } // namespace compiler_cuda_namespace
/// \endcode
/// 5. The host code, having known beforehand that the compiler would initialize
/// a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use
/// of the symbol compiler_cuda_namespace::UserKernelLoaderSpec without
/// defining it.
///
/// In the example above, the MultiKernelLoaderSpec instance created by the
/// compiler can be used by the host code to create StreamExecutor kernel
/// objects. In turn, those StreamExecutor kernel objects can be used by the
/// host code to launch the kernel on the device as desired.
///
//===----------------------------------------------------------------------===//
#ifndef STREAMEXECUTOR_KERNELSPEC_H
#define STREAMEXECUTOR_KERNELSPEC_H
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
namespace streamexecutor {
/// An object that knows how to find the code for a device kernel.
///
/// This is the base class for the hierarchy of loader specs. The different
/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL
/// binary).
///
/// This base class has functionality for storing and getting the name of the
/// kernel as a string.
class KernelLoaderSpec {
public:
  /// Gets the name of the kernel that this loader spec refers to.
  ///
  /// The returned reference points at the string stored inside this object.
  const std::string &getKernelName() const { return KernelName; }

protected:
  /// Creates the base part of a loader spec for the kernel named
  /// \p KernelName.
  ///
  /// Protected so that only subclasses can construct instances.
  explicit KernelLoaderSpec(llvm::StringRef KernelName);

private:
  // Loader specs are not copyable.
  KernelLoaderSpec(const KernelLoaderSpec &) = delete;
  KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;

  /// Name of the kernel, as reported by getKernelName().
  std::string KernelName;
};
/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a
/// null-terminated string.
class CUDAPTXInMemorySpec : public KernelLoaderSpec {
public:
  /// Compute capability as a (major version, minor version) pair.
  using ComputeCapability = std::pair<int, int>;

  /// PTX code combined with its compute capability.
  struct PTXSpec {
    /// Compute capability associated with PTXCode.
    ComputeCapability TheComputeCapability;
    /// Pointer to the PTX code; the pointed-to memory is not owned.
    const char *PTXCode;
  };

  /// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects.
  ///
  /// Adds each item in SpecList to this object.
  ///
  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
  ///
  /// \param KernelName name of the kernel this spec loads.
  /// \param SpecList PTX code entries, one per compute capability.
  CUDAPTXInMemorySpec(llvm::StringRef KernelName,
                      llvm::ArrayRef<PTXSpec> SpecList);

  /// Returns a pointer to the PTX code for the requested compute capability.
  ///
  /// Returns nullptr on failed lookup (if the requested compute capability is
  /// not available). Matches exactly the specified compute capability. Doesn't
  /// try to do anything smart like finding the next best compute capability if
  /// the specified capability cannot be found.
  ///
  /// \param ComputeCapabilityMajor major version of the compute capability.
  /// \param ComputeCapabilityMinor minor version of the compute capability.
  const char *getCode(int ComputeCapabilityMajor,
                      int ComputeCapabilityMinor) const;

private:
  /// PTX code contents in memory.
  ///
  /// The key is a pair (cc_major, cc_minor), e.g., (2, 0), (3, 0), (3, 5).
  std::map<ComputeCapability, const char *> PTXByComputeCapability;

  // Not copyable.
  CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
  CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;
};
/// A KernelLoaderSpec for CUDA fatbin code that resides in memory.
class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
public:
  /// Builds a spec for the kernel named \p KernelName that refers to the
  /// fatbin data at \p Bytes.
  ///
  /// Ownership of the memory behind \p Bytes is not taken.
  CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);

  /// Returns the pointer to the fatbin data held by this spec.
  const void *getBytes() const { return Bytes; }

private:
  // Not copyable.
  CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
  CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;

  /// Non-owning pointer to the fatbin data.
  const void *Bytes;
};
/// A KernelLoaderSpec for OpenCL text that resides in memory as a
/// null-terminated string.
class OpenCLTextInMemorySpec : public KernelLoaderSpec {
public:
  /// Builds an OpenCLTextInMemorySpec for the kernel named \p KernelName that
  /// refers to the OpenCL text code at \p Text.
  ///
  /// Ownership of the memory behind \p Text is not taken.
  OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);

  /// Gets the pointer to the OpenCL text held by this spec.
  const char *getText() const { return Text; }

private:
  // Not copyable.
  OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
  OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;

  /// Non-owning pointer to the OpenCL text.
  const char *Text;
};
/// An object to store several different KernelLoaderSpecs for the same kernel.
///
/// This allows code in different formats and for different platforms to be
/// stored all together for a single kernel.
///
/// Various methods are available to add a new KernelLoaderSpec to a
/// MultiKernelLoaderSpec. There are also methods to query which formats and
/// platforms are supported by the currently added KernelLoaderSpec objects, and
/// methods to get the KernelLoaderSpec objects for each format and platform.
///
/// Since all stored KernelLoaderSpecs are supposed to reference the same
/// kernel, they are all assumed to take the same number and type of parameters,
/// but no checking is done to enforce this. In debug mode, all
/// KernelLoaderSpecs are checked to make sure they have the same kernel name,
/// so passing in specs with different kernel names can cause the program to
/// abort.
///
/// This interface is prone to errors, so it is better to leave
/// MultiKernelLoaderSpec creation and initialization to the compiler rather
/// than doing it by hand.
class MultiKernelLoaderSpec {
public:
  /// \name Presence queries
  ///
  /// Each query reports whether a loader spec of the corresponding variant is
  /// currently stored in this object.
  /// @{
  bool hasCUDAPTXInMemory() const {
    return static_cast<bool>(TheCUDAPTXInMemorySpec);
  }
  bool hasCUDAFatbinInMemory() const {
    return static_cast<bool>(TheCUDAFatbinInMemorySpec);
  }
  bool hasOpenCLTextInMemory() const {
    return static_cast<bool>(TheOpenCLTextInMemorySpec);
  }
  /// @}

  /// \name Accessors
  ///
  /// Each accessor returns the stored loader spec of the corresponding
  /// variant.
  ///
  /// Precondition: the matching has* method returns true. This is checked
  /// with an assert.
  /// @{
  const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
    assert(hasCUDAPTXInMemory() && "getting spec that is not present");
    return *TheCUDAPTXInMemorySpec.get();
  }
  const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
    assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
    return *TheCUDAFatbinInMemorySpec.get();
  }
  const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
    assert(hasOpenCLTextInMemory() && "getting spec that is not present");
    return *TheOpenCLTextInMemorySpec.get();
  }
  /// @}

  // Builder-style methods for populating a MultiKernelLoaderSpec.
  //
  // Call each of these at most once per MultiKernelLoaderSpec object. The file
  // comment shows an example of their use.
  //
  // The KernelName argument has to agree with the kernel contained in the PTX
  // or OpenCL being loaded. Keep in mind that in CUDA C++ the compiler may
  // mangle the kernel name unless the kernel is declared extern "C".

  /// Adds a CUDA PTX in-memory spec.
  ///
  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
  MultiKernelLoaderSpec &
  addCUDAPTXInMemory(llvm::StringRef KernelName,
                     llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);

  /// Adds a CUDA fatbin in-memory spec.
  ///
  /// Does not take ownership of the FatbinBytes pointer.
  MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
                                               const void *FatbinBytes);

  /// Adds an OpenCL text in-memory spec.
  ///
  /// Does not take ownership of the OpenCLText pointer.
  MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
                                               const char *OpenCLText);

private:
  // Defined out of line; presumably records the shared kernel name and backs
  // the debug-mode name check mentioned in the class comment -- confirm
  // against the implementation file.
  void setKernelName(llvm::StringRef KernelName);

  std::unique_ptr<std::string> TheKernelName;
  std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
  std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
  std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
};
} // namespace streamexecutor
#endif // STREAMEXECUTOR_KERNELSPEC_H