//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// KernelLoaderSpec is the base class for types that know where to find the
/// code for a data-parallel kernel in a particular format on a particular
/// platform. So, for example, there will be one subclass that deals with CUDA
/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
/// subclass that deals with OpenCL text code.
///
/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
/// instances. This is useful when code is available for the same kernel in
/// several different formats or targeted for several different platforms. All
/// the various KernelLoaderSpec instances for this kernel can be combined
/// together in one MultiKernelLoaderSpec and the specific platform consumer can
/// decide which instance of the code it wants to use.
///
/// MultiKernelLoaderSpec provides several helper functions to build and
/// register KernelLoaderSpec instances all in a single operation. For example,
/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
///
/// The loader spec classes declared here are designed primarily to be
/// instantiated by the compiler, but they can also be instantiated directly by
/// the user. A simplified example workflow which a compiler might follow in the
/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
///
/// 1. The user defines a kernel function called \c UserKernel.
/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
///    that data into the host code at address \c __UserKernelFatbinAddress.
/// 3. The compiler adds code at the beginning of the host code to instantiate a
///    MultiKernelLoaderSpec:
///    \code
///    namespace compiler_cuda_namespace {
///      MultiKernelLoaderSpec UserKernelLoaderSpec;
///    } // namespace compiler_cuda_namespace
///    \endcode
/// 4. The compiler then adds code to the host code to add the fatbin data to
///    the new MultiKernelLoaderSpec, and to associate that data with the kernel
///    name \c "UserKernel":
///    \code
///    namespace compiler_cuda_namespace {
///      UserKernelLoaderSpec.addCUDAFatbinInMemory(
///        "UserKernel", __UserKernelFatbinAddress);
///    } // namespace compiler_cuda_namespace
///    \endcode
/// 5. The host code, having known beforehand that the compiler would initialize
///    a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use
///    of the symbol \c compiler_cuda_namespace::UserKernelLoaderSpec without
///    defining it.
///
/// In the example above, the MultiKernelLoaderSpec instance created by the
/// compiler can be used by the host code to create StreamExecutor kernel
/// objects. In turn, those StreamExecutor kernel objects can be used by the
/// host code to launch the kernel on the device as desired.
///
//===----------------------------------------------------------------------===//

#ifndef STREAMEXECUTOR_KERNELSPEC_H
#define STREAMEXECUTOR_KERNELSPEC_H

#include <cassert>
#include <functional>
#include <map>
#include <memory>
#include <string>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"

namespace streamexecutor {

/// Describes where the code for a device kernel can be found.
///
/// KernelLoaderSpec sits at the root of the loader spec hierarchy. Each
/// subclass knows how to locate kernel code in one specific format (e.g. CUDA
/// PTX, OpenCL binary).
///
/// The only thing handled at this level is the kernel name, which is kept as a
/// string and can be read back with getKernelName().
class KernelLoaderSpec {
public:
  // Loader specs cannot be copied.
  KernelLoaderSpec(const KernelLoaderSpec &) = delete;
  KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;

  /// Gets the name of the kernel that this spec loads.
  const std::string &getKernelName() const { return KernelName; }

protected:
  /// Constructible only from subclasses; the definition lives out of line.
  explicit KernelLoaderSpec(llvm::StringRef KernelName);

private:
  /// Name of the kernel, as handed back by getKernelName().
  std::string KernelName;
};

/// Loader spec for CUDA PTX code that lives in memory as a null-terminated
/// string.
class CUDAPTXInMemorySpec : public KernelLoaderSpec {
public:
  /// Compute capability version stored as (major, minor).
  using ComputeCapability = std::pair<int, int>;

  /// A piece of PTX code together with the compute capability it is for.
  struct PTXSpec {
    ComputeCapability TheComputeCapability;
    const char *PTXCode;
  };

  // Not copyable.
  CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
  CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;

  /// Builds a CUDAPTXInMemorySpec out of an array of PTXSpec objects.
  ///
  /// Every element of SpecList is added to this object.
  ///
  /// The PTXCode pointers inside the SpecList elements are borrowed; ownership
  /// is not taken.
  CUDAPTXInMemorySpec(llvm::StringRef KernelName,
                      const llvm::ArrayRef<PTXSpec> SpecList);

  /// Looks up the PTX code for the greatest compute capability that does not
  /// exceed the requested one and returns a pointer to it.
  ///
  /// If neither the requested version nor any lower version is available, the
  /// lookup fails and nullptr is returned.
  const char *getCode(int ComputeCapabilityMajor,
                      int ComputeCapabilityMinor) const;

private:
  /// In-memory PTX code, keyed by compute capability.
  ///
  /// Keys are (cc_major, cc_minor) pairs, e.g. (2, 0), (3, 0), (3, 5).
  std::map<ComputeCapability, const char *> PTXByComputeCapability;
};

/// Loader spec for CUDA fatbin code that lives in memory.
class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
public:
  // Not copyable.
  CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
  CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;

  /// Builds a CUDAFatbinInMemorySpec that refers to the given fatbin bytes.
  ///
  /// The Bytes pointer is borrowed; ownership is not taken.
  CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);

  /// Returns the pointer to the fatbin data bytes.
  const void *getBytes() const { return Bytes; }

private:
  /// Non-owning pointer to the fatbin data.
  const void *Bytes;
};

/// Loader spec for OpenCL text that lives in memory as a null-terminated
/// string.
class OpenCLTextInMemorySpec : public KernelLoaderSpec {
public:
  // Not copyable.
  OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
  OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;

  /// Builds an OpenCLTextInMemorySpec that refers to the given OpenCL text
  /// code bytes.
  ///
  /// The Text pointer is borrowed; ownership is not taken.
  OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);

  /// Gets the OpenCL text contents.
  const char *getText() const { return Text; }

private:
  /// Non-owning pointer to the OpenCL text.
  const char *Text;
};

/// An object to store several different KernelLoaderSpecs for the same kernel.
///
/// This allows code in different formats and for different platforms to be
/// stored all together for a single kernel.
///
/// Various methods are available to add a new KernelLoaderSpec to a
/// MultiKernelLoaderSpec. There are also methods to query which formats and
/// platforms are supported by the currently added KernelLoaderSpec objects, and
/// methods to get the KernelLoaderSpec objects for each format and platform.
///
/// Since all stored KernelLoaderSpecs are supposed to reference the same
/// kernel, they are all assumed to take the same number and type of parameters,
/// but no checking is done to enforce this. In debug mode, all
/// KernelLoaderSpecs are checked to make sure they have the same kernel name,
/// so passing in specs with different kernel names can cause the program to
/// abort.
///
/// This interface is prone to errors, so it is better to leave
/// MultiKernelLoaderSpec creation and initialization to the compiler rather
/// than doing it by hand.
class MultiKernelLoaderSpec {
public:
  /// Type of functions used as host platform kernels.
  using HostFunctionTy = std::function<void(const void **)>;

  std::string getKernelName() const {
    if (TheKernelName)
      return *TheKernelName;
    return "";
  }

  // Convenience getters for testing whether these platform variants have
  // kernel loader specifications available.

  bool hasCUDAPTXInMemory() const { return TheCUDAPTXInMemorySpec != nullptr; }
  bool hasCUDAFatbinInMemory() const {
    return TheCUDAFatbinInMemorySpec != nullptr;
  }
  bool hasOpenCLTextInMemory() const {
    return TheOpenCLTextInMemorySpec != nullptr;
  }
  bool hasHostFunction() const { return HostFunction != nullptr; }

  // Accessors for platform variant kernel load specifications.
  //
  // Precondition: corresponding has* method returns true.

  const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
    assert(hasCUDAPTXInMemory() && "getting spec that is not present");
    return *TheCUDAPTXInMemorySpec;
  }
  const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
    assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
    return *TheCUDAFatbinInMemorySpec;
  }
  const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
    assert(hasOpenCLTextInMemory() && "getting spec that is not present");
    return *TheOpenCLTextInMemorySpec;
  }

  const HostFunctionTy &getHostFunction() const {
    assert(hasHostFunction() && "getting spec that is not present");
    return *HostFunction;
  }

  // Builder-pattern-like methods for use in initializing a
  // MultiKernelLoaderSpec.
  //
  // Each of these should be used at most once for a single
  // MultiKernelLoaderSpec object. See file comment for example usage.
  //
  // Note that the KernelName parameter must be consistent with the kernel in
  // the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel
  // name may be mangled by the compiler if it is not declared extern "C".

  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
  MultiKernelLoaderSpec &
  addCUDAPTXInMemory(llvm::StringRef KernelName,
                     llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);

  /// Does not take ownership of the FatbinBytes pointer.
  MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
                                               const void *FatbinBytes);

  /// Does not take ownership of the OpenCLText pointer.
  MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
                                               const char *OpenCLText);

  MultiKernelLoaderSpec &addHostFunction(llvm::StringRef KernelName,
                                         HostFunctionTy Function) {
    HostFunction = llvm::make_unique<HostFunctionTy>(std::move(Function));
    return *this;
  }

private:
  void setKernelName(llvm::StringRef KernelName);

  std::unique_ptr<std::string> TheKernelName;
  std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
  std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
  std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
  std::unique_ptr<HostFunctionTy> HostFunction;
};

} // namespace streamexecutor

#endif // STREAMEXECUTOR_KERNELSPEC_H
