//===- PTXGenerator.h - IR helper to create GPGPU LLVM-IR -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create GPGPU parallel loops as LLVM-IR.
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_CODEGEN_PTXGENERATOR_H
#define POLLY_CODEGEN_PTXGENERATOR_H

#include "polly/Config/config.h"

#ifdef GPU_CODEGEN
#include "polly/CodeGen/IRBuilder.h"
#include "llvm/ADT/SetVector.h"

#include <map>

// Forward declarations of the LLVM classes named in this header's
// declarations; listed alphabetically.
namespace llvm {
class BasicBlock;
class Pass;
class Value;
} // end namespace llvm

namespace polly {
using namespace llvm;

/// @brief Helper that emits the LLVM-IR needed to run a loop as a GPGPU (PTX)
///        kernel.
///
/// Generation is split into two phases: startGeneration() sets up the
/// subfunction and hands back an insertion point for the loop body, and
/// finishGeneration() performs the post-operations once the body is emitted.
class PTXGenerator {
public:
  typedef std::map<Value *, Value *> ValueToValueMapTy;

  /// @brief Construct a PTX generator.
  ///
  /// @param Builder  The IR builder used by this generator. It is held by
  ///                 reference, so it must outlive this object.
  /// @param P        The pass on whose behalf code is generated.
  /// @param Triple   The target triple of the device. The string is copied,
  ///                 so the caller's string (or a temporary) need not outlive
  ///                 this object.
  PTXGenerator(PollyIRBuilder &Builder, Pass *P, const std::string &Triple);

  /// @brief Start the creation of a GPGPU parallel loop.
  ///
  /// @param UsedValues   A set of LLVM-IR Values that should be available to
  ///                     the new loop body.
  /// @param OriginalIVS  The new values of the original induction variables.
  /// @param VMap         This map is filled by startGeneration(). It
  ///                     maps the values in UsedValues to Values through which
  ///                     their content is available within the loop body.
  /// @param LoopBody     A pointer to an iterator that is set to point to the
  ///                     body of the created loop. It should be used to insert
  ///                     instructions that form the actual loop body.
  void startGeneration(SetVector<Value *> &UsedValues,
                       SetVector<Value *> &OriginalIVS, ValueToValueMapTy &VMap,
                       BasicBlock::iterator *LoopBody);

  /// @brief Execute the post-operations to build a GPGPU parallel loop.
  ///
  /// @param SubFunction  A pointer to the device code function.
  void finishGeneration(Function *SubFunction);

  /// @brief Set the parameters for launching PTX kernel.
  ///
  /// @param GridW    A value of the width of a GPU grid.
  /// @param GridH    A value of the height of a GPU grid.
  /// @param BlockW   A value of the width of a GPU block.
  /// @param BlockH   A value of the height of a GPU block.
  void setLaunchingParameters(int GridW, int GridH, int BlockW, int BlockH) {
    GridWidth = GridW;
    GridHeight = GridH;
    BlockWidth = BlockW;
    BlockHeight = BlockH;
  }

  /// @brief Set the size of the output array.
  ///
  /// This size is used to allocate memory on the device and the host.
  ///
  /// @param Bytes        Output array size in bytes.
  void setOutputBytes(unsigned Bytes) { OutputBytes = Bytes; }

private:
  PollyIRBuilder &Builder;
  Pass *P;

  /// @brief The target triple of the device.
  ///
  /// Stored by value: keeping a reference to the constructor argument would
  /// dangle whenever the caller passes a temporary or a shorter-lived string.
  const std::string GPUTriple;

  /// @brief Parameters used for launching PTX kernel.
  int GridWidth, GridHeight, BlockWidth, BlockHeight;

  /// @brief Size of the output array in bytes.
  unsigned OutputBytes;

  /// @brief Polly's GPU data types.
  StructType *ContextTy, *ModuleTy, *KernelTy, *DeviceTy, *DevDataTy, *EventTy;

  void InitializeGPUDataTypes();
  IntegerType *getInt64Type();           // i64
  PointerType *getI8PtrType();           // char *
  PointerType *getPtrI8PtrType();        // char **
  PointerType *getFloatPtrType();        // float *
  PointerType *getGPUContextPtrType();   // %struct.PollyGPUContextT *
  PointerType *getGPUModulePtrType();    // %struct.PollyGPUModuleT *
  PointerType *getGPUDevicePtrType();    // %struct.PollyGPUDeviceT *
  PointerType *getPtrGPUDevicePtrType(); // %struct.PollyGPUDevicePtrT *
  PointerType *getGPUFunctionPtrType();  // %struct.PollyGPUFunctionT *
  PointerType *getGPUEventPtrType();     // %struct.PollyGPUEventT *

  Module *getModule();

  /// @brief Create the kernel string containing LLVM IR.
  ///
  /// @param SubFunction  A pointer to the device code function.
  /// @return             A global string variable containing the LLVM IR codes
  ///                     of the SubFunction.
  Value *createPTXKernelFunction(Function *SubFunction);

  /// @brief Get the entry name of the device kernel function.
  ///
  /// @param SubFunction  A pointer to the device code function.
  /// @return             A global string variable containing the entry name of
  ///                     the SubFunction.
  Value *getPTXKernelEntryName(Function *SubFunction);

  void createCallInitDevice(Value *Context, Value *Device);
  void createCallGetPTXModule(Value *Buffer, Value *Module);
  void createCallGetPTXKernelEntry(Value *Entry, Value *Module, Value *Kernel);
  void createCallAllocateMemoryForHostAndDevice(Value *HostData,
                                                Value *DeviceData, Value *Size);
  void createCallCopyFromHostToDevice(Value *DeviceData, Value *HostData,
                                      Value *Size);
  void createCallCopyFromDeviceToHost(Value *HostData, Value *DeviceData,
                                      Value *Size);
  void createCallSetKernelParameters(Value *Kernel, Value *BlockWidth,
                                     Value *BlockHeight, Value *DeviceData);
  void createCallLaunchKernel(Value *Kernel, Value *GridWidth,
                              Value *GridHeight);
  void createCallStartTimerByCudaEvent(Value *StartEvent, Value *StopEvent);
  void createCallStopTimerByCudaEvent(Value *StartEvent, Value *StopEvent,
                                      Value *Timer);
  void createCallCleanupGPGPUResources(Value *HostData, Value *DeviceData,
                                       Value *Module, Value *Context,
                                       Value *Kernel);

  /// @brief Create the CUDA subfunction.
  ///
  /// @param UsedValues   A set of LLVM-IR Values that should be available to
  ///                     the new loop body.
  /// @param VMap         This map is filled by createSubfunction(). It
  ///                     maps the values in UsedValues to Values through which
  ///                     their content is available within the loop body.
  /// @param OriginalIVS  The new values of the original induction variables.
  /// @param SubFunction  The newly created SubFunction is returned here.
  void createSubfunction(SetVector<Value *> &UsedValues,
                         SetVector<Value *> &OriginalIVS,
                         ValueToValueMapTy &VMap, Function **SubFunction);

  /// @brief Create the definition of the CUDA subfunction.
  ///
  /// @param NumArgs      The number of parameters of this subfunction. This is
  ///                     usually set to the number of memory accesses which
  ///                     will be copied from host to device.
  Function *createSubfunctionDefinition(int NumArgs);

  /// @brief Get the Value of CUDA block width.
  Value *getCUDABlockWidth();

  /// @brief Get the Value of CUDA block height.
  Value *getCUDABlockHeight();

  /// @brief Get the Value of CUDA grid width.
  Value *getCUDAGridWidth();

  /// @brief Get the Value of CUDA grid height.
  Value *getCUDAGridHeight();

  /// @brief Get the Value of the bytes of the output array.
  Value *getOutputArraySizeInBytes();

  /// @brief Erase the ptx-related subfunctions and declarations.
  ///
  /// @param SubFunction  A pointer to the device code function.
  void eraseUnusedFunctions(Function *SubFunction);
};
} // end namespace polly
#endif /* GPU_CODEGEN */
#endif /* POLLY_CODEGEN_PTXGENERATOR_H */
