//===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The parts of the CUDA driver API that are presently in use by the OpenMP
// CUDA plugin.
//
//===----------------------------------------------------------------------===//
| |
| #ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED |
| #define DYNAMIC_CUDA_CUDA_H_INCLUDED |
| |
| #include <cstddef> |
| #include <cstdint> |
| |
// Basic handle types used by the declarations in this header.
//
// CUdevice is a plain integer and CUdeviceptr is an integer wide enough to
// hold a pointer. The remaining handles are pointers to structs that are only
// forward-declared here, so they are opaque to code including this header.
typedef int CUdevice;
typedef uintptr_t CUdeviceptr;
typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;
| |
// Result codes returned by every function declared in this header.
//
// Only a subset of the driver's codes is named. Numeric values follow the
// CUDA driver API reference. CUDA_ERROR_UNKNOWN is listed so that the enum's
// value range covers the codes above 400 (the previous largest enumerator),
// rather than leaving such return values outside the range of the type.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_UNKNOWN = 999,
} CUresult;
| |
// Flag values for stream creation (see cuStreamCreate's unsigned argument).
typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
} CUstream_flags;
| |
// Selectors accepted by cuCtxGetLimit / cuCtxSetLimit.
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0x0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
  // No explicit value: one past the last selector above (0x7).
  CU_LIMIT_MAX
} CUlimit;
| |
// Attribute selectors accepted by cuDeviceGetAttribute.
//
// Values are explicit and run from 1 to 119. A few names share a value with
// the enumerator listed just before them; those aliases are marked below.
typedef enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  // Alias: same value as CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK.
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  // Alias: same value as CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK.
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  // Aliases: the three *_ARRAY_* names reuse values 27, 28 and 29 of the
  // *_LAYERED_* enumerators directly above.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  // Alias: same value as
  // CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED.
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,
  // No explicit value: one past the last attribute above (120).
  CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;
| |
// Attribute selectors accepted by cuFuncGetAttribute. Only the single
// selector listed here is declared by this header.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
} CUfunction_attribute;
| |
// Context flag bits. CU_CTX_SCHED_MASK (0x07) covers the low three bits, and
// CU_CTX_SCHED_BLOCKING_SYNC (0x04) is one of the bits inside that mask.
typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;
| |
// Attach flags; each enumerator is a distinct single bit. Presumably these
// are the values passed as the last argument of cuMemAllocManaged.
typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 0x1,
  CU_MEM_ATTACH_HOST = 0x2,
  CU_MEM_ATTACH_SINGLE = 0x4,
} CUmemAttach_flags;
| |
// Compute-mode values. Note that the numbering skips 1: only 0, 2 and 3 are
// declared here.
typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;
| |
// Flag values for event creation (see cuEventCreate's unsigned argument).
// Apart from the zero default, each enumerator is a distinct single bit.
typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0x0,
  CU_EVENT_BLOCKING_SYNC = 0x1,
  CU_EVENT_DISABLE_TIMING = 0x2,
  CU_EVENT_INTERPROCESS = 0x4
} CUevent_flags;
| |
| CUresult cuCtxGetDevice(CUdevice *); |
| CUresult cuDeviceGet(CUdevice *, int); |
| CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); |
| CUresult cuDeviceGetCount(int *); |
| CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction); |
| |
| // Device info |
| CUresult cuDeviceGetName(char *, int, CUdevice); |
| CUresult cuDeviceTotalMem(size_t *, CUdevice); |
| CUresult cuDriverGetVersion(int *); |
| |
| CUresult cuGetErrorString(CUresult, const char **); |
| CUresult cuInit(unsigned); |
| CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned, |
| unsigned, unsigned, unsigned, CUstream, void **, |
| void **); |
| |
| CUresult cuMemAlloc(CUdeviceptr *, size_t); |
| CUresult cuMemAllocHost(void **, size_t); |
| CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int); |
| |
| CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); |
| CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); |
| CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); |
| CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); |
| CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); |
| |
| CUresult cuMemFree(CUdeviceptr); |
| CUresult cuMemFreeHost(void *); |
| |
| CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); |
| CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); |
| |
| CUresult cuModuleUnload(CUmodule); |
| CUresult cuStreamCreate(CUstream *, unsigned); |
| CUresult cuStreamDestroy(CUstream); |
| CUresult cuStreamSynchronize(CUstream); |
| CUresult cuCtxSetCurrent(CUcontext); |
| CUresult cuDevicePrimaryCtxRelease(CUdevice); |
| CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *); |
| CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned); |
| CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice); |
| CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *, |
| void **); |
| |
| CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice); |
| CUresult cuCtxEnablePeerAccess(CUcontext, unsigned); |
| CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, |
| size_t, CUstream); |
| |
| CUresult cuCtxGetLimit(size_t *, CUlimit); |
| CUresult cuCtxSetLimit(CUlimit, size_t); |
| |
| CUresult cuEventCreate(CUevent *, unsigned int); |
| CUresult cuEventRecord(CUevent, CUstream); |
| CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); |
| CUresult cuEventSynchronize(CUevent); |
| CUresult cuEventDestroy(CUevent); |
| |
| #endif |