[flang][cuda] Add entry point to launch cuda fortran kernel (#113490)
diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index b73bc39..e9f61932 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h
@@ -30,7 +30,7 @@ const char *name = cudaGetErrorName(err); \ if (!name) \ name = "<unknown>"; \ - Terminator terminator{__FILE__, __LINE__}; \ + Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \ terminator.Crash("'%s' failed with '%s'", #expr, name); \ }(expr)
diff --git a/flang/include/flang/Runtime/CUDA/kernel.h b/flang/include/flang/Runtime/CUDA/kernel.h new file mode 100644 index 0000000..cf07d87 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/kernel.h
@@ -0,0 +1,27 @@ +//===-- include/flang/Runtime/CUDA/kernel.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_KERNEL_H_ +#define FORTRAN_RUNTIME_CUDA_KERNEL_H_ + +#include "flang/Runtime/entry-names.h" +#include <cstddef> +#include <stdint.h> + +extern "C" { + +// This function uses intptr_t instead of CUDA's unsigned int to match +// the type of MLIR's index type. This avoids the need for casts in the +// generated MLIR code. +void RTDEF(CUFLaunchKernel)(const void *kernelName, intptr_t gridX, + intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY, + intptr_t blockZ, int32_t smem, void **params, void **extra); + +} // extern "C" + +#endif // FORTRAN_RUNTIME_CUDA_KERNEL_H_
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 86523b4..ce87f3e 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -17,6 +17,7 @@ allocator.cpp allocatable.cpp descriptor.cpp + kernel.cpp memory.cpp registration.cpp )
diff --git a/flang/runtime/CUDA/kernel.cpp b/flang/runtime/CUDA/kernel.cpp new file mode 100644 index 0000000..f81153a --- /dev/null +++ b/flang/runtime/CUDA/kernel.cpp
@@ -0,0 +1,33 @@ +//===-- runtime/CUDA/kernel.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/kernel.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +extern "C" { + +void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, + intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, + int32_t smem, void **params, void **extra) { + dim3 gridDim; + gridDim.x = gridX; + gridDim.y = gridY; + gridDim.z = gridZ; + dim3 blockDim; + blockDim.x = blockX; + blockDim.y = blockY; + blockDim.z = blockZ; + cudaStream_t stream = 0; + CUDA_REPORT_IF_ERROR( + cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream)); +} + +} // extern "C"