| //===-- lib/cuda/memory.cpp -------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "flang/Runtime/CUDA/memory.h" |
| #include "flang-rt/runtime/assign-impl.h" |
| #include "flang-rt/runtime/descriptor.h" |
| #include "flang-rt/runtime/environment.h" |
| #include "flang-rt/runtime/terminator.h" |
| #include "flang/Runtime/CUDA/common.h" |
| #include "flang/Runtime/CUDA/descriptor.h" |
| #include "flang/Runtime/CUDA/memmove-function.h" |
| #include "flang/Runtime/assign.h" |
| |
| #include "cuda_runtime.h" |
| |
| namespace Fortran::runtime::cuda { |
| |
| extern "C" { |
| |
| void *RTDEF(CUFMemAlloc)( |
| std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { |
| void *ptr = nullptr; |
| bytes = bytes ? bytes : 1; |
| if (type == kMemTypeDevice) { |
| if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { |
| CUDA_REPORT_IF_ERROR( |
| cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
| } else { |
| CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); |
| } |
| } else if (type == kMemTypeManaged || type == kMemTypeUnified) { |
| CUDA_REPORT_IF_ERROR( |
| cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
| } else if (type == kMemTypePinned) { |
| CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); |
| } else { |
| Terminator terminator{sourceFile, sourceLine}; |
| terminator.Crash("unsupported memory type"); |
| } |
| return ptr; |
| } |
| |
| void RTDEF(CUFMemFree)( |
| void *ptr, unsigned type, const char *sourceFile, int sourceLine) { |
| if (!ptr) |
| return; |
| if (type == kMemTypeDevice || type == kMemTypeManaged || |
| type == kMemTypeUnified) { |
| CUDA_REPORT_IF_ERROR(cudaFree(ptr)); |
| } else if (type == kMemTypePinned) { |
| CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); |
| } else { |
| Terminator terminator{sourceFile, sourceLine}; |
| terminator.Crash("unsupported memory type"); |
| } |
| } |
| |
| void RTDEF(CUFMemsetDescriptor)( |
| Descriptor *desc, void *value, const char *sourceFile, int sourceLine) { |
| Terminator terminator{sourceFile, sourceLine}; |
| terminator.Crash("not yet implemented: CUDA data transfer from a scalar " |
| "value to a descriptor"); |
| } |
| |
| void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, |
| unsigned mode, const char *sourceFile, int sourceLine) { |
| cudaMemcpyKind kind; |
| if (mode == kHostToDevice) { |
| kind = cudaMemcpyHostToDevice; |
| } else if (mode == kDeviceToHost) { |
| kind = cudaMemcpyDeviceToHost; |
| } else if (mode == kDeviceToDevice) { |
| kind = cudaMemcpyDeviceToDevice; |
| } else { |
| Terminator terminator{sourceFile, sourceLine}; |
| terminator.Crash("host to host copy not supported"); |
| } |
| // TODO: Use cudaMemcpyAsync when we have support for stream. |
| CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); |
| } |
| |
| void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, |
| std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { |
| Terminator terminator{sourceFile, sourceLine}; |
| terminator.Crash( |
| "not yet implemented: CUDA data transfer from a descriptor to a pointer"); |
| } |
| |
| void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
| unsigned mode, const char *sourceFile, int sourceLine) { |
| MemmoveFct memmoveFct; |
| Terminator terminator{sourceFile, sourceLine}; |
| if (mode == kHostToDevice) { |
| memmoveFct = &MemmoveHostToDevice; |
| } else if (mode == kDeviceToHost) { |
| memmoveFct = &MemmoveDeviceToHost; |
| } else if (mode == kDeviceToDevice) { |
| memmoveFct = &MemmoveDeviceToDevice; |
| } else { |
| terminator.Crash("host to host copy not supported"); |
| } |
| // Allocate dst descriptor if not allocated. |
| if (!dstDesc->IsAllocated()) { |
| dstDesc->ApplyMold(*srcDesc, dstDesc->rank()); |
| dstDesc->Allocate(/*asyncObject=*/nullptr); |
| } |
| if ((srcDesc->rank() > 0) && (dstDesc->Elements() <= srcDesc->Elements()) && |
| srcDesc->IsContiguous() && dstDesc->IsContiguous()) { |
| // Special case when rhs is bigger than lhs and both are contiguous arrays. |
| // In this case we do a simple ptr to ptr transfer with the size of lhs. |
| // This is be allowed in the reference compiler and it avoids error |
| // triggered in the Assign runtime function used for the main case below. |
| RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr, |
| srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(), |
| mode, sourceFile, sourceLine); |
| } else { |
| Fortran::runtime::Assign( |
| *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); |
| } |
| } |
| |
| void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
| unsigned mode, const char *sourceFile, int sourceLine) { |
| MemmoveFct memmoveFct; |
| Terminator terminator{sourceFile, sourceLine}; |
| if (mode == kHostToDevice) { |
| memmoveFct = &MemmoveHostToDevice; |
| } else if (mode == kDeviceToHost) { |
| memmoveFct = &MemmoveDeviceToHost; |
| } else if (mode == kDeviceToDevice) { |
| memmoveFct = &MemmoveDeviceToDevice; |
| } else { |
| terminator.Crash("host to host copy not supported"); |
| } |
| |
| Fortran::runtime::DoFromSourceAssign( |
| *dstDesc, *srcDesc, terminator, memmoveFct); |
| } |
| |
| void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc, |
| Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
| int sourceLine) { |
| MemmoveFct memmoveFct; |
| Terminator terminator{sourceFile, sourceLine}; |
| if (mode == kHostToDevice) { |
| memmoveFct = &MemmoveHostToDevice; |
| } else if (mode == kDeviceToHost) { |
| memmoveFct = &MemmoveDeviceToHost; |
| } else if (mode == kDeviceToDevice) { |
| memmoveFct = &MemmoveDeviceToDevice; |
| } else { |
| terminator.Crash("host to host copy not supported"); |
| } |
| Fortran::runtime::Assign( |
| *dstDesc, *srcDesc, terminator, NoAssignFlags, memmoveFct); |
| } |
| |
| void RTDECL(CUFDataTransferGlobalDescDesc)(Descriptor *dstDesc, |
| Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
| int sourceLine) { |
| RTNAME(CUFDataTransferDescDesc) |
| (dstDesc, srcDesc, mode, sourceFile, sourceLine); |
| if ((mode == kHostToDevice) || (mode == kDeviceToDevice)) { |
| void *deviceAddr{ |
| RTNAME(CUFGetDeviceAddress)((void *)dstDesc, sourceFile, sourceLine)}; |
| RTNAME(CUFDescriptorSync) |
| ((Descriptor *)deviceAddr, dstDesc, sourceFile, sourceLine); |
| } |
| } |
| } |
| } // namespace Fortran::runtime::cuda |