//===-- lib/runtime/copy.cpp ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "copy.h"
#include "stack.h"
#include "flang-rt/runtime/descriptor.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/type-info.h"
#include "flang/Runtime/allocatable.h"
#include <cstring>

namespace Fortran::runtime {
namespace {
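// Statically sized descriptor storage used to create component descriptors
// on the fly without dynamic allocation.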
using StaticDescTy = StaticDescriptor<maxRank, true, 0>;

// A structure describing the data copy that needs to be done
// from one descriptor to another. It is a helper structure
// for CopyElement.
struct CopyDescriptor {
  // A constructor specifying all members explicitly.
  // toAt and fromAt specify subscript storage that may be external
  // to CopyElement and must not be modified. The copy descriptor
  // only sets toAtPtr_ and fromAtPtr_ to point to that storage.
  RT_API_ATTRS CopyDescriptor(const Descriptor &to, const SubscriptValue toAt[],
      const Descriptor &from, const SubscriptValue fromAt[],
      std::size_t elements, bool usesStaticDescriptors = false)
      : to_(to), from_(from), elements_(elements),
        usesStaticDescriptors_(usesStaticDescriptors) {
    toAtPtr_ = toAt;
    fromAtPtr_ = fromAt;
  }
  // The number of elements to copy is initialized from the to descriptor.
  // The current element subscripts are initialized from the lower bounds
  // of the to and from descriptors.
  RT_API_ATTRS CopyDescriptor(const Descriptor &to, const Descriptor &from,
      bool usesStaticDescriptors = false)
      : to_(to), from_(from), elements_(to.Elements()),
        usesStaticDescriptors_(usesStaticDescriptors) {
    to.GetLowerBounds(toAt_);
    from.GetLowerBounds(fromAt_);
  }

  // Increment the toAt_ and fromAt_ subscripts to the next
  // element.
  RT_API_ATTRS void IncrementSubscripts(Terminator &terminator) {
    // This method must not be called for copy descriptors
    // using external non-modifiable subscript storage.
    RUNTIME_CHECK(terminator, toAt_ == toAtPtr_ && fromAt_ == fromAtPtr_);
    to_.IncrementSubscripts(toAt_);
    from_.IncrementSubscripts(fromAt_);
  }

  // Descriptor of the destination.
  const Descriptor &to_;
  // A subscript specifying the current element position to copy to.
  SubscriptValue toAt_[maxRank];
  // A pointer to the storage of the 'to' subscript.
  // It may point to toAt_ or to an external non-modifiable
  // subscript storage.
  const SubscriptValue *toAtPtr_{toAt_};
  // Descriptor of the source.
  const Descriptor &from_;
  // A subscript specifying the current element position to copy from.
  SubscriptValue fromAt_[maxRank];
  // A pointer to the storage of the 'from' subscript.
  // It may point to fromAt_ or to an external non-modifiable
  // subscript storage.
  const SubscriptValue *fromAtPtr_{fromAt_};
  // Number of elements left to copy.
  std::size_t elements_;
  // Must be true if the to and from descriptors are allocated
  // by CopyElement itself. The allocated memory belongs to a
  // separate stack that must be popped whenever such a
  // CopyDescriptor node is popped.
  bool usesStaticDescriptors_;
};

// A pair of StaticDescTy elements.
struct StaticDescriptorsPair {
  StaticDescTy to;
  StaticDescTy from;
};
} // namespace

RT_OFFLOAD_API_GROUP_BEGIN

RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
    const Descriptor &from, const SubscriptValue fromAt[],
    Terminator &terminator) {
  if (!to.Addendum()) {
    // Avoid the overhead of creating the work stacks below
    // for the simple non-derived type cases, because the overhead
    // might be noticeable relative to the total amount of work
    // needed for the copy.
    char *toPtr{to.Element<char>(toAt)};
    char *fromPtr{from.Element<char>(fromAt)};
    RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes());
    std::memcpy(toPtr, fromPtr, to.ElementBytes());
    return;
  }

#if !defined(RT_DEVICE_COMPILATION)
  constexpr unsigned copyStackReserve{16};
  constexpr unsigned descriptorStackReserve{6};
#else
  // Always use dynamic allocation on the device to avoid
  // big stack sizes. This may be tuned as needed.
  constexpr unsigned copyStackReserve{0};
  constexpr unsigned descriptorStackReserve{0};
#endif
  // Keep a stack of CopyDescriptors to avoid recursive calls.
  Stack<CopyDescriptor, copyStackReserve> copyStack{terminator};
  // Keep a separate stack of StaticDescTy pairs. These descriptors
  // may be used to represent copies of Component::Genre::Data
  // components (since such components do not have descriptors
  // allocated in memory).
  Stack<StaticDescriptorsPair, descriptorStackReserve> descriptorsStack{
      terminator};
  copyStack.emplace(to, toAt, from, fromAt, /*elements=*/std::size_t{1});

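  // Process the work stack iteratively: each iteration copies one element
  // of the topmost copy and pushes new CopyDescriptor nodes for any
  // components that require a deep copy.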
  while (!copyStack.empty()) {
    CopyDescriptor &currentCopy{copyStack.top()};
    std::size_t &elements{currentCopy.elements_};
    if (elements == 0) {
      // This copy has been exhausted.
      if (currentCopy.usesStaticDescriptors_) {
        // Pop the static descriptors, if they were used
        // for the current copy.
        descriptorsStack.pop();
      }
      copyStack.pop();
      continue;
    }
    const Descriptor &curTo{currentCopy.to_};
    const SubscriptValue *curToAt{currentCopy.toAtPtr_};
    const Descriptor &curFrom{currentCopy.from_};
    const SubscriptValue *curFromAt{currentCopy.fromAtPtr_};
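    // Shallow-copy the bytes of the current element. Any allocatable or
    // automatic components copied here as raw descriptor bytes are fixed up
    // with deep copies below.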
    char *toPtr{curTo.Element<char>(curToAt)};
    char *fromPtr{curFrom.Element<char>(curFromAt)};
    RUNTIME_CHECK(terminator, curTo.ElementBytes() == curFrom.ElementBytes());
    // TODO: the memcpy can be optimized when both to and from are contiguous.
    // Moreover, if we came here from a Component::Genre::Data component,
    // all the per-element copies are redundant, because the parent
    // has already been copied as a whole.
    std::memcpy(toPtr, fromPtr, curTo.ElementBytes());
    --elements;
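    // Advance the subscripts unless this copy is exhausted. The top-level
    // copy has a single element and external subscript storage, so it never
    // reaches IncrementSubscripts().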
    if (elements != 0) {
      currentCopy.IncrementSubscripts(terminator);
    }

    // Deep copy allocatable and automatic components if any.
    if (const auto *addendum{curTo.Addendum()}) {
      if (const auto *derived{addendum->derivedType()};
          derived && !derived->noDestructionNeeded()) {
        RUNTIME_CHECK(terminator,
            curFrom.Addendum() && derived == curFrom.Addendum()->derivedType());
        const Descriptor &componentDesc{derived->component()};
        const typeInfo::Component *component{
            componentDesc.OffsetElement<typeInfo::Component>()};
        std::size_t nComponents{componentDesc.Elements()};
        for (std::size_t j{0}; j < nComponents; ++j, ++component) {
          if (component->genre() == typeInfo::Component::Genre::Allocatable ||
              component->genre() == typeInfo::Component::Genre::Automatic) {
            Descriptor &toDesc{
                *reinterpret_cast<Descriptor *>(toPtr + component->offset())};
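            // The shallow copy above duplicated the source component's
            // descriptor into toDesc, so it still points at the source data.
            // If the source component is allocated, allocate fresh storage
            // for the destination and queue a deep copy of the contents.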
            if (toDesc.raw().base_addr != nullptr) {
              toDesc.set_base_addr(nullptr);
              RUNTIME_CHECK(
                  terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
              const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
                  fromPtr + component->offset())};
              copyStack.emplace(toDesc, fromDesc);
            }
          } else if (component->genre() == typeInfo::Component::Genre::Data &&
              component->derivedType() &&
              !component->derivedType()->noDestructionNeeded()) {
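            // A directly embedded derived-type component may itself contain
            // allocatable or automatic components. Build descriptors over the
            // in-place component data so that those nested components get
            // deep-copied as well.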
            SubscriptValue extents[maxRank];
            const typeInfo::Value *bounds{component->bounds()};
            std::size_t elements{1};
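            // Compute the component's extents and total element count from
            // its bounds, which are stored as lower/upper pairs per dimension
            // and are evaluated against the enclosing instance (hence
            // GetValue(&curTo)).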
            for (int dim{0}; dim < component->rank(); ++dim) {
              typeInfo::TypeParameterValue lb{
                  bounds[2 * dim].GetValue(&curTo).value_or(0)};
              typeInfo::TypeParameterValue ub{
                  bounds[2 * dim + 1].GetValue(&curTo).value_or(0)};
              extents[dim] = ub >= lb ? ub - lb + 1 : 0;
              elements *= extents[dim];
            }
            if (elements != 0) {
              const typeInfo::DerivedType &compType{*component->derivedType()};
              // Place a pair of static descriptors onto the descriptors stack.
              descriptorsStack.emplace();
              StaticDescriptorsPair &descs{descriptorsStack.top()};
              Descriptor &toCompDesc{descs.to.descriptor()};
              toCompDesc.Establish(compType, toPtr + component->offset(),
                  component->rank(), extents);
              Descriptor &fromCompDesc{descs.from.descriptor()};
              fromCompDesc.Establish(compType, fromPtr + component->offset(),
                  component->rank(), extents);
              copyStack.emplace(toCompDesc, fromCompDesc,
                  /*usesStaticDescriptors=*/true);
            }
          }
        }
      }
    }
  }
}
RT_OFFLOAD_API_GROUP_END
} // namespace Fortran::runtime