blob: b9f5c1658293141bcdad8d879170dae4464a19dd [file] [log] [blame]
//===-- OpenMP/Mapping.h - OpenMP/OpenACC pointer mapping -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Declarations for managing host-to-device pointer mappings.
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_OPENMP_MAPPING_H
#define OMPTARGET_OPENMP_MAPPING_H
#include "ExclusiveAccess.h"
#include "Shared/EnvironmentVar.h"
#include "omptarget.h"
#include <cstdint>
#include <mutex>
#include <string>
#include "llvm/ADT/SmallSet.h"
struct DeviceTy;
class AsyncInfoTy;
using map_var_info_t = void *;
class MappingConfig {
MappingConfig() {
BoolEnvar ForceAtomic = BoolEnvar("LIBOMPTARGET_MAP_FORCE_ATOMIC", true);
UseEventsForAtomicTransfers = ForceAtomic;
}
public:
static const MappingConfig &get() {
static MappingConfig MP;
return MP;
};
/// Flag to indicate if we use events to ensure the atomicity of
/// map clauses or not. Can be modified with an environment variable.
bool UseEventsForAtomicTransfers = true;
};
/// Information about shadow pointers.
struct ShadowPtrInfoTy {
void **HstPtrAddr = nullptr;
void *HstPtrVal = nullptr;
void **TgtPtrAddr = nullptr;
void *TgtPtrVal = nullptr;
bool operator==(const ShadowPtrInfoTy &Other) const {
return HstPtrAddr == Other.HstPtrAddr;
}
};
inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
return lhs.HstPtrAddr < rhs.HstPtrAddr;
}
/// Map between host data and target data.
struct HostDataToTargetTy {
const uintptr_t HstPtrBase; // host info.
const uintptr_t HstPtrBegin;
const uintptr_t HstPtrEnd; // non-inclusive.
const map_var_info_t HstPtrName; // Optional source name of mapped variable.
const uintptr_t TgtAllocBegin; // allocated target memory
const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding
private:
static const uint64_t INFRefCount = ~(uint64_t)0;
static std::string refCountToStr(uint64_t RefCount) {
return RefCount == INFRefCount ? "INF" : std::to_string(RefCount);
}
struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)
: DynRefCount(DRC), HoldRefCount(HRC) {}
/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.
///
/// The 'ompx_hold' map type modifier is permitted only on "omp target" and
/// "omp target data", and "delete" is permitted only on "omp target exit
/// data" and associated runtime library routines. As a result, we really
/// need to implement "reset" functionality only for the dynamic reference
/// counter. Likewise, only the dynamic reference count can be infinite
/// because, for example, omp_target_associate_ptr and "omp declare target
/// link" operate only on it. Nevertheless, it's actually easier to follow
/// the code (and requires less assertions for special cases) when we just
/// implement these features generally across both reference counters here.
/// Thus, it's the users of this class that impose those restrictions.
///
uint64_t DynRefCount;
uint64_t HoldRefCount;
/// A map of shadow pointers associated with this entry, the keys are host
/// pointer addresses to identify stale entries.
llvm::SmallSet<ShadowPtrInfoTy, 2> ShadowPtrInfos;
/// Pointer to the event corresponding to the data update of this map.
/// Note: At present this event is created when the first data transfer from
/// host to device is issued, and only being used for H2D. It is not used
/// for data transfer in another direction (device to host). It is still
/// unclear whether we need it for D2H. If in the future we need similar
/// mechanism for D2H, and if the event cannot be shared between them, Event
/// should be written as <tt>void *Event[2]</tt>.
void *Event = nullptr;
/// Number of threads currently holding a reference to the entry at a
/// targetDataEnd. This is used to ensure that only the last thread that
/// references this entry will actually delete it.
int32_t DataEndThreadCount = 0;
};
// When HostDataToTargetTy is used by std::set, std::set::iterator is const
// use unique_ptr to make States mutable.
const std::unique_ptr<StatesTy> States;
public:
HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E,
uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin,
bool UseHoldRefCount, map_var_info_t Name = nullptr,
bool IsINF = false)
: HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin),
States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
: IsINF ? INFRefCount
: 1,
!UseHoldRefCount ? 0
: IsINF ? INFRefCount
: 1)) {}
/// Get the total reference count. This is smarter than just getDynRefCount()
/// + getHoldRefCount() because it handles the case where at least one is
/// infinity and the other is non-zero.
uint64_t getTotalRefCount() const {
if (States->DynRefCount == INFRefCount ||
States->HoldRefCount == INFRefCount)
return INFRefCount;
return States->DynRefCount + States->HoldRefCount;
}
/// Get the dynamic reference count.
uint64_t getDynRefCount() const { return States->DynRefCount; }
/// Get the hold reference count.
uint64_t getHoldRefCount() const { return States->HoldRefCount; }
/// Get the event bound to this data map.
void *getEvent() const { return States->Event; }
/// Add a new event, if necessary.
/// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const;
/// Functions that manages the number of threads referencing the entry in a
/// targetDataEnd.
void incDataEndThreadCount() { ++States->DataEndThreadCount; }
[[nodiscard]] int32_t decDataEndThreadCount() {
return --States->DataEndThreadCount;
}
[[nodiscard]] int32_t getDataEndThreadCount() const {
return States->DataEndThreadCount;
}
/// Set the event bound to this data map.
void setEvent(void *Event) const { States->Event = Event; }
/// Reset the specified reference count unless it's infinity. Reset to 1
/// (even if currently 0) so it can be followed by a decrement.
void resetRefCount(bool UseHoldRefCount) const {
uint64_t &ThisRefCount =
UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
if (ThisRefCount != INFRefCount)
ThisRefCount = 1;
}
/// Increment the specified reference count unless it's infinity.
void incRefCount(bool UseHoldRefCount) const {
uint64_t &ThisRefCount =
UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
if (ThisRefCount != INFRefCount) {
++ThisRefCount;
assert(ThisRefCount < INFRefCount && "refcount overflow");
}
}
/// Decrement the specified reference count unless it's infinity or zero, and
/// return the total reference count.
uint64_t decRefCount(bool UseHoldRefCount) const {
uint64_t &ThisRefCount =
UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
uint64_t OtherRefCount =
UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;
(void)OtherRefCount;
if (ThisRefCount != INFRefCount) {
if (ThisRefCount > 0)
--ThisRefCount;
else
assert(OtherRefCount >= 0 && "total refcount underflow");
}
return getTotalRefCount();
}
/// Is the dynamic (and thus the total) reference count infinite?
bool isDynRefCountInf() const { return States->DynRefCount == INFRefCount; }
/// Convert the dynamic reference count to a debug string.
std::string dynRefCountToStr() const {
return refCountToStr(States->DynRefCount);
}
/// Convert the hold reference count to a debug string.
std::string holdRefCountToStr() const {
return refCountToStr(States->HoldRefCount);
}
/// Should one decrement of the specified reference count (after resetting it
/// if \c AfterReset) remove this mapping?
bool decShouldRemove(bool UseHoldRefCount, bool AfterReset = false) const {
uint64_t ThisRefCount =
UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
uint64_t OtherRefCount =
UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;
if (OtherRefCount > 0)
return false;
if (AfterReset)
return ThisRefCount != INFRefCount;
return ThisRefCount == 1;
}
/// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
/// the target ptr value was not already present in the existing set of shadow
/// pointers. Return true if something was added.
bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const {
auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
if (Pair.second)
return true;
// Check for a stale entry, if found, replace the old one.
if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
return false;
States->ShadowPtrInfos.erase(ShadowPtrInfo);
return addShadowPointer(ShadowPtrInfo);
}
/// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
/// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
/// OFFLOAD_SUCCESS. The entry is locked for this operation.
template <typename CBTy> int foreachShadowPointerInfo(CBTy CB) const {
for (auto &It : States->ShadowPtrInfos)
if (CB(const_cast<ShadowPtrInfoTy &>(It)) == OFFLOAD_FAIL)
return OFFLOAD_FAIL;
return OFFLOAD_SUCCESS;
}
/// Lock this entry for exclusive access. Ensure to get exclusive access to
/// HDTTMap first!
void lock() const { Mtx.lock(); }
/// Unlock this entry to allow other threads inspecting it.
void unlock() const { Mtx.unlock(); }
private:
// Mutex that needs to be held before the entry is inspected or modified. The
// HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
mutable std::mutex Mtx;
};
/// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
/// addition to the HDTT pointer we store the key value explicitly. This
/// allows the set to inspect (sort/search/...) this entry without an additional
/// load of HDTT. HDTT is a pointer to allow the modification of the set without
/// invalidating HDTT entries which can now be inspected at the same time.
struct HostDataToTargetMapKeyTy {
uintptr_t KeyValue;
HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {}
HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {}
HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT)
: KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
HostDataToTargetTy *HDTT;
};
inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
const uintptr_t &RHS) {
return LHS.KeyValue < RHS;
}
inline bool operator<(const uintptr_t &LHS,
const HostDataToTargetMapKeyTy &RHS) {
return LHS < RHS.KeyValue;
}
inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
const HostDataToTargetMapKeyTy &RHS) {
return LHS.KeyValue < RHS.KeyValue;
}
/// This struct will be returned by \p DeviceTy::getTargetPointer which provides
/// more data than just a target pointer. A TargetPointerResultTy that has a non
/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
/// the entry is locked. To give up ownership without destroying the TPR use the
/// reset() function.
struct TargetPointerResultTy {
struct FlagTy {
/// If the map table entry is just created
unsigned IsNewEntry : 1;
/// If the pointer is actually a host pointer (when unified memory enabled)
unsigned IsHostPointer : 1;
/// If the pointer is present in the mapping table.
unsigned IsPresent : 1;
/// Flag indicating that this was the last user of the entry and the ref
/// count is now 0.
unsigned IsLast : 1;
/// If the pointer is contained.
unsigned IsContained : 1;
} Flags = {0, 0, 0, 0, 0};
TargetPointerResultTy(const TargetPointerResultTy &) = delete;
TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete;
TargetPointerResultTy() {}
TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry,
void *TargetPointer)
: Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
if (Entry)
Entry->lock();
}
TargetPointerResultTy(TargetPointerResultTy &&TPR)
: Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
TPR.Entry = nullptr;
}
TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) {
if (&TPR != this) {
std::swap(Flags, TPR.Flags);
std::swap(Entry, TPR.Entry);
std::swap(TargetPointer, TPR.TargetPointer);
}
return *this;
}
~TargetPointerResultTy() {
if (Entry)
Entry->unlock();
}
bool isPresent() const { return Flags.IsPresent; }
bool isHostPointer() const { return Flags.IsHostPointer; }
bool isContained() const { return Flags.IsContained; }
/// The corresponding target pointer
void *TargetPointer = nullptr;
HostDataToTargetTy *getEntry() const { return Entry; }
void setEntry(HostDataToTargetTy *HDTTT,
HostDataToTargetTy *OwnedTPR = nullptr) {
if (Entry)
Entry->unlock();
Entry = HDTTT;
if (Entry && Entry != OwnedTPR)
Entry->lock();
}
void reset() { *this = TargetPointerResultTy(); }
private:
/// The corresponding map table entry which is stable.
HostDataToTargetTy *Entry = nullptr;
};
struct LookupResult {
struct {
unsigned IsContained : 1;
unsigned ExtendsBefore : 1;
unsigned ExtendsAfter : 1;
} Flags;
LookupResult() : Flags({0, 0, 0}), TPR() {}
TargetPointerResultTy TPR;
};
// This structure stores information of a mapped memory region.
struct MapComponentInfoTy {
void *Base;
void *Begin;
int64_t Size;
int64_t Type;
void *Name;
MapComponentInfoTy() = default;
MapComponentInfoTy(void *Base, void *Begin, int64_t Size, int64_t Type,
void *Name)
: Base(Base), Begin(Begin), Size(Size), Type(Type), Name(Name) {}
};
// This structure stores all components of a user-defined mapper. The number of
// components are dynamically decided, so we utilize C++ STL vector
// implementation here.
struct MapperComponentsTy {
llvm::SmallVector<MapComponentInfoTy> Components;
int32_t size() { return Components.size(); }
};
// The mapper function pointer type. It follows the signature below:
// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
// void *base, void *begin,
// size_t size, int64_t type,
// void * name);
typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t,
void *);
// Function pointer type for targetData* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
map_var_info_t *, void **, AsyncInfoTy &,
bool);
void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
bool toStdOut = false);
int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
struct MappingInfoTy {
MappingInfoTy(DeviceTy &Device) : Device(Device) {}
/// Host data to device map type with a wrapper key indirection that allows
/// concurrent modification of the entries without invalidating the underlying
/// entries.
using HostDataToTargetListTy =
std::set<HostDataToTargetMapKeyTy, std::less<>>;
/// The HDTTMap is a protected object that can only be accessed by one thread
/// at a time.
ProtectedObj<HostDataToTargetListTy> HostDataToTargetMap;
/// The type used to access the HDTT map.
using HDTTMapAccessorTy = decltype(HostDataToTargetMap)::AccessorTy;
/// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
/// exclusive access to the HDTT map.
LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
int64_t Size,
HostDataToTargetTy *OwnedTPR = nullptr);
/// Get the target pointer based on host pointer begin and base. If the
/// mapping already exists, the target pointer will be returned directly. In
/// addition, if required, the memory region pointed by \p HstPtrBegin of size
/// \p Size will also be transferred to the device. If the mapping doesn't
/// exist, and if unified shared memory is not enabled, a new mapping will be
/// created and the data will also be transferred accordingly. nullptr will be
/// returned because of any of following reasons:
/// - Data allocation failed;
/// - The user tried to do an illegal mapping;
/// - Data transfer issue fails.
TargetPointerResultTy getTargetPointer(
HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName,
bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
bool ReleaseHDTTMap = true);
/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
/// ensures exclusive access to the HDTT map.
void *getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
int64_t Size);
/// Return the target pointer begin (where the data will be moved).
/// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
/// - \p UpdateRefCount and \p UseHoldRefCount controls which and if the entry
/// reference counters will be decremented.
/// - \p MustContain enforces that the query must not extend beyond an already
/// mapped entry to be valid.
/// - \p ForceDelete deletes the entry regardless of its reference counting
/// (unless it is infinite).
/// - \p FromDataEnd tracks the number of threads referencing the entry at
/// targetDataEnd for delayed deletion purpose.
[[nodiscard]] TargetPointerResultTy
getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
bool UseHoldRefCount, bool MustContain = false,
bool ForceDelete = false, bool FromDataEnd = false);
/// Remove the \p Entry from the data map. Expect the entry's total reference
/// count to be zero and the caller thread to be the last one using it. \p
/// HDTTMap ensure the caller holds exclusive access and can modify the map.
/// Return \c OFFLOAD_SUCCESS if the map entry existed, and return \c
/// OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
/// this function if the map entry is not expected to exist because \p
/// HstPtrBegin uses shared memory.
[[nodiscard]] int eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
HostDataToTargetTy *Entry, int64_t Size);
/// Deallocate the \p Entry from the device memory and delete it. Return \c
/// OFFLOAD_SUCCESS if the deallocation operations executed successfully, and
/// return \c OFFLOAD_FAIL otherwise.
[[nodiscard]] int deallocTgtPtrAndEntry(HostDataToTargetTy *Entry,
int64_t Size);
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin);
/// Print information about the transfer from \p HstPtr to \p TgtPtr (or vice
/// versa if \p H2D is false). If there is an existing mapping, or if \p Entry
/// is set, the associated metadata will be printed as well.
void printCopyInfo(void *TgtPtr, void *HstPtr, int64_t Size, bool H2D,
HostDataToTargetTy *Entry,
MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr);
private:
DeviceTy &Device;
};
#endif // OMPTARGET_OPENMP_MAPPING_H