blob: 1a3490394458ff12512b6c4a241f0e757dbb189b [file] [log] [blame]
//===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_STATE_H
#define OMPTARGET_STATE_H
#include "Shared/Environment.h"
#include "Debug.h"
#include "Mapping.h"
#include "Types.h"
#include "Utils.h"
// Forward declaration.
struct KernelEnvironmentTy;
#pragma omp begin declare target device_type(nohost)
namespace ompx {
namespace memory {
/// Alloca \p Size bytes in shared memory, if possible, for \p Reason.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);
/// Free \p Ptr, allocated via allocShared, for \p Reason.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
/// Alloca \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);
/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();
/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);
} // namespace memory
namespace state {
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
/// Storage for the OpenMP internal control variables (ICVs) tracked by the
/// device runtime. One instance lives in TeamStateTy; per-thread copies are
/// made lazily via ThreadStateTy when a thread sets a custom value.
struct ICVStateTy {
  uint32_t NThreadsVar;        // nthreads-var: requested threads for the next parallel region.
  uint32_t LevelVar;           // levels-var: current nesting level of parallel regions.
  uint32_t ActiveLevelVar;     // active-levels-var: see comment at icv::ActiveLevel below.
  uint32_t Padding0Val;        // NOTE(review): presumably explicit padding for layout/ABI parity — TODO confirm.
  uint32_t MaxActiveLevelsVar; // max-active-levels-var.
  uint32_t RunSchedVar;        // run-sched-var: schedule kind.
  uint32_t RunSchedChunkVar;   // run-sched-var: chunk size.

  // Both defined out-of-line; equality compares the ICV sets.
  bool operator==(const ICVStateTy &Other) const;

  void assertEqual(const ICVStateTy &Other) const;
};
/// Per-team runtime state; a single instance (TeamState below) is placed in
/// team-local memory via the `omp_pteam_mem_alloc` allocator.
struct TeamStateTy {
  /// (Re)initialize the team state; \p IsSPMD selects SPMD vs. generic mode
  /// defaults (defined out-of-line).
  void init(bool IsSPMD);

  bool operator==(const TeamStateTy &) const;

  void assertEqual(TeamStateTy &Other) const;

  /// ICVs
  ///
  /// Preallocated storage for ICV values that are used if the threads have not
  /// set a custom default. The latter is supported but unlikely and slow(er).
  ///
  ///{
  ICVStateTy ICVState;
  ///}

  // Number of threads participating in the current parallel region.
  uint32_t ParallelTeamSize;
  // Non-zero once any thread in the team allocated a ThreadStateTy; gates the
  // slow path in lookupForModify32Impl/lookupImpl.
  uint32_t HasThreadState;
  // Outlined parallel region function to be executed by the team's workers.
  ParallelRegionFnTy ParallelRegionFnVar;
};
extern TeamStateTy TeamState;
#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
struct ThreadStateTy {
  /// ICVs have preallocated storage in the TeamStateTy which is used if a
  /// thread has not set a custom value. The latter is supported but unlikely.
  /// When it happens we will allocate dynamic memory to hold the values of all
  /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
  /// ICV struct to hold them all. This is slower than alternatives but allows
  /// users to pay only for what they use.
  ///
  state::ICVStateTy ICVState;
  ThreadStateTy *PreviousThreadState;

  /// Initialize as the first thread state: no predecessor, ICVs are copied
  /// from the team defaults.
  void init() {
    PreviousThreadState = nullptr;
    ICVState = TeamState.ICVState;
  }

  /// Initialize as a nested thread state chained to \p PreviousTS; inherit the
  /// predecessor's ICVs if there is one, the team defaults otherwise.
  void init(ThreadStateTy *PreviousTS) {
    PreviousThreadState = PreviousTS;
    if (PreviousTS)
      ICVState = PreviousTS->ICVState;
    else
      ICVState = TeamState.ICVState;
  }
};
extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
/// holds for all instances of the kernel. The latter is dynamic and provides
/// per-launch information.
KernelEnvironmentTy &getKernelEnvironment();
KernelLaunchEnvironmentTy &getKernelLaunchEnvironment();
/// TODO
enum ValueKind {
VK_NThreads,
VK_Level,
VK_ActiveLevel,
VK_MaxActiveLevels,
VK_RunSched,
// ---
VK_RunSchedChunk,
VK_ParallelRegionFn,
VK_ParallelTeamSize,
VK_HasThreadState,
};
/// TODO
void enterDataEnvironment(IdentTy *Ident);
/// TODO
void exitDataEnvironment();
/// TODO
/// RAII guard that enters a data environment on construction and exits it on
/// destruction.
/// NOTE(review): the name looks like a typo for "DataEnvironmentRAII";
/// renaming would change the public interface for existing callers, so it is
/// documented rather than fixed here.
struct DateEnvironmentRAII {
  DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
  ~DateEnvironmentRAII() { exitDataEnvironment(); }
};
/// TODO
void resetStateForThread(uint32_t TId);
inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
IdentTy *Ident, bool ForceTeamState) {
if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||
!TeamState.HasThreadState))
return TeamState.ICVState.*Var;
uint32_t TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(!ThreadStates[TId])) {
ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
sizeof(ThreadStateTy), "ICV modification outside data environment"));
ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!");
TeamState.HasThreadState = true;
ThreadStates[TId]->init();
}
return ThreadStates[TId]->ICVState.*Var;
}
/// Resolve the storage for the 32-bit ICV member \p Var for a read-only
/// access; prefers the current thread's private state when one exists.
inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
                            bool ForceTeamState) {
  uint32_t ThreadId = mapping::getThreadIdInBlock();
  bool HasPrivateState = !ForceTeamState && config::mayUseThreadStates() &&
                         TeamState.HasThreadState && ThreadStates[ThreadId];
  if (OMP_UNLIKELY(HasPrivateState))
    return ThreadStates[ThreadId]->ICVState.*Var;
  return TeamState.ICVState.*Var;
}
/// Dispatch a 32-bit state lookup for \p Kind: read-only accesses go through
/// lookupImpl, modifying accesses through lookupForModify32Impl; the two
/// team-only values are served directly from TeamState.
[[gnu::always_inline, gnu::flatten]] inline uint32_t &
lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
  switch (Kind) {
  case state::VK_NThreads:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
                                       ForceTeamState);
  case state::VK_Level:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::LevelVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::LevelVar, Ident,
                                       ForceTeamState);
  case state::VK_ActiveLevel:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::ActiveLevelVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident,
                                       ForceTeamState);
  case state::VK_MaxActiveLevels:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::MaxActiveLevelsVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident,
                                       ForceTeamState);
  case state::VK_RunSched:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::RunSchedVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident,
                                       ForceTeamState);
  case state::VK_RunSchedChunk:
    return IsReadonly
               ? lookupImpl(&ICVStateTy::RunSchedChunkVar, ForceTeamState)
               : lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident,
                                       ForceTeamState);
  case state::VK_ParallelTeamSize:
    // Always team-wide; no thread-private copy exists.
    return TeamState.ParallelTeamSize;
  case state::VK_HasThreadState:
    // Always team-wide; no thread-private copy exists.
    return TeamState.HasThreadState;
  default:
    break;
  }
  __builtin_unreachable();
}
/// Dispatch a pointer-typed state lookup; only the parallel region function is
/// stored as a pointer, and it is always team-wide.
[[gnu::always_inline, gnu::flatten]] inline void *&
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
  if (Kind == state::VK_ParallelRegionFn)
    return TeamState.ParallelRegionFnVar;
  __builtin_unreachable();
}
/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
/// A class without actual state used to provide a nice interface to look up
/// and update 32-bit ICV values; instances can be declared in global scope.
template <typename Ty, ValueKind Kind> struct Value {
  /// Read the current value (thread-private if one exists, team otherwise).
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  /// Overwrite the stored value with \p Other.
  [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
    set(Other, /*IdentTy=*/nullptr);
    return *this;
  }

  /// Pre-increment the stored value.
  [[gnu::flatten, gnu::always_inline]] Value &operator++() {
    inc(1, /*IdentTy=*/nullptr);
    return *this;
  }

  /// Pre-decrement the stored value.
  [[gnu::flatten, gnu::always_inline]] Value &operator--() {
    inc(-1, /*IdentTy=*/nullptr);
    return *this;
  }

  /// Assert that the stored value equals \p V.
  [[gnu::flatten, gnu::always_inline]] void
  assert_eq(const Ty &V, IdentTy *Ident = nullptr,
            bool ForceTeamState = false) {
    ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr);
  }

private:
  /// Resolve the storage location backing this value.
  [[gnu::flatten, gnu::always_inline]] Ty &
  lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
    return lookup32(Kind, IsReadonly, Ident, ForceTeamState);
  }

  /// Add \p UpdateVal (possibly negative) to the stored value in place.
  [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
    Ty &Loc = lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false);
    Loc += UpdateVal;
    return Loc;
  }

  /// Store \p UpdateVal in place.
  [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
    Ty &Loc = lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false);
    Loc = UpdateVal;
    return Loc;
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};
/// A lookup class without actual state used to provide a nice interface to
/// look up and update ICV values we can declare in global scope.
/// A class without actual state used to provide a nice interface to look up
/// and update pointer-typed state; instances can be declared in global scope.
template <typename Ty, ValueKind Kind> struct PtrValue {
  /// Read the current pointer value.
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  /// Overwrite the stored pointer with \p Other.
  [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
    set(Other);
    return *this;
  }

private:
  /// Resolve the storage location backing this pointer value.
  Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
    return lookupPtr(Kind, IsReadonly, ForceTeamState);
  }

  /// Store \p UpdateVal in place.
  Ty &set(Ty UpdateVal) {
    Ty &Loc = lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr,
                     /*ForceTeamState=*/false);
    Loc = UpdateVal;
    return Loc;
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};
/// RAII helper that, while alive and \p Active, replaces the value behind a
/// Value/PtrValue lookup with \p NewValue and restores \p OldValue on
/// destruction. When \p Active is false it is a no-op.
template <typename VTy, typename Ty> struct ValueRAII {
  ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
            bool ForceTeamState = false)
      // Only perform the (modifying) lookup when active; otherwise park Ptr on
      // a sentinel that must never be dereferenced.
      : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState)
                   : (Ty *)utils::UndefPtr),
        Val(OldValue), Active(Active) {
    if (!Active)
      return;
    // Verify the caller's claim about the current value before clobbering it.
    ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!");
    *Ptr = NewValue;
  }
  ~ValueRAII() {
    // Restore the saved value on scope exit (only if we replaced it).
    if (Active)
      *Ptr = Val;
  }

private:
  Ty *Ptr;   // Storage location being shadowed, or the sentinel if inactive.
  Ty Val;    // Value to restore in the destructor.
  bool Active;
};
/// TODO
inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;
/// TODO
inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;
/// TODO
inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;
/// TODO
inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
ParallelRegionFn;
void runAndCheckState(void(Func(void)));
void assumeInitialState(bool IsSPMD);
/// Return the value of the ParallelTeamSize ICV.
int getEffectivePTeamSize();
} // namespace state
namespace icv {
/// TODO
inline state::Value<uint32_t, state::VK_NThreads> NThreads;
/// TODO
inline state::Value<uint32_t, state::VK_Level> Level;
/// The `active-level` describes which of the parallel levels counted with the
/// `level-var` is active. There can only be one.
///
/// active-level-var is 1 if ActiveLevelVar is not 0, otherwise it is 0.
inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;
/// TODO
inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;
/// TODO
inline state::Value<uint32_t, state::VK_RunSched> RunSched;
} // namespace icv
} // namespace ompx
#pragma omp end declare target
#endif