blob: 1272d9181a57a867e6adef2055a35dc40757c3e1 [file] [log] [blame] [edit]
//===--- Level Zero Target RTL Implementation -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Level Zero RTL Options support.
//
//===----------------------------------------------------------------------===//
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H
#include <level_zero/ze_api.h>
#include "Shared/EnvironmentVar.h"
#include "L0Defs.h"
#include <new>
namespace llvm::omp::target::plugin {
/// Command submission mode.
/// The enumerators are numbered consecutively starting at zero.
enum class CommandModeTy {
  Sync = 0,
  Async = 1,
  AsyncOrdered = 2,
};
/// Specialization constants used for a module compilation.
///
/// Accumulates (id, value) pairs and exposes them as a ze_module_constants_t.
/// The values are stored in memory obtained from the BumpPtrAllocator passed
/// at construction; this class only keeps pointers to them.
class SpecConstantsTy {
  /// Constant ids; entry I belongs to ConstantValues[I].
  std::vector<uint32_t> ConstantIds;
  /// Pointers to the stored value of each constant.
  std::vector<const void *> ConstantValues;
  /// Allocator the value storage is taken from (held by reference).
  BumpPtrAllocator &Allocator;

public:
  SpecConstantsTy(BumpPtrAllocator &Allocator) : Allocator(Allocator) {}
  SpecConstantsTy(const SpecConstantsTy &) = delete;
  SpecConstantsTy(SpecConstantsTy &&) = delete;
  SpecConstantsTy &operator=(const SpecConstantsTy &) = delete;
  SpecConstantsTy &operator=(const SpecConstantsTy &&) = delete;

  /// Construct from a const rvalue.
  /// A const object cannot be moved from, so both vectors are copied; the
  /// new object refers to the same allocator as \p Other.
  /// NOTE(review): this overload is only selected for const rvalues, since
  /// construction from non-const rvalues and from lvalues is deleted above.
  /// Confirm whether it is still needed.
  SpecConstantsTy(const SpecConstantsTy &&Other)
      : ConstantIds(Other.ConstantIds), ConstantValues(Other.ConstantValues),
        Allocator(Other.Allocator) {}

  ~SpecConstantsTy() = default;

  /// Register the constant \p Id with value \p Val.
  /// A copy of \p Val is constructed in storage taken from the allocator and
  /// a pointer to that copy is recorded next to the id.
  template <typename T> void addConstant(uint32_t Id, T Val) {
    void *Storage = Allocator.Allocate(sizeof(T), alignof(T));
    // Construct the copy in place: the storage is raw memory, so assigning
    // through a T * would act on an object that was never created.
    T *ValuePtr = ::new (Storage) T(Val);
    ConstantIds.push_back(Id);
    ConstantValues.push_back(ValuePtr);
  }

  /// Build the descriptor for the constants registered so far.
  /// The returned structure points into the internal vectors of this object
  /// rather than copying them.
  ze_module_constants_t getModuleConstants() const {
    ze_module_constants_t Tmp{static_cast<uint32_t>(ConstantValues.size()),
                              ConstantIds.data(),
                              // Unfortunately we have to const_cast it.
                              // L0 data type should probably be fixed.
                              const_cast<const void **>(ConstantValues.data())};
    return Tmp;
  }
};
/// L0 Plugin flags.
/// Two bit-fields that together span 64 bits.
struct L0OptionFlagsTy {
  /// One-bit flag; the default constructor sets it to 1.
  uint64_t UseMemoryPool : 1;
  /// Remaining 63 bits; the default constructor clears them.
  uint64_t Reserved : 63;

  /// Set the default flag values.
  L0OptionFlagsTy() {
    UseMemoryPool = 1;
    Reserved = 0;
  }
};
/// Options of the L0 plugin: built-in defaults plus the settings read by
/// processEnvironmentVars().
struct L0OptionsTy {
  /// Binary flags.
  L0OptionFlagsTy Flags;

  /// Staging buffer size.
  size_t StagingBufferSize = L0StagingBufferSize;

  /// Staging buffer count.
  size_t StagingBufferCount = L0StagingBufferCount;

  /// Memory pool parameters for one allocation kind.
  struct MemPoolConfigTy {
    /// Whether the pool is used by default.
    bool Use;
    /// AllocMax, in MB.
    int32_t AllocMax;
    /// Capacity.
    int32_t Capacity;
    /// PoolSize, in MB.
    int32_t PoolSize;
  };

  /// Memory pool default parameters for each allocation kind:
  /// {UseByDefault, AllocMax(MB), Capacity, PoolSize(MB)}
  std::array<MemPoolConfigTy, 3> MemPoolConfig{
      MemPoolConfigTy{true, 1, 4, 256},  // TARGET_ALLOC_DEVICE
      MemPoolConfigTy{true, 1, 4, 256},  // TARGET_ALLOC_HOST
      MemPoolConfigTy{true, 8, 4, 256}}; // TARGET_ALLOC_SHARED

  /// Parameters for memory pools dedicated to reduction scratch space.
  std::array<int32_t, 3> ReductionPoolInfo{256, 8, 8192};

  /// Oversubscription rate for normal kernels.
  uint32_t SubscriptionRate = 4;

  /// Loop kernels with known ND-range may be known to have
  /// few iterations and they may not exploit the offload device
  /// to the fullest extent.
  /// Let's assume a device has N total HW threads available,
  /// and the kernel requires M hardware threads with LWS set to L.
  /// If (M < N * ThinThreadsThreshold), then we will try
  /// to iteratively divide L by 2 to increase the number of HW
  /// threads used for executing the kernel. Effectively, we will
  /// end up with L less than the kernel's SIMD width, so the HW
  /// threads will not use all their SIMD lanes. This (presumably) should
  /// allow more parallelism, because the stalls in the SIMD lanes
  /// will be distributed across more HW threads, and the probability
  /// of having a stall (or a sequence of stalls) on a critical path
  /// in the kernel should decrease.
  /// Anyway, this is just a heuristic that seems to work well for some
  /// kernels (which poorly expose parallelism in the first place).
  double ThinThreadsThreshold = 0.1;

  /// Compilation options for IGC.
  /// OpenCL 2.0 builtins (like atomic_load_explicit, etc.) are used by the
  /// runtime, so we have to explicitly specify the "-cl-std=CL2.0"
  /// compilation option. With it, the SPIR-V will be converted to LLVM IR
  /// with OpenCL 2.0 builtins. Otherwise, SPIR-V will be converted to LLVM IR
  /// with OpenCL 1.2 builtins.
  static constexpr std::string_view CompilationOptions = "-cl-std=CL2.0 ";
  static constexpr std::string_view InternalCompilationOptions =
      "-cl-take-global-address";
  std::string UserCompilationOptions;

  /// Allocator for long-lived allocations (e.g. spec constants).
  /// Declared before CommonSpecConstants, which holds a reference to it, so
  /// that it is constructed first and destroyed last.
  BumpPtrAllocator Allocator;

  /// Spec constants used for all modules.
  SpecConstantsTy CommonSpecConstants;

  /// Command execution mode.
  /// Whether the runtime uses asynchronous mode or not depends on the type of
  /// devices and whether immediate command list is fully enabled.
  CommandModeTy CommandMode = CommandModeTy::Async;

  /// Controls if we need to reduce available HW threads. We need this
  /// adjustment on XeHPG when Level Zero debug is enabled
  /// (ZET_ENABLE_PROGRAM_DEBUGGING=1).
  bool ZeDebugEnabled = false;

  /// Have the options already been processed.
  bool Init = false;

  L0OptionsTy() : CommonSpecConstants(Allocator) {}

  // CommonSpecConstants refers to this object's own Allocator, so a copied
  // or moved object would keep pointing at the source's allocator. The type
  // was already implicitly non-copyable and non-movable; state it explicitly.
  L0OptionsTy(const L0OptionsTy &) = delete;
  L0OptionsTy(L0OptionsTy &&) = delete;
  L0OptionsTy &operator=(const L0OptionsTy &) = delete;
  L0OptionsTy &operator=(L0OptionsTy &&) = delete;

  /// Read environment variables.
  void processEnvironmentVars();

  /// Process the environment variables on the first call; later calls do
  /// nothing.
  void init() {
    if (Init)
      return;
    processEnvironmentVars();
    Init = true;
  }

  /// Return true if the value of \p Var equals \p Matched, ignoring case.
  bool match(const StringEnvar &Var, const llvm::StringRef Matched) const {
    return Matched.equals_insensitive(Var.get());
  }
}; // L0OptionsTy
} // namespace llvm::omp::target::plugin
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H