| //===--- Level Zero Target RTL Implementation -----------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
// Memory-related support for SPIR-V/Xe machines.
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0MEMORY_H |
| #define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0MEMORY_H |
| |
#include <array>
#include <cassert>
#include <level_zero/ze_api.h>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include <vector>

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

#include "L0Defs.h"
#include "L0Trace.h"
| |
| namespace llvm::omp::target::plugin { |
| |
| // Forward declarations. |
| struct L0OptionsTy; |
| class L0DeviceTy; |
| class L0ContextTy; |
| |
/// One entry per TARGET_ALLOC kind; used to size the per-kind tables below.
constexpr static int32_t MaxMemKind = TARGET_ALLOC_LAST + 1;
| |
/// Descriptor of a heap serving dynamic device memory allocations.
struct DynamicMemHeapTy {
| /// Base address memory is allocated from. |
| uintptr_t AllocBase = 0; |
| /// Minimal size served by the current heap. |
| size_t BlockSize = 0; |
| /// Max size served by the current heap. |
| size_t MaxSize = 0; |
/// Number of available memory blocks.
| uint32_t NumBlocks = 0; |
| /// Number of block descriptors. |
| uint32_t NumBlockDesc = 0; |
| /// Number of block counters. |
| uint32_t NumBlockCounter = 0; |
| /// List of memory block descriptors. |
| uint64_t *BlockDesc = nullptr; |
| /// List of memory block counters. |
| uint32_t *BlockCounter = nullptr; |
| }; |
| |
/// Pool of heaps serving dynamic device memory allocations.
struct DynamicMemPoolTy {
| /// Location of device memory blocks. |
| void *PoolBase = nullptr; |
| /// Heap size common to all heaps. |
| size_t HeapSize = 0; |
| /// Number of heaps available. |
| uint32_t NumHeaps = 0; |
| /// Heap descriptors (using fixed-size array to simplify memory allocation). |
| DynamicMemHeapTy HeapDesc[8]; |
| }; |
| |
/// Per-allocation information used in allocation/deallocation bookkeeping.
| struct MemAllocInfoTy { |
| /// Base address allocated from compute runtime. |
| void *Base = nullptr; |
| /// Allocation size known to users/libomptarget. |
| size_t ReqSize = 0; |
| /// Allocation size known to the plugin (can be larger than ReqSize). |
| size_t AllocSize = 0; |
| /// TARGET_ALLOC kind. |
| int32_t Kind = TARGET_ALLOC_DEFAULT; |
| /// Is the allocation from a pool? |
| bool InPool = false; |
| /// Is an implicit argument? |
| bool ImplicitArg = false; |
| |
| MemAllocInfoTy() = default; |
| |
| MemAllocInfoTy(void *Base, size_t ReqSize, size_t AllocSize, int32_t Kind, |
| bool InPool, bool ImplicitArg) |
| : Base(Base), ReqSize(ReqSize), AllocSize(AllocSize), Kind(Kind), |
| InPool(InPool), ImplicitArg(ImplicitArg) {} |
| }; |
| |
/// Responsible for all activities involving memory allocation/deallocation.
/// It covers memory pool management and memory allocation bookkeeping.
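///
/// A minimal usage sketch (hypothetical caller; Device, Options, and
/// AllocOpt are assumed to be valid):
/// \code
///   MemAllocatorTy Allocator;
///   if (auto Err = Allocator.initDevicePools(Device, Options))
///     return Err;
///   auto MemOrErr = Allocator.alloc(/*Size=*/1024, /*Align=*/0,
///                                   TARGET_ALLOC_DEVICE, /*Offset=*/0,
///                                   /*UserAlloc=*/false, /*DevMalloc=*/false,
///                                   /*MemAdvice=*/0, AllocOpt);
///   if (!MemOrErr)
///     return MemOrErr.takeError();
///   // ... use *MemOrErr ...
///   if (auto Err = Allocator.dealloc(*MemOrErr))
///     return Err;
/// \endcode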
| class MemAllocatorTy { |
| |
/// Simple memory allocation statistics. Each counter pair tracks direct GPU
/// RT allocations (index 0) and pool allocations (index 1).
| struct MemStatTy { |
| size_t Requested[2] = {0, 0}; // Requested bytes. |
| size_t Allocated[2] = {0, 0}; // Allocated bytes. |
| size_t Freed[2] = {0, 0}; // Freed bytes. |
| size_t InUse[2] = {0, 0}; // Current memory in use. |
| size_t PeakUse[2] = {0, 0}; // Peak bytes used. |
| size_t NumAllocs[2] = {0, 0}; // Number of allocations. |
| }; |
| |
| /// Memory pool which enables reuse of already allocated blocks: |
| /// -- Pool maintains a list of buckets each of which can allocate fixed-size |
| /// memory. |
| /// -- Each bucket maintains a list of memory blocks allocated by GPU RT. |
| /// -- Each memory block can allocate multiple fixed-size memory requested by |
| /// offload RT or user. |
| /// -- Memory allocation falls back to GPU RT allocation when the pool size |
| /// (total memory used by pool) reaches a threshold. |
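///
/// For illustration, assuming AllocMin = 64B and AllocMax = 1MB, the pool
/// keeps one bucket per power-of-two chunk size (64B, 128B, ..., 1MB); a
/// request is rounded up to the nearest chunk size and served from a block
/// in the matching bucket.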
| class MemPoolTy { |
| |
| /// Memory block maintained in each bucket. |
| struct BlockTy { |
| /// Base address of this block. |
| uintptr_t Base = 0; |
| /// Size of the block. |
| size_t Size = 0; |
/// Allocation (chunk) size served by this block.
| size_t ChunkSize = 0; |
| /// Total number of slots. |
| uint32_t NumSlots = 0; |
| /// Maximum slot value. |
| static constexpr uint32_t MaxSlots = |
| std::numeric_limits<decltype(NumSlots)>::max(); |
| /// Number of slots in use. |
| uint32_t NumUsedSlots = 0; |
| /// Cached available slot returned by the last dealloc() call. |
| uint32_t FreeSlot = MaxSlots; |
| /// Marker for the currently used slots. |
| std::vector<bool> UsedSlots; |
| |
BlockTy(void *BaseIn, size_t SizeIn, size_t ChunkSizeIn) {
Base = reinterpret_cast<uintptr_t>(BaseIn);
Size = SizeIn;
ChunkSize = ChunkSizeIn;
NumSlots = Size / ChunkSize;
NumUsedSlots = 0;
UsedSlots.resize(NumSlots, /*InitValue=*/false);
}
| |
| /// Check if the current block is fully used. |
| bool isFull() const { return NumUsedSlots == NumSlots; } |
| |
| /// Check if the given address belongs to the current block. |
| bool contains(void *Mem) const { |
| auto M = reinterpret_cast<uintptr_t>(Mem); |
| return M >= Base && M < Base + Size; |
| } |
| |
| /// Allocate a single chunk from the block. |
| void *alloc(); |
| |
| /// Deallocate the given memory. |
| void dealloc(void *Mem); |
| }; // BlockTy |
| |
| /// Allocation kind for the current pool. |
| int32_t AllocKind = TARGET_ALLOC_DEFAULT; |
| /// Access to the allocator. |
| MemAllocatorTy *Allocator = nullptr; |
| /// Minimum supported memory allocation size from pool. |
| size_t AllocMin = 1 << 6; // 64B |
| /// Maximum supported memory allocation size from pool. |
| size_t AllocMax = 0; |
| /// Allocation size when the pool needs to allocate a block. |
| size_t AllocUnit = 1 << 16; // 64KB |
| /// Capacity of each block in the buckets which decides number of |
| /// allocatable chunks from the block. Each block in the bucket can serve |
| /// at least BlockCapacity chunks. |
| /// If ChunkSize * BlockCapacity <= AllocUnit |
| /// BlockSize = AllocUnit |
| /// Otherwise, |
| /// BlockSize = ChunkSize * BlockCapacity |
/// This effectively bounds how much memory is over-allocated per block.
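/// For illustration, with AllocUnit = 64KB and BlockCapacity = 4:
/// ChunkSize = 8KB yields BlockSize = 64KB (8 chunks), while
/// ChunkSize = 32KB yields BlockSize = 128KB (4 chunks).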
| uint32_t BlockCapacity = 0; |
| /// Total memory allocated from GPU RT for this pool. |
| size_t PoolSize = 0; |
/// Maximum allowed pool size. Allocation falls back to GPU RT allocation
/// when PoolSize reaches PoolSizeMax.
| size_t PoolSizeMax = 0; |
| /// Small allocation size allowed in the pool even if pool size is over the |
| /// pool size limit. |
| size_t SmallAllocMax = 1024; |
| /// Small allocation pool size. |
| size_t SmallPoolSize = 0; |
| /// Small allocation pool size max (4MB). |
| size_t SmallPoolSizeMax = (4 << 20); |
| /// List of buckets. |
| std::vector<std::vector<BlockTy *>> Buckets; |
| /// List of bucket parameters. |
| std::vector<std::pair<size_t, size_t>> BucketParams; |
| /// Map from allocated pointer to corresponding block. |
| llvm::DenseMap<void *, BlockTy *> PtrToBlock; |
| /// Simple stats counting miss/hit in each bucket. |
| std::vector<std::pair<uint64_t, uint64_t>> BucketStats; |
| /// Need to zero-initialize after L0 allocation. |
| bool ZeroInit = false; |
| |
| /// Get bucket ID from the specified allocation size. |
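/// For example, with AllocMin = 64: Size <= 64 maps to bucket 0, 65-128 to
/// bucket 1, and 129-256 to bucket 2.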
| uint32_t getBucketId(size_t Size) { |
| uint32_t Count = 0; |
| for (size_t SZ = AllocMin; SZ < Size; Count++) |
| SZ <<= 1; |
| return Count; |
| } |
| |
| public: |
| MemPoolTy() = default; |
| MemPoolTy(const MemPoolTy &) = delete; |
| MemPoolTy(MemPoolTy &&) = delete; |
| MemPoolTy &operator=(const MemPoolTy &) = delete; |
MemPoolTy &operator=(MemPoolTy &&) = delete;
| ~MemPoolTy() = default; |
| |
/// Print pool usage statistics.
void printUsage();
| |
| /// Initialize pool with allocation kind, allocator, and user options. |
| Error init(int32_t Kind, MemAllocatorTy *Allocator, |
| const L0OptionsTy &Option); |
/// Initialize a pool dedicated to reduction scratch space.
Error init(MemAllocatorTy *Allocator, const L0OptionsTy &Option);
/// Initialize a small-memory pool with fixed parameters.
Error init(MemAllocatorTy *Allocator);
| |
| /// Release resources used in the pool. |
| Error deinit(); |
| |
| /// Allocate the requested size of memory from this pool. |
| /// AllocSize is the chunk size internally used for the returned memory. |
| Expected<void *> alloc(size_t Size, size_t &AllocSize); |
/// Deallocate the specified memory and return the size deallocated.
| size_t dealloc(void *Ptr); |
| }; // MemPoolTy |
| |
| /// Allocation information maintained in the plugin. |
| class MemAllocInfoMapTy { |
| /// Map from allocated pointer to allocation information. |
| std::map<void *, MemAllocInfoTy> Map; |
| /// Map from target alloc kind to number of implicit arguments. |
std::array<uint32_t, MaxMemKind> NumImplicitArgs{};
| |
| public: |
| /// Add allocation information to the map. |
| void add(void *Ptr, void *Base, size_t ReqSize, size_t AllocSize, |
| int32_t Kind, bool InPool = false, bool ImplicitArg = false); |
| |
| /// Remove allocation information for the given memory location. |
| bool remove(void *Ptr, MemAllocInfoTy *Removed = nullptr); |
| |
/// Find allocation information for the given memory location.
const MemAllocInfoTy *find(void *Ptr) const {
auto AllocInfo = Map.find(Ptr);
if (AllocInfo == Map.end())
return nullptr;
return &AllocInfo->second;
}
| |
/// Check if the given pointer and size fall within any recorded allocation.
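/// For example, given a recorded entry with Base = 0x1000 and ReqSize =
/// 0x100, contains((void *)0x1010, 0x20) is true, while
/// contains((void *)0x10F0, 0x20) is false since it extends past 0x1100.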
| bool contains(const void *Ptr, size_t Size) const { |
| if (Map.size() == 0) |
| return false; |
| auto I = Map.upper_bound(const_cast<void *>(Ptr)); |
| if (I == Map.begin()) |
| return false; |
| --I; |
| |
| uintptr_t PtrAsInt = reinterpret_cast<uintptr_t>(Ptr); |
| uintptr_t MapBase = reinterpret_cast<uintptr_t>(I->first); |
| uintptr_t MapSize = static_cast<uintptr_t>(I->second.ReqSize); |
| |
return MapBase <= PtrAsInt && PtrAsInt + Size <= MapBase + MapSize;
| } |
| |
| /// Returns the number of implicit arguments for the specified allocation |
| /// kind. |
| size_t getNumImplicitArgs(int32_t Kind) { |
| assert(Kind >= 0 && Kind < MaxMemKind && |
| "Invalid target allocation kind"); |
| return NumImplicitArgs[Kind]; |
| } |
| }; // MemAllocInfoMapTy |
| |
| /// L0 context to use. |
| const L0ContextTy *L0Context = nullptr; |
| /// L0 device to use. |
| L0DeviceTy *Device = nullptr; |
| /// Whether the device supports large memory allocation. |
| bool SupportsLargeMem = false; |
| /// Cached max alloc size supported by device. |
| uint64_t MaxAllocSize; |
| /// Map from allocation kind to memory statistics. |
| std::array<MemStatTy, MaxMemKind> Stats; |
| /// Map from allocation kind to memory pool. |
| std::array<std::unique_ptr<MemPoolTy>, MaxMemKind> Pools; |
| |
| /// Memory pool dedicated to reduction scratch space. |
| std::unique_ptr<MemPoolTy> ReductionPool; |
| /// Memory pool dedicated to reduction counters. |
| std::unique_ptr<MemPoolTy> CounterPool; |
| /// Allocation information map. |
| MemAllocInfoMapTy AllocInfo; |
| /// RTL-owned memory that needs to be freed automatically. |
| std::vector<void *> MemOwned; |
| /// Lock protection. |
| std::mutex Mtx; |
| /// Allocator only supports host memory. |
| bool IsHostMem = false; |
/// Internal deallocation function to be called when already holding the
/// Mtx lock.
| Error deallocLocked(void *Ptr); |
| |
| /// Allocate memory from L0 GPU RT. |
| Expected<void *> allocFromL0(size_t Size, size_t Align, int32_t Kind); |
| /// Deallocate memory from L0 GPU RT. |
| Error deallocFromL0(void *Ptr); |
| |
/// We use an over-allocation workaround to support target pointers with
/// offsets; in such cases a positive ActiveSize (the size actually in use)
/// is passed so that debug logging reports the correct size.
| Expected<void *> allocFromL0AndLog(size_t Size, size_t Align, int32_t Kind, |
| size_t ActiveSize = 0) { |
| auto MemOrErr = allocFromL0(Size, Align, Kind); |
| if (!MemOrErr) |
| return MemOrErr; |
| size_t LoggedSize = ActiveSize ? ActiveSize : Size; |
| log(LoggedSize, Size, Kind); |
| return MemOrErr; |
| } |
| |
| /// Log memory allocation/deallocation. |
| void log(size_t ReqSize, size_t Size, int32_t Kind, bool Pool = false) { |
| if (Kind < 0 || Kind >= MaxMemKind) |
| return; // Stat is disabled. |
| |
| auto &ST = Stats[Kind]; |
| int32_t I = Pool ? 1 : 0; |
| if (ReqSize > 0) { |
| ST.Requested[I] += ReqSize; |
| ST.Allocated[I] += Size; |
| ST.InUse[I] += Size; |
| ST.NumAllocs[I]++; |
| } else { |
| ST.Freed[I] += Size; |
| ST.InUse[I] -= Size; |
| } |
| ST.PeakUse[I] = (std::max)(ST.PeakUse[I], ST.InUse[I]); |
| } |
| |
| /// Perform copy operation. |
| Error enqueueMemCopy(void *Dst, const void *Src, size_t Size); |
| /// Perform memory fill operation. |
| Error enqueueMemSet(void *Dst, int8_t Value, size_t Size); |
| |
| /// Allocate memory with the specified information from a memory pool. |
| Expected<void *> allocFromPool(size_t Size, size_t Align, int32_t Kind, |
| intptr_t Offset, bool UserAlloc, |
| bool DevMalloc, uint32_t MemAdvice, |
| AllocOptionTy AllocOpt); |
| /// Deallocate memory from memory pool. |
| Error deallocFromPool(void *Ptr) { |
| std::lock_guard<std::mutex> Lock(Mtx); |
| return deallocLocked(Ptr); |
| } |
| |
| public: |
| MemAllocatorTy() |
| : MaxAllocSize(std::numeric_limits<decltype(MaxAllocSize)>::max()) {} |
| |
| MemAllocatorTy(const MemAllocatorTy &) = delete; |
| MemAllocatorTy(MemAllocatorTy &&) = delete; |
| MemAllocatorTy &operator=(const MemAllocatorTy &) = delete; |
| MemAllocatorTy &operator=(const MemAllocatorTy &&) = delete; |
| ~MemAllocatorTy() = default; |
| |
| Error initDevicePools(L0DeviceTy &L0Device, const L0OptionsTy &Option); |
| Error initHostPool(L0ContextTy &Driver, const L0OptionsTy &Option); |
| void updateMaxAllocSize(L0DeviceTy &L0Device); |
| |
| /// Release resources and report statistics if requested. |
| Error deinit(); |
| |
| /// Allocate memory with the specified information from a memory pool. |
| Expected<void *> alloc(size_t Size, size_t Align, int32_t Kind, |
| intptr_t Offset, bool UserAlloc, bool DevMalloc, |
| uint32_t MemAdvice, AllocOptionTy AllocOpt) { |
| return allocFromPool(Size, Align, Kind, Offset, UserAlloc, DevMalloc, |
| MemAdvice, AllocOpt); |
| } |
| |
| /// Deallocate memory. |
| Error dealloc(void *Ptr) { return deallocFromPool(Ptr); } |
| |
/// Check if the given memory location and size belong to any allocated
/// memory.
| bool contains(const void *Ptr, size_t Size) { |
| std::lock_guard<std::mutex> Lock(Mtx); |
| return AllocInfo.contains(Ptr, Size); |
| } |
| |
| /// Get allocation information for the specified memory location. |
| const MemAllocInfoTy *getAllocInfo(void *Ptr) { |
| std::lock_guard<std::mutex> Lock(Mtx); |
| return AllocInfo.find(Ptr); |
| } |
| |
| /// Get kernel indirect access flags using implicit argument info. |
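/// The result is typically passed to the L0 runtime before a kernel launch,
/// e.g. (Kernel being a hypothetical ze_kernel_handle_t):
/// \code
///   zeKernelSetIndirectAccess(Kernel, Allocator.getIndirectFlags());
/// \endcode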
| ze_kernel_indirect_access_flags_t getIndirectFlags() { |
| std::lock_guard<std::mutex> Lock(Mtx); |
| ze_kernel_indirect_access_flags_t Ret = 0; |
| if (AllocInfo.getNumImplicitArgs(TARGET_ALLOC_DEVICE) > 0) |
| Ret |= ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE; |
| if (AllocInfo.getNumImplicitArgs(TARGET_ALLOC_HOST) > 0) |
| Ret |= ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST; |
| if (AllocInfo.getNumImplicitArgs(TARGET_ALLOC_SHARED) > 0) |
| Ret |= ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; |
| return Ret; |
| } |
}; // MemAllocatorTy
| |
/// Simple generic wrapper to reuse objects. Objects must have an accessible
/// zero-argument constructor.
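///
/// A minimal usage sketch (WidgetTy is a hypothetical default-constructible
/// type):
/// \code
///   ObjPool<WidgetTy> Pool;
///   WidgetTy *W = Pool.get(); // Reuses a cached object when available.
///   // ... use W ...
///   Pool.release(W);          // Cache the object for later reuse.
/// \endcode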
| template <class ObjTy> class ObjPool { |
/// Protection.
std::unique_ptr<std::mutex> Mtx;
/// List of objects.
std::list<ObjTy *> Objects;
| |
| public: |
| ObjPool() { Mtx.reset(new std::mutex); } |
| |
ObjPool(const ObjPool &) = delete;
ObjPool(ObjPool &&) = delete;
ObjPool &operator=(const ObjPool &) = delete;
ObjPool &operator=(ObjPool &&) = delete;
| |
ObjTy *get() {
{
// Check and pop under the lock; an unsynchronized emptiness check would
// race with concurrent release() calls.
std::lock_guard<std::mutex> Lock(*Mtx);
if (!Objects.empty()) {
ObjTy *Ret = Objects.back();
Objects.pop_back();
return Ret;
}
}
return new ObjTy();
}
| |
void release(ObjTy *Obj) {
std::lock_guard<std::mutex> Lock(*Mtx);
Objects.push_back(Obj);
}
| |
| ~ObjPool() { |
| for (auto Object : Objects) |
| delete Object; |
| } |
| }; |
| |
| /// Common event pool used in the plugin. This event pool assumes all events |
| /// from the pool are host-visible and use the same event pool flag. |
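///
/// A minimal usage sketch (hypothetical caller; Context and Device are
/// assumed to be valid):
/// \code
///   EventPoolTy Pool;
///   if (auto Err = Pool.init(Context, /*FlagsIn=*/0))
///     return Err;
///   auto EventOrErr = Pool.getEvent();
///   if (!EventOrErr)
///     return EventOrErr.takeError();
///   // ... signal/wait on *EventOrErr ...
///   if (auto Err = Pool.releaseEvent(*EventOrErr, Device))
///     return Err;
/// \endcode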
| class EventPoolTy { |
/// Number of events in each L0 event pool created on demand.
size_t PoolSize = 64;
| |
| /// Context of the events. |
| ze_context_handle_t Context = nullptr; |
| |
/// Additional event pool flags common to this pool.
| uint32_t Flags = 0; |
| |
| /// Protection. |
| std::unique_ptr<std::mutex> Mtx; |
| |
| /// List of created L0 event pools. |
| std::list<ze_event_pool_handle_t> Pools; |
| |
| /// List of free L0 events. |
| std::list<ze_event_handle_t> Events; |
| |
| #ifdef OMPT_SUPPORT |
| /// Event to OMPT record map. The timestamp information is recorded to the |
| /// OMPT record before the event is recycled. |
| std::unordered_map<ze_event_handle_t, ompt_record_ompt_t *> EventToRecord; |
| #endif // OMPT_SUPPORT |
| |
| public: |
| /// Initialize context, flags, and mutex. |
| Error init(ze_context_handle_t ContextIn, uint32_t FlagsIn) { |
| Context = ContextIn; |
| Flags = FlagsIn; |
| Mtx.reset(new std::mutex); |
| return Plugin::success(); |
| } |
| |
/// Destroy L0 resources.
| Error deinit() { |
| for (auto E : Events) |
| CALL_ZE_RET_ERROR(zeEventDestroy, E); |
| for (auto P : Pools) |
| CALL_ZE_RET_ERROR(zeEventPoolDestroy, P); |
| return Plugin::success(); |
| } |
| |
| /// Get a free event from the pool. |
| Expected<ze_event_handle_t> getEvent(); |
| |
| /// Return an event to the pool. |
| Error releaseEvent(ze_event_handle_t Event, L0DeviceTy &Device); |
| }; |
| |
| /// Staging buffer. |
| /// A single staging buffer is not enough when batching is enabled since there |
| /// can be multiple pending copy operations. |
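///
/// A minimal usage sketch (hypothetical caller; Context is assumed valid):
/// \code
///   StagingBufferTy Staging;
///   if (!Staging.initialized())
///     Staging.init(Context, /*SizeIn=*/L0StagingBufferSize,
///                  /*CountIn=*/L0StagingBufferCount);
///   auto BufOrErr = Staging.get(/*Next=*/true);
///   if (!BufOrErr)
///     return BufOrErr.takeError();
///   // ... stage host data through *BufOrErr ...
///   Staging.reset(); // Once the pending copies have completed.
/// \endcode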
| class StagingBufferTy { |
| /// Context for L0 calls. |
| ze_context_handle_t Context = nullptr; |
| /// Max allowed size for staging buffer. |
| size_t Size = L0StagingBufferSize; |
| /// Number of buffers allocated together. |
| size_t Count = L0StagingBufferCount; |
/// Buffer list; grows by Count buffers whenever a new buffer is required.
llvm::SmallVector<void *> Buffers;
/// Offset of the next available buffer within the allocated buffers.
size_t Offset = 0;
| |
| Expected<void *> addBuffers() { |
| ze_host_mem_alloc_desc_t AllocDesc{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, |
| nullptr, 0}; |
| void *Ret = nullptr; |
| size_t AllocSize = Size * Count; |
| CALL_ZE_RET_ERROR(zeMemAllocHost, Context, &AllocDesc, AllocSize, |
| L0DefaultAlignment, &Ret); |
| Buffers.push_back(Ret); |
| return Ret; |
| } |
| |
| public: |
| StagingBufferTy() = default; |
| StagingBufferTy(const StagingBufferTy &) = delete; |
| StagingBufferTy(StagingBufferTy &&) = delete; |
| StagingBufferTy &operator=(const StagingBufferTy &) = delete; |
| StagingBufferTy &operator=(const StagingBufferTy &&) = delete; |
| ~StagingBufferTy() = default; |
| |
Error clear() {
for (auto Ptr : Buffers)
CALL_ZE_RET_ERROR(zeMemFree, Context, Ptr);
Buffers.clear();
Offset = 0;
Context = nullptr;
return Plugin::success();
}
| |
| bool initialized() const { return Context != nullptr; } |
| |
| void init(ze_context_handle_t ContextIn, size_t SizeIn, size_t CountIn) { |
| Context = ContextIn; |
| Size = SizeIn; |
| Count = CountIn; |
| } |
| |
| void reset() { Offset = 0; } |
| |
| /// Always return the first buffer. |
| Expected<void *> get() { |
| if (Size == 0 || Count == 0) |
| return nullptr; |
| return Buffers.empty() ? addBuffers() : Buffers.front(); |
| } |
| |
| /// Return the next available buffer. |
| Expected<void *> getNext() { |
| void *Ret = nullptr; |
| if (Size == 0 || Count == 0) |
| return Ret; |
| |
| size_t AllocSize = Size * Count; |
| bool NeedToGrow = Buffers.empty() || Offset >= Buffers.size() * AllocSize; |
| if (NeedToGrow) { |
| auto PtrOrErr = addBuffers(); |
| if (!PtrOrErr) |
| return PtrOrErr.takeError(); |
| Ret = *PtrOrErr; |
} else {
// Index the chunk that contains Offset; always using the last chunk
// would hand out the same memory twice after a reset() while earlier
// chunks exist.
Ret = reinterpret_cast<void *>(
reinterpret_cast<uintptr_t>(Buffers[Offset / AllocSize]) +
(Offset % AllocSize));
}
| |
| if (!Ret) |
| return nullptr; |
| |
| Offset += Size; |
| return Ret; |
| } |
| |
| /// Return either a fixed buffer or next buffer. |
| Expected<void *> get(bool Next) { return Next ? getNext() : get(); } |
| }; |
| |
| } // namespace llvm::omp::target::plugin |
| |
| #endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0MEMORY_H |