// src/__support/RPC/rpc_util.h - llvm-project/libc - Git at Google

 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_UTILS_H
 #define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_UTILS_H

 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/GPU/utils.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/properties/architectures.h"

 namespace __llvm_libc {
 namespace rpc {

 /// Upper bound on the amount of data a single lane may use.
 constexpr uint64_t MAX_LANE_SIZE{64};

 /// Briefly pause the calling thread so that busy-wait loops are friendlier to
 /// the thread scheduler. NVPTX builds with __CUDA_ARCH__ >= 700 issue the PTX
 /// `nanosleep` instruction, AMDGPU builds call the `s_sleep` builtin, and every
 /// other configuration compiles to an empty function.
 LIBC_INLINE void sleep_briefly() {
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
   asm("nanosleep.u32 64;" ::: "memory");
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
   __builtin_amdgcn_s_sleep(2);
 #endif
   // No sleep primitive is used for any other target, so this is a no-op there.
 }

 /// Return the index of the lowest set bit in \p lane_mask, i.e. the id of the
 /// first active lane described by the mask.
 ///
 /// The 64-bit `__builtin_ffsll` is used (rather than `__builtin_ffsl`) so that
 /// all 64 bits of the mask are inspected even where `long` is only 32 bits
 /// wide. For an empty mask the builtin yields 0, so the result wraps around to
 /// the maximum uint64_t value.
 LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
   return __builtin_ffsll(lane_mask) - 1;
 }

 /// Returns true when the calling lane's id, as reported by
 /// gpu::get_lane_id(), equals the id of the first lane set in \p lane_mask.
 LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
   const uint64_t leader = get_first_lane_id(lane_mask);
   return gpu::get_lane_id() == leader;
 }

 /// Compile-time query for whether this code is being built for the GPU, which
 /// is the case exactly when LIBC_TARGET_ARCH_IS_GPU is defined.
 LIBC_INLINE constexpr bool is_process_gpu() {
 #ifdef LIBC_TARGET_ARCH_IS_GPU
   constexpr bool ON_GPU = true;
 #else
   constexpr bool ON_GPU = false;
 #endif
   return ON_GPU;
 }

 /// Round \p val up to the nearest multiple of \p align and return it as a V.
 /// \p align is converted to V before any arithmetic is performed.
 template <typename V, typename A> LIBC_INLINE V align_up(V val, A align) {
   const V step = V(align);
   // Number of whole `step`-sized units needed to cover `val`.
   const auto units = (val + step - 1) / step;
   return units * step;
 }

 /// Uniform accessor for per-lane storage on both the CPU and the GPU. On the
 /// GPU stack variables are always private to a lane, so the object \p val
 /// points at is returned directly and \p id is ignored. On the CPU the caller
 /// is expected to have allocated space for the whole lane, so \p val is
 /// treated as an array and element \p id is returned.
 template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
   if constexpr (is_process_gpu()) {
     return *val;
   } else {
     return val[id];
   }
 }

 /// Returns a reference to the larger of \p x and \p y as ordered by
 /// operator<. When \p x is not less than \p y, \p x itself is returned.
 template <typename T> LIBC_INLINE const T &max(const T &x, const T &y) {
   if (x < y)
     return y;
   return x;
 }

 /// Return \p ptr moved forward by \p bytes bytes. The offset is applied through
 /// a uint8_t pointer, so it is not scaled by sizeof(T); a const T is stepped
 /// through a const byte pointer so that constness is never cast away.
 template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
   if constexpr (cpp::is_const_v<T>) {
     const uint8_t *raw = reinterpret_cast<const uint8_t *>(ptr);
     return reinterpret_cast<T *>(raw + bytes);
   } else {
     uint8_t *raw = reinterpret_cast<uint8_t *>(ptr);
     return reinterpret_cast<T *>(raw + bytes);
   }
 }

 } // namespace rpc
 } // namespace __llvm_libc

 #endif
	//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_UTILS_H
	#define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_UTILS_H

	#include "src/__support/CPP/type_traits.h"
	#include "src/__support/GPU/utils.h"
	#include "src/__support/macros/attributes.h"
	#include "src/__support/macros/properties/architectures.h"

	namespace __llvm_libc {
	namespace rpc {

	/// Upper bound on the amount of data a single lane may use.
	constexpr uint64_t MAX_LANE_SIZE{64};

	/// Briefly pause the calling thread so that busy-wait loops are friendlier to
	/// the thread scheduler. NVPTX builds with __CUDA_ARCH__ >= 700 issue the PTX
	/// `nanosleep` instruction, AMDGPU builds call the `s_sleep` builtin, and every
	/// other configuration compiles to an empty function.
	LIBC_INLINE void sleep_briefly() {
	#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
	  asm("nanosleep.u32 64;" ::: "memory");
	#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
	  __builtin_amdgcn_s_sleep(2);
	#endif
	  // No sleep primitive is used for any other target, so this is a no-op there.
	}

	/// Return the index of the lowest set bit in \p lane_mask, i.e. the id of the
	/// first active lane described by the mask.
	///
	/// The 64-bit `__builtin_ffsll` is used (rather than `__builtin_ffsl`) so that
	/// all 64 bits of the mask are inspected even where `long` is only 32 bits
	/// wide. For an empty mask the builtin yields 0, so the result wraps around to
	/// the maximum uint64_t value.
	LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
	  return __builtin_ffsll(lane_mask) - 1;
	}

	/// Returns true when the calling lane's id, as reported by
	/// gpu::get_lane_id(), equals the id of the first lane set in \p lane_mask.
	LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
	  const uint64_t leader = get_first_lane_id(lane_mask);
	  return gpu::get_lane_id() == leader;
	}

	/// Compile-time query for whether this code is being built for the GPU, which
	/// is the case exactly when LIBC_TARGET_ARCH_IS_GPU is defined.
	LIBC_INLINE constexpr bool is_process_gpu() {
	#ifdef LIBC_TARGET_ARCH_IS_GPU
	  constexpr bool ON_GPU = true;
	#else
	  constexpr bool ON_GPU = false;
	#endif
	  return ON_GPU;
	}

	/// Round \p val up to the nearest multiple of \p align and return it as a V.
	/// \p align is converted to V before any arithmetic is performed.
	template <typename V, typename A> LIBC_INLINE V align_up(V val, A align) {
	  const V step = V(align);
	  // Number of whole `step`-sized units needed to cover `val`.
	  const auto units = (val + step - 1) / step;
	  return units * step;
	}

	/// Uniform accessor for per-lane storage on both the CPU and the GPU. On the
	/// GPU stack variables are always private to a lane, so the object \p val
	/// points at is returned directly and \p id is ignored. On the CPU the caller
	/// is expected to have allocated space for the whole lane, so \p val is
	/// treated as an array and element \p id is returned.
	template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
	  if constexpr (is_process_gpu()) {
	    return *val;
	  } else {
	    return val[id];
	  }
	}

	/// Returns a reference to the larger of \p x and \p y as ordered by
	/// operator<. When \p x is not less than \p y, \p x itself is returned.
	template <typename T> LIBC_INLINE const T &max(const T &x, const T &y) {
	  if (x < y)
	    return y;
	  return x;
	}

	/// Return \p ptr moved forward by \p bytes bytes. The offset is applied through
	/// a uint8_t pointer, so it is not scaled by sizeof(T); a const T is stepped
	/// through a const byte pointer so that constness is never cast away.
	///
	/// The parameter and the casts must be pointer types: casting \p ptr to a
	/// plain uint8_t value instead of a uint8_t pointer does not compile.
	template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
	  if constexpr (cpp::is_const_v<T>)
	    return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
	                                 bytes);
	  else
	    return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
	}

	} // namespace rpc
	} // namespace __llvm_libc

	#endif