//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types, the maximum size would be 8
// (corresponding to uint64_t). On such a platform, a request for size 32 is
// treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with AVX support, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"
#include <stdint.h>
namespace __llvm_libc {
// Compiler types using the vector attributes.
using uint8x1_t = uint8_t __attribute__((__vector_size__(1)));
using uint8x2_t = uint8_t __attribute__((__vector_size__(2)));
using uint8x4_t = uint8_t __attribute__((__vector_size__(4)));
using uint8x8_t = uint8_t __attribute__((__vector_size__(8)));
using uint8x16_t = uint8_t __attribute__((__vector_size__(16)));
using uint8x32_t = uint8_t __attribute__((__vector_size__(32)));
using uint8x64_t = uint8_t __attribute__((__vector_size__(64)));
} // namespace __llvm_libc
namespace __llvm_libc::generic {
// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types
// - vector : compiler types using the vector attributes
// - array : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.
template <typename T>
constexpr bool is_scalar_v = cpp::is_integral_v<T> && cpp::is_unsigned_v<T>;
template <typename T>
constexpr bool is_vector_v =
cpp::details::is_unqualified_any_of<T, uint8x1_t, uint8x2_t, uint8x4_t,
uint8x8_t, uint8x16_t, uint8x32_t,
uint8x64_t>();
template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;
template <typename T>
constexpr bool is_element_type_v =
is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;
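// For illustration, a few sample instantiations of the traits above:
//   is_scalar_v<uint32_t>                 -> true
//   is_vector_v<uint8x16_t>               -> true
//   is_array_v<cpp::array<uint64_t, 4>>   -> true
//   is_array_v<cpp::array<float, 4>>      -> false (float is not a scalar here)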
// Helper to access the number of elements of a cpp::array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
// Generic operations for the above type categories.
template <typename T> T load(CPtr src) {
static_assert(is_element_type_v<T>);
if constexpr (is_scalar_v<T> || is_vector_v<T>) {
return ::__llvm_libc::load<T>(src);
} else if constexpr (is_array_v<T>) {
using value_type = typename T::value_type;
T Value;
for (size_t I = 0; I < array_size_v<T>; ++I)
Value[I] = load<value_type>(src + (I * sizeof(value_type)));
return Value;
}
}
template <typename T> void store(Ptr dst, T value) {
static_assert(is_element_type_v<T>);
if constexpr (is_scalar_v<T> || is_vector_v<T>) {
::__llvm_libc::store<T>(dst, value);
} else if constexpr (is_array_v<T>) {
using value_type = typename T::value_type;
for (size_t I = 0; I < array_size_v<T>; ++I)
store<value_type>(dst + (I * sizeof(value_type)), value[I]);
}
}
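// For illustration, load<cpp::array<uint16_t, 3>>(src) performs three 2-byte
// element loads (6 bytes in total); store mirrors this decomposition.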
template <typename T> T splat(uint8_t value) {
static_assert(is_scalar_v<T> || is_vector_v<T>);
if constexpr (is_scalar_v<T>)
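// Replicate 'value' into each byte of T. For T == uint32_t,
// T(~0) / T(0xFF) == 0x01010101, and multiplying by 'value' broadcasts it
// to all four bytes.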
return T(~0) / T(0xFF) * T(value);
else if constexpr (is_vector_v<T>) {
T Out;
// This for loop is optimized into a single splat operation for vector types.
for (size_t i = 0; i < sizeof(T); ++i)
Out[i] = value;
return Out;
}
}
static_assert((UINTPTR_MAX == 4294967295U) ||
(UINTPTR_MAX == 18446744073709551615UL),
"We currently only support 32- or 64-bit platforms");
#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
#define LLVM_LIBC_HAS_UINT64
#endif
namespace details {
// Checks that the types are sorted in strictly decreasing order of size and
// that the last type has a size of 1, i.e.
// sizeof(First) > sizeof(Second) > ... > sizeof(Last) == 1
template <typename First> constexpr bool is_decreasing_size() {
return sizeof(First) == 1;
}
template <typename First, typename Second, typename... Next>
constexpr bool is_decreasing_size() {
if constexpr (sizeof...(Next) > 0)
return sizeof(First) > sizeof(Second) && is_decreasing_size<Next...>();
else
return sizeof(First) > sizeof(Second) && is_decreasing_size<Second>();
}
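// For illustration (note that the trailing type must have a size of 1):
//   is_decreasing_size<uint64_t, uint32_t, uint16_t, uint8_t>() -> true
//   is_decreasing_size<uint64_t, uint64_t, uint8_t>()           -> false
//   is_decreasing_size<uint64_t, uint16_t>()                    -> false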
template <size_t Size, typename... Ts> struct Largest;
template <size_t Size> struct Largest<Size> : cpp::type_identity<uint8_t> {};
template <size_t Size, typename T, typename... Ts>
struct Largest<Size, T, Ts...> {
using next = Largest<Size, Ts...>;
using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>;
};
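// For illustration, 'Largest' picks the first listed type fitting in 'Size'
// bytes:
//   Largest<6, uint64_t, uint32_t, uint16_t, uint8_t>::type -> uint32_t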
} // namespace details
// 'SupportedTypes' holds a list of natively supported types.
// The types are instantiations of ScalarType or VectorType.
// They should be ordered in strictly decreasing order of size.
// The 'TypeFor<Size>' alias retrieves the largest supported type that can
// handle 'Size' bytes. e.g.
//
// using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
// using Type = ST::TypeFor<10>;
// static_assert(cpp::is_same_v<Type, ScalarType<uint16_t>>);
template <typename First, typename... Ts> struct SupportedTypes {
static_assert(details::is_decreasing_size<First, Ts...>());
using MaxType = First;
template <size_t Size>
using TypeFor = typename details::Largest<Size, First, Ts...>::type;
};
// Map from sizes to structures offering static load, store and splat methods.
// Note: On platforms lacking vector support, we fall back to an array type
// (see 'getTypeFor' below) and decompose the operation into smaller pieces.
// Lists the generic native types to use for Memset and Memmove operations.
// TODO: Inject the native types within Memset and Memmove depending on the
// target architectures and derive MaxSize from it.
using NativeTypeMap = SupportedTypes<uint8x64_t, //
uint8x32_t, //
uint8x16_t,
#if defined(LLVM_LIBC_HAS_UINT64)
uint64_t, // Not available on 32-bit platforms.
#endif
uint32_t, //
uint16_t, //
uint8_t>;
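// For illustration:
//   NativeTypeMap::TypeFor<16> -> uint8x16_t
//   NativeTypeMap::TypeFor<3>  -> uint16_t (largest type within 3 bytes)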
namespace details {
// Helper to test if a type is void.
template <typename T> inline constexpr bool is_void_v = cpp::is_same_v<T, void>;
// In case the 'Size' is not supported, we can fall back to a sequence of
// smaller operations using the largest natively supported type.
template <size_t Size, size_t MaxSize> static constexpr bool useArrayType() {
return (Size > MaxSize) && ((Size % MaxSize) == 0) &&
!details::is_void_v<NativeTypeMap::TypeFor<MaxSize>>;
}
// Compute the type to handle an operation of 'Size' bytes knowing that the
// underlying platform only supports native types up to MaxSize bytes.
template <size_t Size, size_t MaxSize>
using getTypeFor = cpp::conditional_t<
useArrayType<Size, MaxSize>(),
cpp::array<NativeTypeMap::TypeFor<MaxSize>, Size / MaxSize>,
NativeTypeMap::TypeFor<Size>>;
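// For illustration, with MaxSize == 8 (i.e. uint64_t):
//   getTypeFor<32, 8> -> cpp::array<uint64_t, 4>
//   getTypeFor<4, 8>  -> uint32_t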
} // namespace details
///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////
template <typename T> struct Memset {
static constexpr size_t SIZE = sizeof(T);
LIBC_INLINE static void block(Ptr dst, uint8_t value) {
static_assert(is_element_type_v<T>);
if constexpr (is_scalar_v<T> || is_vector_v<T>) {
store<T>(dst, splat<T>(value));
} else if constexpr (is_array_v<T>) {
using value_type = typename T::value_type;
const auto Splat = splat<value_type>(value);
for (size_t I = 0; I < array_size_v<T>; ++I)
store<value_type>(dst + (I * sizeof(value_type)), Splat);
}
}
LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
block(dst + count - SIZE, value);
}
LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
block(dst, value);
tail(dst, value, count);
}
LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
size_t offset = 0;
do {
block(dst + offset, value);
offset += SIZE;
} while (offset < count - SIZE);
tail(dst, value, count);
}
};
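// A minimal usage sketch (hypothetical dispatch, for illustration only; the
// actual dispatch lives in the memset implementation files):
//   if (count >= 16 && count <= 32)
//     generic::Memset<uint8x16_t>::head_tail(dst, value, count);
//   else if (count > 32)
//     generic::Memset<uint8x32_t>::loop_and_tail(dst, value, count);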
template <typename T, typename... TS> struct MemsetSequence {
static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
LIBC_INLINE static void block(Ptr dst, uint8_t value) {
Memset<T>::block(dst, value);
if constexpr (sizeof...(TS) > 0) {
return MemsetSequence<TS...>::block(dst + sizeof(T), value);
}
}
};
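// For illustration, MemsetSequence<uint64_t, uint16_t, uint8_t>::block writes
// exactly 11 bytes (8 + 2 + 1) with three consecutive stores.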
///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////
template <typename T> struct Memmove {
static constexpr size_t SIZE = sizeof(T);
LIBC_INLINE static void block(Ptr dst, CPtr src) {
store<T>(dst, load<T>(src));
}
LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
const size_t offset = count - SIZE;
// The load and store operations can be performed in any order as long as
// they are not interleaved. More investigations are needed to determine
// the best order.
const auto head = load<T>(src);
const auto tail = load<T>(src + offset);
store<T>(dst, head);
store<T>(dst + offset, tail);
}
// Aligning forward is suitable when dst < src. The alignment is performed
// with a 'head_tail' operation of count ∈ [Alignment, 2 x Alignment].
//
// e.g. Moving two bytes forward, we make sure src is aligned.
// [ | | | | ]
// [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
// [____LLLLLLLL_____________________]
// [___________LLLLLLLA______________]
// [_SSSSSSSS________________________]
// [________SSSSSSSS_________________]
//
// e.g. Moving two bytes forward, we make sure dst is aligned.
// [ | | | | ]
// [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
// [____LLLLLLLL_____________________]
// [______LLLLLLLL___________________]
// [_SSSSSSSS________________________]
// [___SSSSSSSA______________________]
template <Arg AlignOn>
LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
Ptr prev_dst = dst;
CPtr prev_src = src;
size_t prev_count = count;
align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
adjust(SIZE, dst, src, count);
head_tail(prev_dst, prev_src, prev_count - count);
}
// Aligning backward is suitable when dst > src. The alignment is performed
// with a 'head_tail' operation of count ∈ [Alignment, 2 x Alignment].
//
// e.g. Moving two bytes backward, we make sure src is aligned.
// [ | | | | ]
// [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
// [ _________________ALLLLLLL_______]
// [ ___________________LLLLLLLL_____]
// [____________________SSSSSSSS_____]
// [______________________SSSSSSSS___]
//
// e.g. Moving two bytes backward, we make sure dst is aligned.
// [ | | | | ]
// [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
// [ _______________LLLLLLLL_________]
// [ ___________________LLLLLLLL_____]
// [__________________ASSSSSSS_______]
// [______________________SSSSSSSS___]
template <Arg AlignOn>
LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
Ptr headtail_dst = dst + count;
CPtr headtail_src = src + count;
size_t headtail_size = 0;
align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
headtail_size);
adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
head_tail(headtail_dst, headtail_src, headtail_size);
count -= headtail_size;
}
// Moving forward is suitable when dst < src. We load the tail bytes before
// running the loop, as the loop's stores may overwrite them.
//
// e.g. Moving two bytes
// [ | | | | |]
// [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [_________________________LLLLLLLL___]
// [___LLLLLLLL_________________________]
// [_SSSSSSSS___________________________]
// [___________LLLLLLLL_________________]
// [_________SSSSSSSS___________________]
// [___________________LLLLLLLL_________]
// [_________________SSSSSSSS___________]
// [_______________________SSSSSSSS_____]
LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
size_t count) {
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
const size_t tail_offset = count - SIZE;
const auto tail_value = load<T>(src + tail_offset);
size_t offset = 0;
LIBC_LOOP_NOUNROLL
do {
block(dst + offset, src + offset);
offset += SIZE;
} while (offset < count - SIZE);
store<T>(dst + tail_offset, tail_value);
}
// Moving backward is suitable when dst > src. We load the head bytes before
// running the loop, as the loop's stores may overwrite them.
//
// e.g. Moving two bytes
// [ | | | | |]
// [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [___LLLLLLLL_________________________]
// [_________________________LLLLLLLL___]
// [___________________________SSSSSSSS_]
// [_________________LLLLLLLL___________]
// [___________________SSSSSSSS_________]
// [_________LLLLLLLL___________________]
// [___________SSSSSSSS_________________]
// [_____SSSSSSSS_______________________]
LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
size_t count) {
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
const auto head_value = load<T>(src);
ptrdiff_t offset = count - SIZE;
LIBC_LOOP_NOUNROLL
do {
block(dst + offset, src + offset);
offset -= SIZE;
} while (offset >= 0);
store<T>(dst, head_value);
}
};
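// A minimal dispatch sketch (hypothetical, for illustration; assumes
// count >= 16): the direction is chosen from the pointer order so that
// overlapping regions are copied correctly.
//   if (dst < src)
//     generic::Memmove<uint8x16_t>::loop_and_tail_forward(dst, src, count);
//   else if (dst > src)
//     generic::Memmove<uint8x16_t>::loop_and_tail_backward(dst, src, count);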
///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Bcmp {
static constexpr size_t SIZE = Size;
static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
? sizeof(uint64_t)
: sizeof(uint32_t);
template <typename T> LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) {
static_assert(sizeof(T) <= sizeof(uint32_t));
return load<T>(p1) ^ load<T>(p2);
}
template <typename T>
LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) {
return load<T>(p1) != load<T>(p2);
}
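// Note: for sizes of 4 bytes or less the xor of the two loads fits the
// uint32_t return type, while for 8 bytes the comparison is reduced to 0 or 1
// so that no difference is lost by truncation.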
LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
if constexpr (Size == 1) {
return load_xor<uint8_t>(p1, p2);
} else if constexpr (Size == 2) {
return load_xor<uint16_t>(p1, p2);
} else if constexpr (Size == 4) {
return load_xor<uint32_t>(p1, p2);
} else if constexpr (Size == 8) {
return load_not_equal<uint64_t>(p1, p2);
} else if constexpr (details::useArrayType<Size, MaxSize>()) {
for (size_t offset = 0; offset < Size; offset += MaxSize)
if (auto value = Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
return value;
} else {
deferred_static_assert("Unimplemented Size");
}
return BcmpReturnType::ZERO();
}
LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
return block(p1 + count - SIZE, p2 + count - SIZE);
}
LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
return block(p1, p2) | tail(p1, p2, count);
}
LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
size_t offset = 0;
do {
if (auto value = block(p1 + offset, p2 + offset))
return value;
offset += SIZE;
} while (offset < count - SIZE);
return tail(p1, p2, count);
}
};
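// For illustration, Bcmp<16>::head_tail(p1, p2, count) handles
// count ∈ [16, 32] with two (possibly overlapping) block comparisons.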
///////////////////////////////////////////////////////////////////////////////
// Memcmp
///////////////////////////////////////////////////////////////////////////////
template <size_t Size> struct Memcmp {
static constexpr size_t SIZE = Size;
static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
? sizeof(uint64_t)
: sizeof(uint32_t);
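// Loading in big-endian order makes the integer comparison agree with the
// byte-wise lexicographic order required by memcmp.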
template <typename T> LIBC_INLINE static T load_be(CPtr ptr) {
return Endian::to_big_endian(load<T>(ptr));
}
template <typename T>
LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) {
return load_be<T>(p1) - load_be<T>(p2);
}
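// Note: the subtraction above is exact for 8- and 16-bit types because both
// operands are promoted to 'int'; wider types use 'load_be_cmp' below to
// avoid overflow.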
template <typename T>
LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) {
const auto la = load_be<T>(p1);
const auto lb = load_be<T>(p2);
return la > lb ? 1 : la < lb ? -1 : 0;
}
LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
if constexpr (Size == 1) {
return load_be_diff<uint8_t>(p1, p2);
} else if constexpr (Size == 2) {
return load_be_diff<uint16_t>(p1, p2);
} else if constexpr (Size == 4) {
return load_be_cmp<uint32_t>(p1, p2);
} else if constexpr (Size == 8) {
return load_be_cmp<uint64_t>(p1, p2);
} else if constexpr (details::useArrayType<Size, MaxSize>()) {
for (size_t offset = 0; offset < Size; offset += MaxSize)
if (Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
return Memcmp<MaxSize>::block(p1 + offset, p2 + offset);
return MemcmpReturnType::ZERO();
} else if constexpr (Size == 3) {
if (auto value = Memcmp<2>::block(p1, p2))
return value;
return Memcmp<1>::block(p1 + 2, p2 + 2);
} else {
deferred_static_assert("Unimplemented Size");
}
}
LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
return block(p1 + count - SIZE, p2 + count - SIZE);
}
LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
size_t count) {
if (auto value = block(p1, p2))
return value;
return tail(p1, p2, count);
}
LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
size_t offset = 0;
do {
if (auto value = block(p1 + offset, p2 + offset))
return value;
offset += SIZE;
} while (offset < count - SIZE);
return tail(p1, p2, count);
}
};
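// For illustration, Memcmp<3>::block first compares two bytes as a uint16_t
// and inspects the third byte only when the first two are equal.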
} // namespace __llvm_libc::generic
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H