| //===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file provides a generic interface into fixed-size SIMD instructions |
| // using the clang vector type. The API shares some similarities with the |
| // stdx::simd proposal, but instead chooses to use vectors as primitive types |
| // with several extra helper functions. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "hdr/stdint_proxy.h" |
| #include "src/__support/CPP/algorithm.h" |
| #include "src/__support/CPP/limits.h" |
| #include "src/__support/CPP/tuple.h" |
| #include "src/__support/CPP/type_traits.h" |
| #include "src/__support/CPP/utility/integer_sequence.h" |
| #include "src/__support/macros/attributes.h" |
| #include "src/__support/macros/config.h" |
| |
| #include <stddef.h> |
| |
| #ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H |
| #define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H |
| |
| #if LIBC_HAS_VECTOR_TYPE |
| |
| namespace LIBC_NAMESPACE_DECL { |
| namespace cpp { |
| |
| namespace internal { |
| |
| #if defined(LIBC_TARGET_CPU_HAS_AVX512F) |
| template <typename T> |
| LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); |
| #elif defined(LIBC_TARGET_CPU_HAS_AVX2) |
| template <typename T> |
| LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T); |
| #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) |
| template <typename T> |
| LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); |
| #else |
| template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1; |
| #endif |
| |
| } // namespace internal |
| |
| // Type aliases. |
| template <typename T, size_t N> |
| using fixed_size_simd = T [[clang::ext_vector_type(N)]]; |
| template <typename T, size_t N = internal::native_vector_size<T>> |
| using simd = T [[clang::ext_vector_type(N)]]; |
| template <typename T> |
| using simd_mask = simd<bool, internal::native_vector_size<T>>; |
| |
| namespace internal { |
| |
| template <typename T> |
| using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); |
| |
| template <typename T> LIBC_INLINE constexpr T poison() { |
| return __builtin_nondeterministic_value(T()); |
| } |
| |
| template <typename T, size_t N, size_t OriginalSize, size_t... Indices> |
| LIBC_INLINE constexpr static cpp::simd<T, sizeof...(Indices)> |
| extend(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) { |
| return __builtin_shufflevector( |
| x, x, (Indices < OriginalSize ? static_cast<int>(Indices) : -1)...); |
| } |
| |
| template <typename T, size_t N, size_t TargetSize, size_t OriginalSize> |
| LIBC_INLINE constexpr static auto extend(cpp::simd<T, N> x) { |
| // Recursively resize an input vector to the target size, increasing its size |
| // by at most double the input size each step due to shufflevector limitation. |
| if constexpr (N == TargetSize) |
| return x; |
| else if constexpr (TargetSize <= 2 * N) |
| return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{}); |
| else |
| return extend<T, 2 * N, TargetSize, OriginalSize>( |
| extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{})); |
| } |
| |
| template <typename T, size_t N, size_t M, size_t... Indices> |
| LIBC_INLINE constexpr static cpp::simd<T, N + M> |
| concat(cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) { |
| constexpr size_t Size = cpp::max(N, M); |
| auto remap = [](size_t idx) -> int { |
| if (idx < N) |
| return static_cast<int>(idx); |
| if (idx < N + M) |
| return static_cast<int>((idx - N) + Size); |
| return -1; |
| }; |
| |
| // Extend the input vectors until they are the same size, then use the indices |
| // to shuffle in only the indices that correspond to the original values. |
| auto x_ext = extend<T, N, Size, N>(x); |
| auto y_ext = extend<T, M, Size, M>(y); |
| return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...); |
| } |
| |
| template <typename T, size_t N, size_t Count, size_t Offset, size_t... Indices> |
| LIBC_INLINE constexpr static cpp::simd<T, Count> |
| slice(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) { |
| return __builtin_shufflevector(x, x, (Offset + Indices)...); |
| } |
| |
| template <typename T, size_t N, size_t Offset, size_t Head, size_t... Tail> |
| LIBC_INLINE constexpr static auto split(cpp::simd<T, N> x) { |
| // Recursively splits the input vector by walking the variadic template list, |
| // increasing our current head each call. |
| auto result = cpp::make_tuple( |
| slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{})); |
| if constexpr (sizeof...(Tail) > 0) |
| return cpp::tuple_cat(result, split<T, N, Offset + Head, Tail...>(x)); |
| else |
| return result; |
| } |
| |
| } // namespace internal |
| |
| // Type trait helpers. |
| template <typename T> |
| struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> { |
| }; |
| template <class T> constexpr size_t simd_size_v = simd_size<T>::value; |
| |
| template <typename T> struct is_simd : cpp::integral_constant<bool, false> {}; |
| template <typename T, unsigned N> |
| struct is_simd<simd<T, N>> : cpp::integral_constant<bool, true> {}; |
| template <class T> constexpr bool is_simd_v = is_simd<T>::value; |
| |
| template <typename T> |
| struct is_simd_mask : cpp::integral_constant<bool, false> {}; |
| template <unsigned N> |
| struct is_simd_mask<simd<bool, N>> : cpp::integral_constant<bool, true> {}; |
| template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value; |
| |
| template <typename T> struct simd_element_type; |
| template <typename T, size_t N> struct simd_element_type<simd<T, N>> { |
| using type = T; |
| }; |
| template <typename T> |
| using simd_element_type_t = typename simd_element_type<T>::type; |
| |
| template <typename T> |
| using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>; |
| |
| // Casting. |
| template <typename To, typename From, size_t N> |
| LIBC_INLINE constexpr static simd<To, N> simd_cast(simd<From, N> v) { |
| return __builtin_convertvector(v, simd<To, N>); |
| } |
| |
| // SIMD mask operations. |
| template <size_t N> LIBC_INLINE constexpr static bool all_of(simd<bool, N> m) { |
| return __builtin_reduce_and(m); |
| } |
| template <size_t N> LIBC_INLINE constexpr static bool any_of(simd<bool, N> m) { |
| return __builtin_reduce_or(m); |
| } |
| template <size_t N> LIBC_INLINE constexpr static bool none_of(simd<bool, N> m) { |
| return !any_of(m); |
| } |
| template <size_t N> LIBC_INLINE constexpr static bool some_of(simd<bool, N> m) { |
| return any_of(m) && !all_of(m); |
| } |
| template <size_t N> LIBC_INLINE constexpr static int popcount(simd<bool, N> m) { |
| return __builtin_popcountg(m); |
| } |
| template <size_t N> |
| LIBC_INLINE constexpr static int find_first_set(simd<bool, N> m) { |
| return __builtin_ctzg(m); |
| } |
| template <size_t N> |
| LIBC_INLINE constexpr static int find_last_set(simd<bool, N> m) { |
| constexpr size_t size = simd_size_v<simd<bool, N>>; |
| return size - 1 - __builtin_clzg(m); |
| } |
| |
| // Elementwise operations. |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> min(simd<T, N> x, simd<T, N> y) { |
| return __builtin_elementwise_min(x, y); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> max(simd<T, N> x, simd<T, N> y) { |
| return __builtin_elementwise_max(x, y); |
| } |
| |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> abs(simd<T, N> x) { |
| return __builtin_elementwise_abs(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> fma(simd<T, N> x, simd<T, N> y, |
| simd<T, N> z) { |
| return __builtin_elementwise_fma(x, y, z); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> ceil(simd<T, N> x) { |
| return __builtin_elementwise_ceil(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> floor(simd<T, N> x) { |
| return __builtin_elementwise_floor(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> roundeven(simd<T, N> x) { |
| return __builtin_elementwise_roundeven(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> round(simd<T, N> x) { |
| return __builtin_elementwise_round(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> trunc(simd<T, N> x) { |
| return __builtin_elementwise_trunc(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> nearbyint(simd<T, N> x) { |
| return __builtin_elementwise_nearbyint(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> rint(simd<T, N> x) { |
| return __builtin_elementwise_rint(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> canonicalize(simd<T, N> x) { |
| return __builtin_elementwise_canonicalize(x); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> copysign(simd<T, N> x, simd<T, N> y) { |
| return __builtin_elementwise_copysign(x, y); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> fmod(simd<T, N> x, simd<T, N> y) { |
| return __builtin_elementwise_fmod(x, y); |
| } |
| |
| // Reduction operations. |
| template <typename T, size_t N, typename Op = cpp::plus<>> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, Op op = {}) { |
| return reduce(v, op); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::plus<>) { |
| return __builtin_reduce_add(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::multiplies<>) { |
| return __builtin_reduce_mul(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_and<>) { |
| return __builtin_reduce_and(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_or<>) { |
| return __builtin_reduce_or(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_xor<>) { |
| return __builtin_reduce_xor(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T hmin(simd<T, N> v) { |
| return __builtin_reduce_min(v); |
| } |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static T hmax(simd<T, N> v) { |
| return __builtin_reduce_max(v); |
| } |
| |
| // Accessor helpers. |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> load_unaligned(const void *ptr) { |
| T tmp; |
| __builtin_memcpy(&tmp, ptr, sizeof(T)); |
| return tmp; |
| } |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> load_aligned(const void *ptr) { |
| return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T))); |
| } |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> store_unaligned(T v, void *ptr) { |
| __builtin_memcpy(ptr, &v, sizeof(T)); |
| } |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> store_aligned(T v, void *ptr) { |
| store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T))); |
| } |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> |
| masked_load(simd<bool, simd_size_v<T>> m, void *ptr, |
| T passthru = internal::poison<simd_element_type<T>>()) { |
| return __builtin_masked_load(m, ptr, passthru); |
| } |
| template <typename T> |
| LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v, |
| void *ptr) { |
| __builtin_masked_store( |
| m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T)))); |
| } |
| |
| // Construction helpers. |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> splat(T v) { |
| return simd<T, N>(v); |
| } |
| template <typename T> LIBC_INLINE constexpr static simd<T> splat(T v) { |
| return splat<T, simd_size_v<simd<T>>>(v); |
| } |
| template <typename T, unsigned N> |
| LIBC_INLINE constexpr static simd<T, N> iota(T base = T(0), T step = T(1)) { |
| simd<T, N> v{}; |
| for (unsigned i = 0; i < N; ++i) |
| v[i] = base + T(i) * step; |
| return v; |
| } |
| template <typename T> |
| LIBC_INLINE constexpr static simd<T> iota(T base = T(0), T step = T(1)) { |
| return iota<T, simd_size_v<simd<T>>>(base, step); |
| } |
| |
| // Conditional helpers. |
| template <typename T, size_t N> |
| LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x, |
| simd<T, N> y) { |
| return m ? x : y; |
| } |
| |
| // Shuffling helpers. |
| template <typename T, size_t N, size_t M> |
| LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y) { |
| return internal::concat(x, y, make_index_sequence<N + M>{}); |
| } |
| template <typename T, size_t N, size_t M, typename... Rest> |
| LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y, |
| Rest... rest) { |
| auto xy = concat(x, y); |
| if constexpr (sizeof...(Rest)) |
| return concat(xy, rest...); |
| else |
| return xy; |
| } |
| template <size_t... Sizes, typename T, size_t N> auto split(cpp::simd<T, N> x) { |
| static_assert((... + Sizes) == N, "split sizes must sum to vector size"); |
| return internal::split<T, N, 0, Sizes...>(x); |
| } |
| |
| // TODO: where expressions, scalar overloads, ABI types. |
| |
| } // namespace cpp |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LIBC_HAS_VECTOR_TYPE |
| #endif |