src/__support/mathvec/expf.h - llvm-project/libc - Git at Google

 //===-- Implementation header for SIMD expf ---------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
 #define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H

 #include "expf_utils.h"
 #include "src/__support/CPP/simd.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/common.h"

 namespace LIBC_NAMESPACE_DECL {

 namespace mathvec {

 template <size_t N>
 LIBC_INLINE static cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
   constexpr cpp::simd<double, N> shift = 0x1.800000000ffc0p+46;

   // inv_ln2 = round(1/log(2), D, RN);
   constexpr cpp::simd<double, N> inv_ln2 = 0x1.71547652b82fep+0;
   cpp::simd<double, N> z = shift + x * inv_ln2;
   cpp::simd<double, N> n = z - shift;

   // ln2_hi = round(log(2), D, RN);
   // ln2_lo = round(log(2) - ln2_hi, D, RN);
   constexpr cpp::simd<double, N> ln2_hi = 0x1.62e42fefa39efp-1;
   constexpr cpp::simd<double, N> ln2_lo = 0x1.abc9e3b39803fp-56;

   cpp::simd<double, N> r = x;
   r = r - n * ln2_hi;
   r = r - n * ln2_lo;

   // Coefficients of exp approximation, generated by Sollya with:
   // poly = 1 + x;
   // for i from 2 to 5 do {
   //   r = remez(exp(x)-poly(x), 5-i, [-log(2)/128;log(2)/128], x^i, 1e-10);
   //   c = coeff(roundcoefficients(r, [|D ...|]), 0);
   //   poly = poly + x^i*c;
   //   c;
   // };
   constexpr cpp::simd<double, N> c0 = 0x1.fffffffffdbcep-2;
   constexpr cpp::simd<double, N> c1 = 0x1.55555555543c2p-3;
   constexpr cpp::simd<double, N> c2 = 0x1.555573c64f2e3p-5;
   constexpr cpp::simd<double, N> c3 = 0x1.111126b4eff73p-7;

   /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5.  */
   cpp::simd<double, N> r2 = r * r;
   cpp::simd<double, N> p01 = c0 + r * c1;
   cpp::simd<double, N> p23 = c2 + r * c3;
   cpp::simd<double, N> p04 = p01 + r2 * p23;
   cpp::simd<double, N> y = r + p04 * r2;

   cpp::simd<uint64_t, N> u = cpp::bit_cast<cpp::simd<uint64_t, N>>(z);
   cpp::simd<double, N> s = exp_lookup(u);
   return s + s * y;
 }

 template <size_t N>
 LIBC_INLINE cpp::simd<float, N> expf(cpp::simd<float, N> x) {
   using FPBits = typename fputil::FPBits<float>;

   cpp::simd<bool, N> is_inf = x >= 0x1.62e38p+9;
   cpp::simd<bool, N> is_zero = x <= -0x1.628c2ap+9;
   cpp::simd<bool, N> is_special = is_inf | is_zero;

   cpp::simd<float, N> special_res = is_inf ? FPBits::inf().get_val() : 0.0f;

   cpp::simd<double, N> x_d = cpp::simd_cast<double, float, N>(x);
   cpp::simd<double, N> y = inline_exp(x_d);
   cpp::simd<float, N> ret = cpp::simd_cast<float, double, N>(y);
   return is_special ? special_res : ret;
 }

 } // namespace mathvec

 } // namespace LIBC_NAMESPACE_DECL

 #endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
	//===-- Implementation header for SIMD expf ---------------------- C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
	#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H

	#include "expf_utils.h"
	#include "src/__support/CPP/simd.h"
	#include "src/__support/FPUtil/FPBits.h"
	#include "src/__support/common.h"

	namespace LIBC_NAMESPACE_DECL {

	namespace mathvec {

	template <size_t N>
	LIBC_INLINE static cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
	constexpr cpp::simd<double, N> shift = 0x1.800000000ffc0p+46;

	// inv_ln2 = round(1/log(2), D, RN);
	constexpr cpp::simd<double, N> inv_ln2 = 0x1.71547652b82fep+0;
	cpp::simd<double, N> z = shift + x * inv_ln2;
	cpp::simd<double, N> n = z - shift;

	// ln2_hi = round(log(2), D, RN);
	// ln2_lo = round(log(2) - ln2_hi, D, RN);
	constexpr cpp::simd<double, N> ln2_hi = 0x1.62e42fefa39efp-1;
	constexpr cpp::simd<double, N> ln2_lo = 0x1.abc9e3b39803fp-56;

	cpp::simd<double, N> r = x;
	r = r - n * ln2_hi;
	r = r - n * ln2_lo;

	// Coefficients of exp approximation, generated by Sollya with:
	// poly = 1 + x;
	// for i from 2 to 5 do {
	// r = remez(exp(x)-poly(x), 5-i, [-log(2)/128;log(2)/128], x^i, 1e-10);
	// c = coeff(roundcoefficients(r, [\|D ...\|]), 0);
	// poly = poly + x^i*c;
	// c;
	// };
	constexpr cpp::simd<double, N> c0 = 0x1.fffffffffdbcep-2;
	constexpr cpp::simd<double, N> c1 = 0x1.55555555543c2p-3;
	constexpr cpp::simd<double, N> c2 = 0x1.555573c64f2e3p-5;
	constexpr cpp::simd<double, N> c3 = 0x1.111126b4eff73p-7;

	/* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */
	cpp::simd<double, N> r2 = r * r;
	cpp::simd<double, N> p01 = c0 + r * c1;
	cpp::simd<double, N> p23 = c2 + r * c3;
	cpp::simd<double, N> p04 = p01 + r2 * p23;
	cpp::simd<double, N> y = r + p04 * r2;

	cpp::simd<uint64_t, N> u = cpp::bit_cast<cpp::simd<uint64_t, N>>(z);
	cpp::simd<double, N> s = exp_lookup(u);
	return s + s * y;
	}

	template <size_t N>
	LIBC_INLINE cpp::simd<float, N> expf(cpp::simd<float, N> x) {
	using FPBits = typename fputil::FPBits<float>;

	cpp::simd<bool, N> is_inf = x >= 0x1.62e38p+9;
	cpp::simd<bool, N> is_zero = x <= -0x1.628c2ap+9;
	cpp::simd<bool, N> is_special = is_inf \| is_zero;

	cpp::simd<float, N> special_res = is_inf ? FPBits::inf().get_val() : 0.0f;

	cpp::simd<double, N> x_d = cpp::simd_cast<double, float, N>(x);
	cpp::simd<double, N> y = inline_exp(x_d);
	cpp::simd<float, N> ret = cpp::simd_cast<float, double, N>(y);
	return is_special ? special_res : ret;
	}

	} // namespace mathvec

	} // namespace LIBC_NAMESPACE_DECL

	#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H