libc/src/math/generic/explogxf.h - llvm-project - Git at Google

 //===-- Single-precision general exp/log functions ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H

 #include "common_constants.h"

 #include "src/__support/common.h"
 #include "src/__support/macros/properties/cpu_features.h"
 #include "src/__support/math/acoshf_utils.h"
 #include "src/__support/math/exp10f_utils.h"
 #include "src/__support/math/exp_utils.h"

 namespace LIBC_NAMESPACE_DECL {

 constexpr int LOG_P1_BITS = 6;
 constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

 // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
 // and exp(-x) simultaneously.
 // To compute e^x, we perform the following range
 // reduction: find hi, mid, lo such that:
 //   x = (hi + mid) * log(2) + lo, in which
 //     hi is an integer,
 //     0 <= mid * 2^5 < 32 is an integer
 //     -2^(-6) <= lo * log2(e) <= 2^-6.
 // In particular,
 //   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
 // Then,
 //   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
 // 2^mid is stored in the lookup table of 32 elements.
 // e^lo is computed using a degree-5 minimax polynomial
 // generated by Sollya:
 //   e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
 //        = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
 //        = P_even + lo * P_odd
 // We perform 2^hi * 2^mid by simply add hi to the exponent field
 // of 2^mid.
 // To compute e^(-x), notice that:
 //   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
 //          ~ 2^(-(hi + mid)) * P(-lo)
 //          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
 // So:
 //   sinh(x) = (e^x - e^(-x)) / 2
 //           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
 //                    2^(-(hi + mid)) * (P_even - lo * P_odd))
 //           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
 // And similarly:
 //   cosh(x) = (e^x + e^(-x)) / 2
 //           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
 // The main point of these formulas is that the expensive part of calculating
 // the polynomials approximating lower parts of e^(x) and e^(-x) are shared
 // and only done once.
 template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
   double xd = static_cast<double>(x);

   // kd = round(x * log2(e) * 2^5)
   // k_p = round(x * log2(e) * 2^5)
   // k_m = round(-x * log2(e) * 2^5)
   double kd;
   int k_p, k_m;

 #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
   kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
   k_p = static_cast<int>(kd);
   k_m = -k_p;
 #else
   constexpr double HALF_WAY[2] = {0.5, -0.5};

   k_p = static_cast<int>(
       fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
   k_m = -k_p;
   kd = static_cast<double>(k_p);
 #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT

   // hi = floor(kf * 2^(-5))
   // exp_hi = shift hi to the exponent field of double precision.
   int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
                      << fputil::FPBits<double>::FRACTION_LEN;
   int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
                      << fputil::FPBits<double>::FRACTION_LEN;
   // mh_p = 2^(hi + mid)
   // mh_m = 2^(-(hi + mid))
   // mh_bits_* = bit field of mh_*
   int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
   int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
   double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
   double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
   // mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
   double mh_sum = mh_p + mh_m;
   // mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
   double mh_diff = mh_p - mh_m;

   // dx = lo = x - (hi + mid) * log(2)
   double dx =
       fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
                            fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
   double dx2 = dx * dx;

   // c0 = 1 + COEFFS[0] * lo^2
   // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
   double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
                                    ExpBase::COEFFS[2] * 0.5);
   // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
   double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
                                   ExpBase::COEFFS[3] * 0.5);

   double r;
   if constexpr (is_sinh)
     r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
   else
     r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
   return r;
 }

 // x should be positive, normal finite value
 // TODO: Simplify range reduction and polynomial degree for float16.
 //       See issue #137190.
 LIBC_INLINE static float log_eval_f(float x) {
   // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx).
   using FPBits = fputil::FPBits<float>;
   FPBits xbits(x);

   float ex = static_cast<float>(xbits.get_exponent());
   // p1 is the leading 7 bits of mx, i.e.
   // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
   int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7));

   // Set bits to (1 + (mx - p1*2^(-7)))
   xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7));
   xbits.set_biased_exponent(FPBits::EXP_BIAS);
   // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)).
   float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1];

   // Minimax polynomial for log(1 + dx), generated using Sollya:
   //   > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]);
   //   > Q = (P - 1) / x;
   //   > for i from 0 to degree(Q) do print(coeff(Q, i));
   constexpr float COEFFS[6] = {-0x1p-1f,       0x1.555556p-2f,  -0x1.00022ep-2f,
                                0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f};

   float dx2 = dx * dx;

   float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
   float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
   float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);

   float p = fputil::polyeval(dx2, dx, c1, c2, c3);

   // Generated by Sollya with the following commands:
   //   > display = hexadecimal;
   //   > round(log(2), SG, RN);
   constexpr float LOGF_2 = 0x1.62e43p-1f;

   float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p);
   return result;
 }

 } // namespace LIBC_NAMESPACE_DECL

 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
	//===-- Single-precision general exp/log functions ------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
	#define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H

	#include "common_constants.h"

	#include "src/__support/common.h"
	#include "src/__support/macros/properties/cpu_features.h"
	#include "src/__support/math/acoshf_utils.h"
	#include "src/__support/math/exp10f_utils.h"
	#include "src/__support/math/exp_utils.h"

	namespace LIBC_NAMESPACE_DECL {

	constexpr int LOG_P1_BITS = 6;
	constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

	// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
	// and exp(-x) simultaneously.
	// To compute e^x, we perform the following range
	// reduction: find hi, mid, lo such that:
	// x = (hi + mid) * log(2) + lo, in which
	// hi is an integer,
	// 0 <= mid * 2^5 < 32 is an integer
	// -2^(-6) <= lo * log2(e) <= 2^-6.
	// In particular,
	// hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
	// Then,
	// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
	// 2^mid is stored in the lookup table of 32 elements.
	// e^lo is computed using a degree-5 minimax polynomial
	// generated by Sollya:
	// e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
	// = (1 + c2lo^2 + c4lo^4) + lo * (1 + c3lo^2 + c5lo^4)
	// = P_even + lo * P_odd
	// We perform 2^hi * 2^mid by simply add hi to the exponent field
	// of 2^mid.
	// To compute e^(-x), notice that:
	// e^(-x) = 2^(-(hi + mid)) * e^(-lo)
	// ~ 2^(-(hi + mid)) * P(-lo)
	// = 2^(-(hi + mid)) * (P_even - lo * P_odd)
	// So:
	// sinh(x) = (e^x - e^(-x)) / 2
	// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
	// 2^(-(hi + mid)) * (P_even - lo * P_odd))
	// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
	// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
	// And similarly:
	// cosh(x) = (e^x + e^(-x)) / 2
	// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
	// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
	// The main point of these formulas is that the expensive part of calculating
	// the polynomials approximating lower parts of e^(x) and e^(-x) are shared
	// and only done once.
	template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
	double xd = static_cast<double>(x);

	// kd = round(x * log2(e) * 2^5)
	// k_p = round(x * log2(e) * 2^5)
	// k_m = round(-x * log2(e) * 2^5)
	double kd;
	int k_p, k_m;

	#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
	kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
	k_p = static_cast<int>(kd);
	k_m = -k_p;
	#else
	constexpr double HALF_WAY[2] = {0.5, -0.5};

	k_p = static_cast<int>(
	fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
	k_m = -k_p;
	kd = static_cast<double>(k_p);
	#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT

	// hi = floor(kf * 2^(-5))
	// exp_hi = shift hi to the exponent field of double precision.
	int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
	<< fputil::FPBits<double>::FRACTION_LEN;
	int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
	<< fputil::FPBits<double>::FRACTION_LEN;
	// mh_p = 2^(hi + mid)
	// mh_m = 2^(-(hi + mid))
	// mh_bits_* = bit field of mh_*
	int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
	int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
	double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
	double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
	// mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
	double mh_sum = mh_p + mh_m;
	// mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
	double mh_diff = mh_p - mh_m;

	// dx = lo = x - (hi + mid) * log(2)
	double dx =
	fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
	fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
	double dx2 = dx * dx;

	// c0 = 1 + COEFFS[0] * lo^2
	// P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
	double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
	ExpBase::COEFFS[2] * 0.5);
	// P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
	double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
	ExpBase::COEFFS[3] * 0.5);

	double r;
	if constexpr (is_sinh)
	r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
	else
	r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
	return r;
	}

	// x should be positive, normal finite value
	// TODO: Simplify range reduction and polynomial degree for float16.
	// See issue #137190.
	LIBC_INLINE static float log_eval_f(float x) {
	// For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx).
	using FPBits = fputil::FPBits<float>;
	FPBits xbits(x);

	float ex = static_cast<float>(xbits.get_exponent());
	// p1 is the leading 7 bits of mx, i.e.
	// p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
	int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7));

	// Set bits to (1 + (mx - p1*2^(-7)))
	xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7));
	xbits.set_biased_exponent(FPBits::EXP_BIAS);
	// dx = (mx - p12^(-7)) / (1 + p12^(-7)).
	float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1];

	// Minimax polynomial for log(1 + dx), generated using Sollya:
	// > P = fpminimax(log(1 + x)/x, 6, [\|SG...\|], [0, 2^-7]);
	// > Q = (P - 1) / x;
	// > for i from 0 to degree(Q) do print(coeff(Q, i));
	constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f,
	0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f};

	float dx2 = dx * dx;

	float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
	float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
	float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);

	float p = fputil::polyeval(dx2, dx, c1, c2, c3);

	// Generated by Sollya with the following commands:
	// > display = hexadecimal;
	// > round(log(2), SG, RN);
	constexpr float LOGF_2 = 0x1.62e43p-1f;

	float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p);
	return result;
	}

	} // namespace LIBC_NAMESPACE_DECL

	#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H