libc/src/math/generic/explogxf.h - llvm-project - Git at Google

 //===-- Single-precision general exp/log functions ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H

 #include "common_constants.h"
 #include "math_utils.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/optional.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/properties/cpu_features.h"

 #include <errno.h>

 namespace LIBC_NAMESPACE {

 struct ExpBase {
   // Base = e
   static constexpr int MID_BITS = 5;
   static constexpr int MID_MASK = (1 << MID_BITS) - 1;
   // log2(e) * 2^5
   static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
   // High and low parts of -log(2) * 2^(-5)
   static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
   static constexpr double M_LOGB_2_LO =
       -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
   // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
   // with:
   // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
   static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
       0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
       0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
       0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
       0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
       0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
       0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
       0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
       0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
       0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
       0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
       0x3ffea4afa2a490da, 0x3fff50765b6e4540,
   };

   // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
   // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
   // Then:
   //   e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
   static constexpr double COEFFS[4] = {
       0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
       0x1.11112a0e34bdbp-7};

   LIBC_INLINE static double powb_lo(double dx) {
     using fputil::multiply_add;
     double dx2 = dx * dx;
     double c0 = 1.0 + dx;
     // c1 = COEFFS[0] + COEFFS[1] * dx
     double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
     // c2 = COEFFS[2] + COEFFS[3] * dx
     double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
     // r = c4 + c5 * dx^4
     //   = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
     return fputil::polyeval(dx2, c0, c1, c2);
   }
 };

 struct Exp10Base : public ExpBase {
   // log2(10) * 2^5
   static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
   // High and low parts of -log10(2) * 2^(-5).
   // Notice that since |x * log2(10)| < 150:
   //   |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13
   // So when the FMA instructions are not available, in order for the product
   //   k * M_LOGB_2_HI
   // to be exact, we only store the high part of log10(2) up to 38 bits
   // (= 53 - 15) of precision.
   // It is generated by Sollya with:
   // > round(log10(2), 44, RN);
   static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
   // > round(log10(2) - 0x1.34413509f8p-2, D, RN);
   static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);

   // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
   // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
   // Then:
   //   10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
   static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
                                        0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
                                        0x1.1429e74a98f43p-1};

   static double powb_lo(double dx) {
     using fputil::multiply_add;
     double dx2 = dx * dx;
     // c0 = 1 + COEFFS[0] * dx
     double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
     // c1 = COEFFS[1] + COEFFS[2] * dx
     double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
     // c2 = COEFFS[3] + COEFFS[4] * dx
     double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
     // r = c0 + dx^2 * (c1 + c2 * dx^2)
     //   = c0 + c1 * dx^2 + c2 * dx^4
     //   = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
     return fputil::polyeval(dx2, c0, c1, c2);
   }
 };

 constexpr int LOG_P1_BITS = 6;
 constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

 // N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
 extern const double LOG_P1_LOG2[LOG_P1_SIZE];

 // N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
 extern const double LOG_P1_1_OVER[LOG_P1_SIZE];

 // Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
 // K_LOG2_ODD starts from x^3
 extern const double K_LOG2_ODD[4];
 extern const double K_LOG2_EVEN[4];

 // Output of range reduction for exp_b: (2^(mid + hi), lo)
 // where:
 //   b^x = 2^(mid + hi) * b^lo
 struct exp_b_reduc_t {
   double mh; // 2^(mid + hi)
   double lo;
 };

 // The function correctly calculates b^x value with at least float precision
 // in a limited range.
 // Range reduction:
 //   b^x = 2^(hi + mid) * b^lo
 // where:
 //   x = (hi + mid) * log_b(2) + lo
 //   hi is an integer,
 //   0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
 //   -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
 // Base class needs to provide the following constants:
 //   - MID_BITS    : number of bits after decimal points used for mid
 //   - MID_MASK    : 2^MID_BITS - 1, mask to extract mid bits
 //   - LOG2_B      : log2(b) * 2^MID_BITS for scaling
 //   - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
 //   - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
 //   - EXP_2_MID   : look up table for bit fields of 2^mid
 // Return:
 //   { 2^(hi + mid), lo }
 template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
   double xd = static_cast<double>(x);
   // kd = round((hi + mid) * log2(b) * 2^MID_BITS)
   double kd = fputil::nearest_integer(Base::LOG2_B * xd);
   // k = round((hi + mid) * log2(b) * 2^MID_BITS)
   int k = static_cast<int>(kd);
   // hi = floor(kd * 2^(-MID_BITS))
   // exp_hi = shift hi to the exponent field of double precision.
   int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
                    << fputil::FPBits<double>::FRACTION_LEN;
   // mh = 2^hi * 2^mid
   // mh_bits = bit field of mh
   int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
   double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
   // dx = lo = x - (hi + mid) * log(2)
   double dx = fputil::multiply_add(
       kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
   return {mh, dx};
 }

 // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
 // and exp(-x) simultaneously.
 // To compute e^x, we perform the following range
 // reduction: find hi, mid, lo such that:
 //   x = (hi + mid) * log(2) + lo, in which
 //     hi is an integer,
 //     0 <= mid * 2^5 < 32 is an integer
 //     -2^(-6) <= lo * log2(e) <= 2^-6.
 // In particular,
 //   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
 // Then,
 //   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
 // 2^mid is stored in the lookup table of 32 elements.
 // e^lo is computed using a degree-5 minimax polynomial
 // generated by Sollya:
 //   e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
 //        = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
 //        = P_even + lo * P_odd
 // We perform 2^hi * 2^mid by simply add hi to the exponent field
 // of 2^mid.
 // To compute e^(-x), notice that:
 //   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
 //          ~ 2^(-(hi + mid)) * P(-lo)
 //          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
 // So:
 //   sinh(x) = (e^x - e^(-x)) / 2
 //           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
 //                    2^(-(hi + mid)) * (P_even - lo * P_odd))
 //           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
 // And similarly:
 //   cosh(x) = (e^x + e^(-x)) / 2
 //           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
 // The main point of these formulas is that the expensive part of calculating
 // the polynomials approximating lower parts of e^(x) and e^(-x) are shared
 // and only done once.
 template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
   double xd = static_cast<double>(x);

   // kd = round(x * log2(e) * 2^5)
   // k_p = round(x * log2(e) * 2^5)
   // k_m = round(-x * log2(e) * 2^5)
   double kd;
   int k_p, k_m;

 #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
   kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
   k_p = static_cast<int>(kd);
   k_m = -k_p;
 #else
   constexpr double HALF_WAY[2] = {0.5, -0.5};

   k_p = static_cast<int>(
       fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
   k_m = -k_p;
   kd = static_cast<double>(k_p);
 #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT

   // hi = floor(kf * 2^(-5))
   // exp_hi = shift hi to the exponent field of double precision.
   int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
                      << fputil::FPBits<double>::FRACTION_LEN;
   int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
                      << fputil::FPBits<double>::FRACTION_LEN;
   // mh_p = 2^(hi + mid)
   // mh_m = 2^(-(hi + mid))
   // mh_bits_* = bit field of mh_*
   int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
   int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
   double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
   double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
   // mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
   double mh_sum = mh_p + mh_m;
   // mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
   double mh_diff = mh_p - mh_m;

   // dx = lo = x - (hi + mid) * log(2)
   double dx =
       fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
                            fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
   double dx2 = dx * dx;

   // c0 = 1 + COEFFS[0] * lo^2
   // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
   double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
                                    ExpBase::COEFFS[2] * 0.5);
   // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
   double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
                                   ExpBase::COEFFS[3] * 0.5);

   double r;
   if constexpr (is_sinh)
     r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
   else
     r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
   return r;
 }

 // x should be positive, normal finite value
 LIBC_INLINE static double log2_eval(double x) {
   using FPB = fputil::FPBits<double>;
   FPB bs(x);

   double result = 0;
   result += bs.get_exponent();

   int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) &
            (LOG_P1_SIZE - 1);

   bs.bits &= FPB::FRACTION_MASK >> LOG_P1_BITS;
   bs.set_biased_exponent(FPB::EXP_BIAS);
   double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1];

   // Taylor series for log(2,1+x)
   double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]);
   double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]);
   double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]);
   double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]);

   // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1]
   double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]);
   result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4);
   return result;
 }

 // x should be positive, normal finite value
 LIBC_INLINE static double log_eval(double x) {
   // For x = 2^ex * (1 + mx)
   //   log(x) = ex * log(2) + log(1 + mx)
   using FPB = fputil::FPBits<double>;
   FPB bs(x);

   double ex = static_cast<double>(bs.get_exponent());

   // p1 is the leading 7 bits of mx, i.e.
   // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
   int p1 = static_cast<int>(bs.get_mantissa() >> (FPB::FRACTION_LEN - 7));

   // Set bs to (1 + (mx - p1*2^(-7))
   bs.bits &= FPB::FRACTION_MASK >> 7;
   bs.set_biased_exponent(FPB::EXP_BIAS);
   // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)).
   double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1];

   // Minimax polynomial of log(1 + dx) generated by Sollya with:
   // > P = fpminimax(log(1 + x)/x, 6, [|D...|], [0, 2^-7]);
   const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2,
                             -0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3,
                             -0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3};
   double dx2 = dx * dx;
   double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
   double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
   double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);

   double p = fputil::polyeval(dx2, dx, c1, c2, c3);
   double result =
       fputil::multiply_add(ex, /*log(2)*/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p);
   return result;
 }

 // Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We
 // assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid.
 // Notice that, if 0 < x < 2^-1022,
 //   double(2^-1022 + x) - 2^-1022 = double(x).
 // So if we scale x up by 2^1022, we can use
 //   double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
 LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
                                                   double err) {
   using FPBits = typename fputil::FPBits<double>;

   // Scaling factor = 1/(min normal number) = 2^1022
   int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN;
   double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));
   double lo_scaled =
       (lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo))
                   : 0.0;

   double extra_factor = 0.0;
   uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field.

   // Result is denormal if (mid_hi + lo_scale < 1.0).
   if ((1.0 - mid_hi) > lo_scaled) {
     // Extra rounding step is needed, which adds more rounding errors.
     err += 0x1.0p-52;
     extra_factor = 1.0;
     scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field.
   }

   double err_scaled =
       cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err));

   double lo_u = lo_scaled + err_scaled;
   double lo_l = lo_scaled - err_scaled;

   // By adding 1.0, the results will have similar rounding points as denormal
   // outputs.
   double upper = extra_factor + (mid_hi + lo_u);
   double lower = extra_factor + (mid_hi + lo_l);

   if (LIBC_LIKELY(upper == lower)) {
     return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
   }

   return cpp::nullopt;
 }

 } // namespace LIBC_NAMESPACE

 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
	//===-- Single-precision general exp/log functions ------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
	#define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H

	#include "common_constants.h"
	#include "math_utils.h"
	#include "src/__support/CPP/bit.h"
	#include "src/__support/CPP/optional.h"
	#include "src/__support/FPUtil/FEnvImpl.h"
	#include "src/__support/FPUtil/FPBits.h"
	#include "src/__support/FPUtil/PolyEval.h"
	#include "src/__support/FPUtil/nearest_integer.h"
	#include "src/__support/common.h"
	#include "src/__support/macros/properties/cpu_features.h"

	#include <errno.h>

	namespace LIBC_NAMESPACE {

	struct ExpBase {
	// Base = e
	static constexpr int MID_BITS = 5;
	static constexpr int MID_MASK = (1 << MID_BITS) - 1;
	// log2(e) * 2^5
	static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
	// High and low parts of -log(2) * 2^(-5)
	static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
	static constexpr double M_LOGB_2_LO =
	-0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
	// Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
	// with:
	// > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
	static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
	0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
	0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
	0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
	0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
	0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
	0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
	0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
	0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
	0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
	0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
	0x3ffea4afa2a490da, 0x3fff50765b6e4540,
	};

	// Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
	// > Q = fpminimax(expm1(x)/x, 4, [\|1, D...\|], [-log(2)/64, log(2)/64]);
	// Then:
	// e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
	static constexpr double COEFFS[4] = {
	0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
	0x1.11112a0e34bdbp-7};

	LIBC_INLINE static double powb_lo(double dx) {
	using fputil::multiply_add;
	double dx2 = dx * dx;
	double c0 = 1.0 + dx;
	// c1 = COEFFS[0] + COEFFS[1] * dx
	double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
	// c2 = COEFFS[2] + COEFFS[3] * dx
	double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
	// r = c4 + c5 * dx^4
	// = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
	return fputil::polyeval(dx2, c0, c1, c2);
	}
	};

	struct Exp10Base : public ExpBase {
	// log2(10) * 2^5
	static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
	// High and low parts of -log10(2) * 2^(-5).
	// Notice that since \|x * log2(10)\| < 150:
	// \|k\| = \|round(x * log2(10) * 2^5)\| < 2^8 * 2^5 = 2^13
	// So when the FMA instructions are not available, in order for the product
	// k * M_LOGB_2_HI
	// to be exact, we only store the high part of log10(2) up to 38 bits
	// (= 53 - 15) of precision.
	// It is generated by Sollya with:
	// > round(log10(2), 44, RN);
	static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
	// > round(log10(2) - 0x1.34413509f8p-2, D, RN);
	static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);

	// Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
	// > Q = fpminimax((10^x - 1)/x, 4, [\|D...\|], [-log10(2)/2^6, log10(2)/2^6]);
	// Then:
	// 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
	static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
	0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
	0x1.1429e74a98f43p-1};

	static double powb_lo(double dx) {
	using fputil::multiply_add;
	double dx2 = dx * dx;
	// c0 = 1 + COEFFS[0] * dx
	double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
	// c1 = COEFFS[1] + COEFFS[2] * dx
	double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
	// c2 = COEFFS[3] + COEFFS[4] * dx
	double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
	// r = c0 + dx^2 * (c1 + c2 * dx^2)
	// = c0 + c1 * dx^2 + c2 * dx^4
	// = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
	return fputil::polyeval(dx2, c0, c1, c2);
	}
	};

	constexpr int LOG_P1_BITS = 6;
	constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

	// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
	extern const double LOG_P1_LOG2[LOG_P1_SIZE];

	// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
	extern const double LOG_P1_1_OVER[LOG_P1_SIZE];

	// Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
	// K_LOG2_ODD starts from x^3
	extern const double K_LOG2_ODD[4];
	extern const double K_LOG2_EVEN[4];

	// Output of range reduction for exp_b: (2^(mid + hi), lo)
	// where:
	// b^x = 2^(mid + hi) * b^lo
	struct exp_b_reduc_t {
	double mh; // 2^(mid + hi)
	double lo;
	};

	// The function correctly calculates b^x value with at least float precision
	// in a limited range.
	// Range reduction:
	// b^x = 2^(hi + mid) * b^lo
	// where:
	// x = (hi + mid) * log_b(2) + lo
	// hi is an integer,
	// 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
	// -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
	// Base class needs to provide the following constants:
	// - MID_BITS : number of bits after decimal points used for mid
	// - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits
	// - LOG2_B : log2(b) * 2^MID_BITS for scaling
	// - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
	// - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
	// - EXP_2_MID : look up table for bit fields of 2^mid
	// Return:
	// { 2^(hi + mid), lo }
	template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
	double xd = static_cast<double>(x);
	// kd = round((hi + mid) * log2(b) * 2^MID_BITS)
	double kd = fputil::nearest_integer(Base::LOG2_B * xd);
	// k = round((hi + mid) * log2(b) * 2^MID_BITS)
	int k = static_cast<int>(kd);
	// hi = floor(kd * 2^(-MID_BITS))
	// exp_hi = shift hi to the exponent field of double precision.
	int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
	<< fputil::FPBits<double>::FRACTION_LEN;
	// mh = 2^hi * 2^mid
	// mh_bits = bit field of mh
	int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
	double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
	// dx = lo = x - (hi + mid) * log(2)
	double dx = fputil::multiply_add(
	kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
	return {mh, dx};
	}

	// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
	// and exp(-x) simultaneously.
	// To compute e^x, we perform the following range
	// reduction: find hi, mid, lo such that:
	// x = (hi + mid) * log(2) + lo, in which
	// hi is an integer,
	// 0 <= mid * 2^5 < 32 is an integer
	// -2^(-6) <= lo * log2(e) <= 2^-6.
	// In particular,
	// hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
	// Then,
	// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
	// 2^mid is stored in the lookup table of 32 elements.
	// e^lo is computed using a degree-5 minimax polynomial
	// generated by Sollya:
	// e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
	// = (1 + c2lo^2 + c4lo^4) + lo * (1 + c3lo^2 + c5lo^4)
	// = P_even + lo * P_odd
	// We perform 2^hi * 2^mid by simply add hi to the exponent field
	// of 2^mid.
	// To compute e^(-x), notice that:
	// e^(-x) = 2^(-(hi + mid)) * e^(-lo)
	// ~ 2^(-(hi + mid)) * P(-lo)
	// = 2^(-(hi + mid)) * (P_even - lo * P_odd)
	// So:
	// sinh(x) = (e^x - e^(-x)) / 2
	// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
	// 2^(-(hi + mid)) * (P_even - lo * P_odd))
	// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
	// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
	// And similarly:
	// cosh(x) = (e^x + e^(-x)) / 2
	// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
	// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
	// The main point of these formulas is that the expensive part of calculating
	// the polynomials approximating lower parts of e^(x) and e^(-x) are shared
	// and only done once.
	template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
	double xd = static_cast<double>(x);

	// kd = round(x * log2(e) * 2^5)
	// k_p = round(x * log2(e) * 2^5)
	// k_m = round(-x * log2(e) * 2^5)
	double kd;
	int k_p, k_m;

	#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
	kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
	k_p = static_cast<int>(kd);
	k_m = -k_p;
	#else
	constexpr double HALF_WAY[2] = {0.5, -0.5};

	k_p = static_cast<int>(
	fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
	k_m = -k_p;
	kd = static_cast<double>(k_p);
	#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT

	// hi = floor(kf * 2^(-5))
	// exp_hi = shift hi to the exponent field of double precision.
	int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
	<< fputil::FPBits<double>::FRACTION_LEN;
	int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
	<< fputil::FPBits<double>::FRACTION_LEN;
	// mh_p = 2^(hi + mid)
	// mh_m = 2^(-(hi + mid))
	// mh_bits_* = bit field of mh_*
	int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
	int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
	double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
	double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
	// mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
	double mh_sum = mh_p + mh_m;
	// mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
	double mh_diff = mh_p - mh_m;

	// dx = lo = x - (hi + mid) * log(2)
	double dx =
	fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
	fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
	double dx2 = dx * dx;

	// c0 = 1 + COEFFS[0] * lo^2
	// P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
	double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
	ExpBase::COEFFS[2] * 0.5);
	// P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
	double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
	ExpBase::COEFFS[3] * 0.5);

	double r;
	if constexpr (is_sinh)
	r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
	else
	r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
	return r;
	}

	// x should be positive, normal finite value
	LIBC_INLINE static double log2_eval(double x) {
	using FPB = fputil::FPBits<double>;
	FPB bs(x);

	double result = 0;
	result += bs.get_exponent();

	int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) &
	(LOG_P1_SIZE - 1);

	bs.bits &= FPB::FRACTION_MASK >> LOG_P1_BITS;
	bs.set_biased_exponent(FPB::EXP_BIAS);
	double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1];

	// Taylor series for log(2,1+x)
	double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]);
	double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]);
	double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]);
	double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]);

	// c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1]
	double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]);
	result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4);
	return result;
	}

	// x should be positive, normal finite value
	LIBC_INLINE static double log_eval(double x) {
	// For x = 2^ex * (1 + mx)
	// log(x) = ex * log(2) + log(1 + mx)
	using FPB = fputil::FPBits<double>;
	FPB bs(x);

	double ex = static_cast<double>(bs.get_exponent());

	// p1 is the leading 7 bits of mx, i.e.
	// p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
	int p1 = static_cast<int>(bs.get_mantissa() >> (FPB::FRACTION_LEN - 7));

	// Set bs to (1 + (mx - p1*2^(-7))
	bs.bits &= FPB::FRACTION_MASK >> 7;
	bs.set_biased_exponent(FPB::EXP_BIAS);
	// dx = (mx - p12^(-7)) / (1 + p12^(-7)).
	double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1];

	// Minimax polynomial of log(1 + dx) generated by Sollya with:
	// > P = fpminimax(log(1 + x)/x, 6, [\|D...\|], [0, 2^-7]);
	const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2,
	-0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3,
	-0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3};
	double dx2 = dx * dx;
	double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
	double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
	double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);

	double p = fputil::polyeval(dx2, dx, c1, c2, c3);
	double result =
	fputil::multiply_add(ex, /log(2)/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p);
	return result;
	}

	// Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We
	// assume further that 1 <= mid < 2, mid + lo < 2, and \|lo\| << mid.
	// Notice that, if 0 < x < 2^-1022,
	// double(2^-1022 + x) - 2^-1022 = double(x).
	// So if we scale x up by 2^1022, we can use
	// double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
	LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
	double err) {
	using FPBits = typename fputil::FPBits<double>;

	// Scaling factor = 1/(min normal number) = 2^1022
	int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN;
	double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));
	double lo_scaled =
	(lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo))
	: 0.0;

	double extra_factor = 0.0;
	uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field.

	// Result is denormal if (mid_hi + lo_scale < 1.0).
	if ((1.0 - mid_hi) > lo_scaled) {
	// Extra rounding step is needed, which adds more rounding errors.
	err += 0x1.0p-52;
	extra_factor = 1.0;
	scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field.
	}

	double err_scaled =
	cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err));

	double lo_u = lo_scaled + err_scaled;
	double lo_l = lo_scaled - err_scaled;

	// By adding 1.0, the results will have similar rounding points as denormal
	// outputs.
	double upper = extra_factor + (mid_hi + lo_u);
	double lower = extra_factor + (mid_hi + lo_l);

	if (LIBC_LIKELY(upper == lower)) {
	return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
	}

	return cpp::nullopt;
	}

	} // namespace LIBC_NAMESPACE

	#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H