| //===-- Single-precision general exp/log functions ------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
| #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
| |
| #include "common_constants.h" |
| |
| #include "src/__support/common.h" |
| #include "src/__support/macros/properties/cpu_features.h" |
| #include "src/__support/math/acoshf_utils.h" |
| #include "src/__support/math/exp10f_utils.h" |
| #include "src/__support/math/exp_utils.h" |
| |
| namespace LIBC_NAMESPACE_DECL { |
| |
| constexpr int LOG_P1_BITS = 6; |
| constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; |
| |
| // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) |
| // and exp(-x) simultaneously. |
| // To compute e^x, we perform the following range |
| // reduction: find hi, mid, lo such that: |
| // x = (hi + mid) * log(2) + lo, in which |
| // hi is an integer, |
| // 0 <= mid * 2^5 < 32 is an integer |
| // -2^(-6) <= lo * log2(e) <= 2^-6. |
| // In particular, |
| // hi + mid = round(x * log2(e) * 2^5) * 2^(-5). |
| // Then, |
| // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. |
| // 2^mid is stored in the lookup table of 32 elements. |
| // e^lo is computed using a degree-5 minimax polynomial |
| // generated by Sollya: |
| // e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 |
| // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) |
| // = P_even + lo * P_odd |
| // We perform 2^hi * 2^mid by simply add hi to the exponent field |
| // of 2^mid. |
| // To compute e^(-x), notice that: |
| // e^(-x) = 2^(-(hi + mid)) * e^(-lo) |
| // ~ 2^(-(hi + mid)) * P(-lo) |
| // = 2^(-(hi + mid)) * (P_even - lo * P_odd) |
| // So: |
| // sinh(x) = (e^x - e^(-x)) / 2 |
| // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - |
| // 2^(-(hi + mid)) * (P_even - lo * P_odd)) |
| // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + |
| // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) |
| // And similarly: |
| // cosh(x) = (e^x + e^(-x)) / 2 |
| // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + |
| // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) |
| // The main point of these formulas is that the expensive part of calculating |
| // the polynomials approximating lower parts of e^(x) and e^(-x) are shared |
| // and only done once. |
| template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { |
| double xd = static_cast<double>(x); |
| |
| // kd = round(x * log2(e) * 2^5) |
| // k_p = round(x * log2(e) * 2^5) |
| // k_m = round(-x * log2(e) * 2^5) |
| double kd; |
| int k_p, k_m; |
| |
| #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT |
| kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); |
| k_p = static_cast<int>(kd); |
| k_m = -k_p; |
| #else |
| constexpr double HALF_WAY[2] = {0.5, -0.5}; |
| |
| k_p = static_cast<int>( |
| fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); |
| k_m = -k_p; |
| kd = static_cast<double>(k_p); |
| #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT |
| |
| // hi = floor(kf * 2^(-5)) |
| // exp_hi = shift hi to the exponent field of double precision. |
| int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS)) |
| << fputil::FPBits<double>::FRACTION_LEN; |
| int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS)) |
| << fputil::FPBits<double>::FRACTION_LEN; |
| // mh_p = 2^(hi + mid) |
| // mh_m = 2^(-(hi + mid)) |
| // mh_bits_* = bit field of mh_* |
| int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; |
| int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; |
| double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val(); |
| double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val(); |
| // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) |
| double mh_sum = mh_p + mh_m; |
| // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) |
| double mh_diff = mh_p - mh_m; |
| |
| // dx = lo = x - (hi + mid) * log(2) |
| double dx = |
| fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, |
| fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); |
| double dx2 = dx * dx; |
| |
| // c0 = 1 + COEFFS[0] * lo^2 |
| // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 |
| double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, |
| ExpBase::COEFFS[2] * 0.5); |
| // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 |
| double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, |
| ExpBase::COEFFS[3] * 0.5); |
| |
| double r; |
| if constexpr (is_sinh) |
| r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); |
| else |
| r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); |
| return r; |
| } |
| |
| // x should be positive, normal finite value |
| // TODO: Simplify range reduction and polynomial degree for float16. |
| // See issue #137190. |
| LIBC_INLINE static float log_eval_f(float x) { |
| // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). |
| using FPBits = fputil::FPBits<float>; |
| FPBits xbits(x); |
| |
| float ex = static_cast<float>(xbits.get_exponent()); |
| // p1 is the leading 7 bits of mx, i.e. |
| // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). |
| int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); |
| |
| // Set bits to (1 + (mx - p1*2^(-7))) |
| xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); |
| xbits.set_biased_exponent(FPBits::EXP_BIAS); |
| // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). |
| float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; |
| |
| // Minimax polynomial for log(1 + dx), generated using Sollya: |
| // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); |
| // > Q = (P - 1) / x; |
| // > for i from 0 to degree(Q) do print(coeff(Q, i)); |
| constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, |
| 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; |
| |
| float dx2 = dx * dx; |
| |
| float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
| float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
| float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
| |
| float p = fputil::polyeval(dx2, dx, c1, c2, c3); |
| |
| // Generated by Sollya with the following commands: |
| // > display = hexadecimal; |
| // > round(log(2), SG, RN); |
| constexpr float LOGF_2 = 0x1.62e43p-1f; |
| |
| float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); |
| return result; |
| } |
| |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |