| //===-- Collection of utils for acoshf --------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |
| #define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |
| |
| #include "acosh_float_constants.h" |
| #include "src/__support/FPUtil/FPBits.h" |
| #include "src/__support/FPUtil/PolyEval.h" |
| #include "src/__support/FPUtil/multiply_add.h" |
| #include "src/__support/macros/attributes.h" |
| #include "src/__support/macros/optimization.h" |
| |
| namespace LIBC_NAMESPACE_DECL { |
| |
| namespace acoshf_internal { |
| |
| // Compute log(|x|), use for float functions, so the error requirements are not |
| // as strict as for double precision, and x is assumed to be normal. |
| |
| #if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ |
| defined(LIBC_MATH_HAS_SMALL_TABLES) |
| |
| LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) { |
| using FPBits = fputil::FPBits<double>; |
| FPBits x_bits(x); |
| uint64_t x_u = x_bits.uintval(); |
| // Extract exponent and remove sign bit. |
| double ex = static_cast<double>( |
| (static_cast<int>(x_u >> FPBits::FRACTION_LEN) & 0x7ff) - |
| FPBits::EXP_BIAS); |
| // Reduce to 1.m |
| double x_r = |
| FPBits((x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval()).get_val(); |
| // x_r = 1 + dx |
| double dx = x_r - 1.0; |
| // Minimax polynomial of log(1 + x) generated by Sollya with: |
| // > P = fpminimax(log(1 + x)/x, 8, [|1, D...|], [0, 1]); |
| // > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [0, 1]); |
| // 0x1.2d5f0a5f66124e3afefa7a66251d1530e07301ed8p-25 |
| constexpr double COEFFS[8] = { |
| -0x1.ffff09a15a555p-2, 0x1.55350257eb492p-2, -0x1.fd0649c8116b3p-3, |
| 0x1.8814186a6a587p-3, -0x1.194a9c269ae3p-3, 0x1.4389c5fa07e93p-4, |
| -0x1.ea7bb4f18dbccp-6, 0x1.5d864e41667eep-8, |
| }; |
| constexpr double LOG_2 = 0x1.62e42fefa39efp-1; |
| |
| double dx2 = dx * dx; |
| double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
| double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
| double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
| double c3 = fputil::multiply_add(dx, COEFFS[7], COEFFS[6]); |
| double dx4 = dx2 * dx2; |
| double d0 = fputil::multiply_add(dx2, c1, c0); |
| double d1 = fputil::multiply_add(dx2, c3, c2); |
| double p = fputil::multiply_add(dx4, d1, d0); |
| |
| double r_hi = fputil::multiply_add(ex, LOG_2, dx); |
| double r = fputil::multiply_add(dx2, p, r_hi); |
| return r; |
| } |
| |
| #else // Accurate evaluation. |
| |
| LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) { |
| constexpr double LOG_2 = 0x1.62e42fefa39efp-1; |
| |
| // For x = 2^ex * (1 + mx) |
| // log(x) = ex * log(2) + log(1 + mx) |
| using FPBits = fputil::FPBits<double>; |
| FPBits x_bits(x); |
| uint64_t x_u = x_bits.uintval(); |
| |
| // log(x) = log(2^x_e * x_m) |
| // = x_e * log(2) + log(x_m) |
| |
| // Range reduction for log(x_m): |
| // For each x_m, we would like to find r such that: |
| // -2^-7 <= r * x_m - 1 < 2^-6 |
| int shifted = static_cast<int>(x_u >> (FPBits::FRACTION_LEN - 6)); |
| int index = shifted & 0x3F; |
| double r = R_LOG[index]; |
| |
| // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are |
| // all 1's. |
| int x_e = static_cast<int>((x_u + (1ULL << (FPBits::FRACTION_LEN - 6))) >> |
| FPBits::FRACTION_LEN) - |
| FPBits::EXP_BIAS; |
| double e_x = static_cast<double>(x_e); |
| |
| double r_hi = fputil::multiply_add(e_x, LOG_2, LOG_R[index]); |
| |
| // Set m = 1.mantissa. |
| uint64_t x_m = (x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval(); |
| double m = FPBits(x_m).get_val(); |
| |
| double dx = 0.0; |
| |
| // Perform exact range reduction |
| #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| dx = fputil::multiply_add(r, m, -1.0); // exact |
| #else |
| uint64_t c_m = x_m & 0x3FFF'C000'0000'0000ULL; |
| double c = FPBits(c_m).get_val(); |
| dx = fputil::multiply_add(r, m - c, C_LOG[index]); // exact |
| #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| |
| // Minimax polynomial of log(1 + dx) generated by Sollya with: |
| // > P = fpminimax(log(1 + x)/x, 6, [|1, D...|], [-2^-7, 2^-6]); |
| // > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [-2^-7, 2^-6]); |
| // 0x1.6e853e8864f29a6f10d50698ee6ef972a79d3a487p-54 |
| constexpr double COEFFS[6] = {-0x1.ffffffffffe03p-2, 0x1.55555555395f9p-2, |
| -0x1.0000001be5329p-2, 0x1.9999c1bf8c3afp-3, |
| -0x1.554f0ba9cee4bp-3, 0x1.1d94cd56b72d7p-3}; |
| |
| double dx2 = dx * dx; |
| double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
| double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
| double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
| double dx4 = dx2 * dx2; |
| double d1 = fputil::multiply_add(dx2, c1, r_hi + dx); |
| double d2 = fputil::multiply_add(dx2, c3, c2); |
| double result = fputil::multiply_add(dx4, d2, d1); |
| return result; |
| } |
| |
| #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES |
| |
| } // namespace acoshf_internal |
| |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |