blob: 3d760f983524909244cc971060c2eb348e080e3a [file]
//===-- Collection of utils for acoshf --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H
#include "acosh_float_constants.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/optimization.h"
namespace LIBC_NAMESPACE_DECL {
namespace acoshf_internal {
// Compute log(|x|), use for float functions, so the error requirements are not
// as strict as for double precision, and x is assumed to be normal.
#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
defined(LIBC_MATH_HAS_SMALL_TABLES)
LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) {
using FPBits = fputil::FPBits<double>;
FPBits x_bits(x);
uint64_t x_u = x_bits.uintval();
// Extract exponent and remove sign bit.
double ex = static_cast<double>(
(static_cast<int>(x_u >> FPBits::FRACTION_LEN) & 0x7ff) -
FPBits::EXP_BIAS);
// Reduce to 1.m
double x_r =
FPBits((x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval()).get_val();
// x_r = 1 + dx
double dx = x_r - 1.0;
// Minimax polynomial of log(1 + x) generated by Sollya with:
// > P = fpminimax(log(1 + x)/x, 8, [|1, D...|], [0, 1]);
// > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [0, 1]);
// 0x1.2d5f0a5f66124e3afefa7a66251d1530e07301ed8p-25
constexpr double COEFFS[8] = {
-0x1.ffff09a15a555p-2, 0x1.55350257eb492p-2, -0x1.fd0649c8116b3p-3,
0x1.8814186a6a587p-3, -0x1.194a9c269ae3p-3, 0x1.4389c5fa07e93p-4,
-0x1.ea7bb4f18dbccp-6, 0x1.5d864e41667eep-8,
};
constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
double dx2 = dx * dx;
double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
double c3 = fputil::multiply_add(dx, COEFFS[7], COEFFS[6]);
double dx4 = dx2 * dx2;
double d0 = fputil::multiply_add(dx2, c1, c0);
double d1 = fputil::multiply_add(dx2, c3, c2);
double p = fputil::multiply_add(dx4, d1, d0);
double r_hi = fputil::multiply_add(ex, LOG_2, dx);
double r = fputil::multiply_add(dx2, p, r_hi);
return r;
}
#else // Accurate evaluation.
LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) {
constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
// For x = 2^ex * (1 + mx)
// log(x) = ex * log(2) + log(1 + mx)
using FPBits = fputil::FPBits<double>;
FPBits x_bits(x);
uint64_t x_u = x_bits.uintval();
// log(x) = log(2^x_e * x_m)
// = x_e * log(2) + log(x_m)
// Range reduction for log(x_m):
// For each x_m, we would like to find r such that:
// -2^-7 <= r * x_m - 1 < 2^-6
int shifted = static_cast<int>(x_u >> (FPBits::FRACTION_LEN - 6));
int index = shifted & 0x3F;
double r = R_LOG[index];
// Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
// all 1's.
int x_e = static_cast<int>((x_u + (1ULL << (FPBits::FRACTION_LEN - 6))) >>
FPBits::FRACTION_LEN) -
FPBits::EXP_BIAS;
double e_x = static_cast<double>(x_e);
double r_hi = fputil::multiply_add(e_x, LOG_2, LOG_R[index]);
// Set m = 1.mantissa.
uint64_t x_m = (x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval();
double m = FPBits(x_m).get_val();
double dx = 0.0;
// Perform exact range reduction
#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
dx = fputil::multiply_add(r, m, -1.0); // exact
#else
uint64_t c_m = x_m & 0x3FFF'C000'0000'0000ULL;
double c = FPBits(c_m).get_val();
dx = fputil::multiply_add(r, m - c, C_LOG[index]); // exact
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
// Minimax polynomial of log(1 + dx) generated by Sollya with:
// > P = fpminimax(log(1 + x)/x, 6, [|1, D...|], [-2^-7, 2^-6]);
// > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [-2^-7, 2^-6]);
// 0x1.6e853e8864f29a6f10d50698ee6ef972a79d3a487p-54
constexpr double COEFFS[6] = {-0x1.ffffffffffe03p-2, 0x1.55555555395f9p-2,
-0x1.0000001be5329p-2, 0x1.9999c1bf8c3afp-3,
-0x1.554f0ba9cee4bp-3, 0x1.1d94cd56b72d7p-3};
double dx2 = dx * dx;
double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
double dx4 = dx2 * dx2;
double d1 = fputil::multiply_add(dx2, c1, r_hi + dx);
double d2 = fputil::multiply_add(dx2, c3, c2);
double result = fputil::multiply_add(dx4, d2, d1);
return result;
}
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES
} // namespace acoshf_internal
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H