| //===-- Single-precision atanf float function -----------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H |
| #define LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H |
| |
| #include "src/__support/FPUtil/FEnvImpl.h" |
| #include "src/__support/FPUtil/FPBits.h" |
| #include "src/__support/FPUtil/double_double.h" |
| #include "src/__support/FPUtil/multiply_add.h" |
| #include "src/__support/FPUtil/nearest_integer.h" |
| #include "src/__support/macros/config.h" |
| #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
| |
| namespace LIBC_NAMESPACE_DECL { |
| |
| namespace math { |
| |
| namespace atanf_internal { |
| |
| using fputil::FloatFloat; |
| // atan(i/64) with i = 0..16, generated by Sollya with: |
| // > for i from 0 to 16 do { |
| // a = round(atan(i/16), SG, RN); |
| // b = round(atan(i/16) - a, SG, RN); |
| // print("{", b, ",", a, "},"); |
| // }; |
| static constexpr FloatFloat ATAN_I[17] = { |
| {0.0f, 0.0f}, |
| {-0x1.1a6042p-30f, 0x1.ff55bcp-5f}, |
| {-0x1.54f424p-30f, 0x1.fd5baap-4f}, |
| {0x1.79cb6p-28f, 0x1.7b97b4p-3f}, |
| {-0x1.b4dfc8p-29f, 0x1.f5b76p-3f}, |
| {-0x1.1f0286p-27f, 0x1.362774p-2f}, |
| {0x1.e4defp-30f, 0x1.6f6194p-2f}, |
| {0x1.e611fep-29f, 0x1.a64eecp-2f}, |
| {0x1.586ed4p-28f, 0x1.dac67p-2f}, |
| {-0x1.6499e6p-26f, 0x1.0657eap-1f}, |
| {0x1.7bdfd6p-26f, 0x1.1e00bap-1f}, |
| {-0x1.98e422p-28f, 0x1.345f02p-1f}, |
| {0x1.934f7p-28f, 0x1.4978fap-1f}, |
| {0x1.c5a6c6p-27f, 0x1.5d5898p-1f}, |
| {0x1.5e118cp-27f, 0x1.700a7cp-1f}, |
| {-0x1.1d4eb6p-26f, 0x1.819d0cp-1f}, |
| {-0x1.777a5cp-26f, 0x1.921fb6p-1f}, |
| }; |
| |
| // 1 / (1 + (i/16)^2) with i = 0..16, generated by Sollya with: |
| // > for i from 0 to 16 do { |
| // a = round(1 / (1 + (i/16)^2), SG, RN); |
| // print(a, ","); |
| // }; |
| static constexpr float ATANF_REDUCED_ARG[17] = { |
| 0x1.0p0f, 0x1.fe01fep-1f, 0x1.f81f82p-1f, 0x1.ee9c8p-1f, |
| 0x1.e1e1e2p-1f, 0x1.d272cap-1f, 0x1.c0e07p-1f, 0x1.adbe88p-1f, |
| 0x1.99999ap-1f, 0x1.84f00cp-1f, 0x1.702e06p-1f, 0x1.5babccp-1f, |
| 0x1.47ae14p-1f, 0x1.34679ap-1f, 0x1.21fb78p-1f, 0x1.107fbcp-1f, |
| 0x1p-1f, |
| }; |
| |
| // Approximating atan( u / (1 + u * k/16) ) |
| // atan( u / (1 + u * k/16) ) / u ~ 1 - k/16 * u + (k^2/256 - 1/3) * u^2 + |
| // + (k/16 - (k/16)^3) * u^3 + O(u^4) |
| LIBC_INLINE static float atanf_eval(float u, float k_over_16) { |
| // (k/16)^2 |
| float c2 = k_over_16 * k_over_16; |
| // -(k/16)^3 |
| float c3 = fputil::multiply_add(-k_over_16, c2, k_over_16); |
| float u2 = u * u; |
| // (k^2/256 - 1/3) + u * (k/16 - (k/16)^3) |
| float a0 = fputil::multiply_add(c3, u, c2 - 0x1.555556p-2f); |
| // -k/16 + u*(k^2/256 - 1/3) + u^2 * (k/16 - (k/16)^3) |
| float a1 = fputil::multiply_add(u, a0, -k_over_16); |
| // u - u^2 * k/16 + u^3 * ((k^2/256 - 1/3) + u^4 * (k/16 - (k/16)^3)) |
| return fputil::multiply_add(u2, a1, u); |
| } |
| |
| } // namespace atanf_internal |
| |
| // There are several range reduction steps we can take for atan2(y, x) as |
| // follow: |
| |
| LIBC_INLINE static float atanf(float x) { |
| using namespace atanf_internal; |
| using FPBits = typename fputil::FPBits<float>; |
| using FPBits = typename fputil::FPBits<float>; |
| |
| constexpr float SIGN[2] = {1.0f, -1.0f}; |
| constexpr FloatFloat PI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f}; |
| |
| FPBits x_bits(x); |
| Sign s = x_bits.sign(); |
| float sign = SIGN[s.is_neg()]; |
| uint32_t x_abs = x_bits.uintval() & 0x7fff'ffffU; |
| |
| // x is inf or nan, |x| <= 2^-11 or |x|= > 2^11. |
| if (LIBC_UNLIKELY(x_abs <= 0x3a00'0000U || x_abs >= 0x4500'0000U)) { |
| if (LIBC_UNLIKELY(x_bits.is_inf())) |
| return sign * PI_OVER_2.hi; |
| // atan(NaN) = NaN |
| if (LIBC_UNLIKELY(x_bits.is_nan())) { |
| if (x_bits.is_signaling_nan()) { |
| fputil::raise_except_if_required(FE_INVALID); |
| return FPBits::quiet_nan().get_val(); |
| } |
| |
| return x; |
| } |
| // |x| >= 2^11: |
| // atan(x) = sign(x) * pi/2 - atan(1/x) |
| // ~ sign(x) * pi/2 - 1/x |
| if (LIBC_UNLIKELY(x_abs >= 0x4200'0000)) |
| return fputil::multiply_add(sign, PI_OVER_2.hi, -1.0f / x); |
| // x <= 2^-11: |
| // atan(x) ~ x |
| return x; |
| } |
| |
| // Range reduction steps: |
| // 1) atan(x) = sign(x) * atan(|x|) |
| // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|) |
| // 3) For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32. |
| // Let y = |x| - k/16, then using the angle summation formula, we have: |
| // atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) ) |
| // = atan(k/16) + atan( y / (1 + (y + k/16) * k/16 ) |
| // = atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) ) |
| // 4) Let u = y / (1 + k^2/256), then we can rewritten the above as: |
| // atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) ) |
| // ~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 + |
| // + (k/16 - (k/16)^3) * u^4) + O(u^5) |
| float x_a = cpp::bit_cast<float>(x_abs); |
| // |x| > 1 + 1/32, we need to invert x, so we will perform the division in |
| // float-float. |
| if (x_abs > 0x3f84'0000U) |
| x_a = 1.0f / x_a; |
| // Perform range reduction. |
| // k = nearestint(x * 16) |
| float k_f = fputil::nearest_integer(x_a * 0x1.0p4f); |
| unsigned idx = static_cast<unsigned>(k_f); |
| float k_over_16 = k_f * 0x1.0p-4f; |
| float y = x_a - k_over_16; |
| // u = (x - k/16) / (1 + (k/16)^2) |
| float u = y * ATANF_REDUCED_ARG[idx]; |
| |
| // atan(x) = sign(x) * atan(|x|) |
| // = sign(x) * (atan(k/16) + atan(|)) |
| // p ~ atan(u) |
| float p = atanf_eval(u, k_over_16); |
| // |x| > 1 + 1/32: q ~ (pi/2 - atan(1/|x|)) |
| // |x| <= 1 + 1/32: q ~ atan(|x|) |
| float q = (p + ATAN_I[idx].lo) + ATAN_I[idx].hi; |
| if (x_abs > 0x3f84'0000U) |
| q = PI_OVER_2.hi + (PI_OVER_2.lo - q); |
| // |x| > 1 + 1/32: sign(x) * (pi/2 - atan(1/|x|)) |
| // |x| <= 1 + 1/32: sign(x) * atan(|x|) |
| return sign * q; |
| } |
| |
| } // namespace math |
| |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H |