blob: 11dbf7df1475836ed4490ecd573cb6905efdbdd9 [file] [log] [blame]
//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FPBits.h"
#include "NormalFloat.h"
#include "UInt.h"
#include <stdint.h>
namespace __llvm_libc {
namespace fputil {
// Store and manipulate positive double precision numbers at |Precision| bits.
template <size_t Precision> class XFloat {
static constexpr uint64_t OneMask = (uint64_t(1) << 63);
UInt<Precision> man;
static constexpr uint64_t WordCount = Precision / 64;
int exp;
size_t bit_width(uint64_t x) {
if (x == 0)
return 0;
size_t shift = 0;
while ((OneMask & x) == 0) {
++shift;
x <<= 1;
}
return 64 - shift;
}
public:
XFloat() : exp(0) {
for (int i = 0; i < WordCount; ++i)
man[i] = 0;
}
XFloat(const XFloat &other) : exp(other.exp) {
for (int i = 0; i < WordCount; ++i)
man[i] = other.man[i];
}
explicit XFloat(double x) {
auto xn = NormalFloat<double>(x);
exp = xn.exponent;
man[WordCount - 1] = xn.mantissa << 11;
for (int i = 0; i < WordCount - 1; ++i)
man[i] = 0;
}
XFloat(int e, const UInt<Precision> &bits) : exp(e) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = bits[i];
}
// Multiply this number with x and store the result in this number.
void mul(double x) {
auto xn = NormalFloat<double>(x);
exp += xn.exponent;
uint64_t carry = man.mul(xn.mantissa << 11);
size_t carry_width = bit_width(carry);
carry_width = (carry_width == 64 ? 64 : 63);
man.shift_right(carry_width);
man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
exp += carry_width == 64 ? 1 : 0;
normalize();
}
void drop_int() {
if (exp < 0)
return;
if (exp > int(Precision - 1)) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = 0;
return;
}
man.shift_left(exp + 1);
man.shift_right(exp + 1);
normalize();
}
double mul(const XFloat<Precision> &other) {
constexpr size_t row_words = 2 * WordCount + 1;
constexpr size_t row_precision = row_words * 64;
constexpr size_t result_bits = 2 * Precision;
UInt<row_precision> rows[WordCount];
for (size_t r = 0; r < WordCount; ++r) {
for (size_t i = 0; i < row_words; ++i) {
if (i < WordCount)
rows[r][i] = man[i];
else
rows[r][i] = 0;
}
rows[r].mul(other.man[r]);
rows[r].shift_left(r * 64);
}
for (size_t r = 1; r < WordCount; ++r) {
rows[0].add(rows[r]);
}
int result_exp = exp + other.exp;
uint64_t carry = rows[0][row_words - 1];
if (carry) {
size_t carry_width = bit_width(carry);
rows[0].shift_right(carry_width);
rows[0][row_words - 2] =
rows[0][row_words - 2] + (carry << (64 - carry_width));
result_exp += carry_width;
}
if (rows[0][row_words - 2] & OneMask) {
++result_exp;
} else {
rows[0].shift_left(1);
}
UInt<result_bits> result_man;
for (size_t i = 0; i < result_bits / 64; ++i)
result_man[i] = rows[0][i];
XFloat<result_bits> result(result_exp, result_man);
result.normalize();
return double(result);
}
explicit operator double() {
normalize();
constexpr uint64_t one = uint64_t(1) << 10;
constexpr uint64_t excess_mask = (one << 1) - 1;
uint64_t excess = man[WordCount - 1] & excess_mask;
uint64_t new_man = man[WordCount - 1] >> 11;
if (excess > one) {
// We have to round up.
++new_man;
} else if (excess == one) {
bool greater_than_one = false;
for (size_t i = 0; i < WordCount - 1; ++i) {
greater_than_one = (man[i] != 0);
if (greater_than_one)
break;
}
if (greater_than_one || (new_man & 1) != 0) {
++new_man;
}
}
if (new_man == (uint64_t(1) << 53))
++exp;
// We use NormalFloat as it can produce subnormal numbers or underflow to 0
// if necessary.
NormalFloat<double> d(exp, new_man, 0);
return double(d);
}
// Normalizes this number.
void normalize() {
uint64_t man_bits = 0;
for (size_t i = 0; i < WordCount; ++i)
man_bits |= man[i];
if (man_bits == 0)
return;
while ((man[WordCount - 1] & OneMask) == 0) {
man.shift_left(1);
--exp;
}
}
};
} // namespace fputil
} // namespace __llvm_libc