libc/src/__support/FPUtil/XFloat.h - llvm-project - Git at Google

 //===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "FPBits.h"
 #include "NormalFloat.h"
 #include "UInt.h"

 #include <stdint.h>

 namespace __llvm_libc {
 namespace fputil {

 // Store and manipulate positive double precision numbers at |Precision| bits.
 template <size_t Precision> class XFloat {
   static constexpr uint64_t OneMask = (uint64_t(1) << 63);
   UInt<Precision> man;
   static constexpr uint64_t WordCount = Precision / 64;
   int exp;

   size_t bit_width(uint64_t x) {
     if (x == 0)
       return 0;
     size_t shift = 0;
     while ((OneMask & x) == 0) {
       ++shift;
       x <<= 1;
     }
     return 64 - shift;
   }

 public:
   XFloat() : exp(0) {
     for (int i = 0; i < WordCount; ++i)
       man[i] = 0;
   }

   XFloat(const XFloat &other) : exp(other.exp) {
     for (int i = 0; i < WordCount; ++i)
       man[i] = other.man[i];
   }

   explicit XFloat(double x) {
     auto xn = NormalFloat<double>(x);
     exp = xn.exponent;
     man[WordCount - 1] = xn.mantissa << 11;
     for (int i = 0; i < WordCount - 1; ++i)
       man[i] = 0;
   }

   XFloat(int e, const UInt<Precision> &bits) : exp(e) {
     for (size_t i = 0; i < WordCount; ++i)
       man[i] = bits[i];
   }

   // Multiply this number with x and store the result in this number.
   void mul(double x) {
     auto xn = NormalFloat<double>(x);
     exp += xn.exponent;
     uint64_t carry = man.mul(xn.mantissa << 11);
     size_t carry_width = bit_width(carry);
     carry_width = (carry_width == 64 ? 64 : 63);
     man.shift_right(carry_width);
     man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
     exp += carry_width == 64 ? 1 : 0;
     normalize();
   }

   void drop_int() {
     if (exp < 0)
       return;
     if (exp > int(Precision - 1)) {
       for (size_t i = 0; i < WordCount; ++i)
         man[i] = 0;
       return;
     }

     man.shift_left(exp + 1);
     man.shift_right(exp + 1);

     normalize();
   }

   double mul(const XFloat<Precision> &other) {
     constexpr size_t row_words = 2 * WordCount + 1;
     constexpr size_t row_precision = row_words * 64;
     constexpr size_t result_bits = 2 * Precision;
     UInt<row_precision> rows[WordCount];

     for (size_t r = 0; r < WordCount; ++r) {
       for (size_t i = 0; i < row_words; ++i) {
         if (i < WordCount)
           rows[r][i] = man[i];
         else
           rows[r][i] = 0;
       }
       rows[r].mul(other.man[r]);
       rows[r].shift_left(r * 64);
     }

     for (size_t r = 1; r < WordCount; ++r) {
       rows[0].add(rows[r]);
     }
     int result_exp = exp + other.exp;
     uint64_t carry = rows[0][row_words - 1];
     if (carry) {
       size_t carry_width = bit_width(carry);
       rows[0].shift_right(carry_width);
       rows[0][row_words - 2] =
           rows[0][row_words - 2] + (carry << (64 - carry_width));
       result_exp += carry_width;
     }

     if (rows[0][row_words - 2] & OneMask) {
       ++result_exp;
     } else {
       rows[0].shift_left(1);
     }

     UInt<result_bits> result_man;
     for (size_t i = 0; i < result_bits / 64; ++i)
       result_man[i] = rows[0][i];
     XFloat<result_bits> result(result_exp, result_man);
     result.normalize();
     return double(result);
   }

   explicit operator double() {
     normalize();

     constexpr uint64_t one = uint64_t(1) << 10;
     constexpr uint64_t excess_mask = (one << 1) - 1;
     uint64_t excess = man[WordCount - 1] & excess_mask;
     uint64_t new_man = man[WordCount - 1] >> 11;
     if (excess > one) {
       // We have to round up.
       ++new_man;
     } else if (excess == one) {
       bool greater_than_one = false;
       for (size_t i = 0; i < WordCount - 1; ++i) {
         greater_than_one = (man[i] != 0);
         if (greater_than_one)
           break;
       }
       if (greater_than_one || (new_man & 1) != 0) {
         ++new_man;
       }
     }

     if (new_man == (uint64_t(1) << 53))
       ++exp;

     // We use NormalFloat as it can produce subnormal numbers or underflow to 0
     // if necessary.
     NormalFloat<double> d(exp, new_man, 0);
     return double(d);
   }

   // Normalizes this number.
   void normalize() {
     uint64_t man_bits = 0;
     for (size_t i = 0; i < WordCount; ++i)
       man_bits |= man[i];

     if (man_bits == 0)
       return;

     while ((man[WordCount - 1] & OneMask) == 0) {
       man.shift_left(1);
       --exp;
     }
   }
 };

 } // namespace fputil
 } // namespace __llvm_libc
	//===-- Utility class to manipulate wide floats. ----------------- C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "FPBits.h"
	#include "NormalFloat.h"
	#include "UInt.h"

	#include <stdint.h>

	namespace __llvm_libc {
	namespace fputil {

	// Store and manipulate positive double precision numbers at \|Precision\| bits.
	template <size_t Precision> class XFloat {
	static constexpr uint64_t OneMask = (uint64_t(1) << 63);
	UInt<Precision> man;
	static constexpr uint64_t WordCount = Precision / 64;
	int exp;

	size_t bit_width(uint64_t x) {
	if (x == 0)
	return 0;
	size_t shift = 0;
	while ((OneMask & x) == 0) {
	++shift;
	x <<= 1;
	}
	return 64 - shift;
	}

	public:
	XFloat() : exp(0) {
	for (int i = 0; i < WordCount; ++i)
	man[i] = 0;
	}

	XFloat(const XFloat &other) : exp(other.exp) {
	for (int i = 0; i < WordCount; ++i)
	man[i] = other.man[i];
	}

	explicit XFloat(double x) {
	auto xn = NormalFloat<double>(x);
	exp = xn.exponent;
	man[WordCount - 1] = xn.mantissa << 11;
	for (int i = 0; i < WordCount - 1; ++i)
	man[i] = 0;
	}

	XFloat(int e, const UInt<Precision> &bits) : exp(e) {
	for (size_t i = 0; i < WordCount; ++i)
	man[i] = bits[i];
	}

	// Multiply this number with x and store the result in this number.
	void mul(double x) {
	auto xn = NormalFloat<double>(x);
	exp += xn.exponent;
	uint64_t carry = man.mul(xn.mantissa << 11);
	size_t carry_width = bit_width(carry);
	carry_width = (carry_width == 64 ? 64 : 63);
	man.shift_right(carry_width);
	man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
	exp += carry_width == 64 ? 1 : 0;
	normalize();
	}

	void drop_int() {
	if (exp < 0)
	return;
	if (exp > int(Precision - 1)) {
	for (size_t i = 0; i < WordCount; ++i)
	man[i] = 0;
	return;
	}

	man.shift_left(exp + 1);
	man.shift_right(exp + 1);

	normalize();
	}

	double mul(const XFloat<Precision> &other) {
	constexpr size_t row_words = 2 * WordCount + 1;
	constexpr size_t row_precision = row_words * 64;
	constexpr size_t result_bits = 2 * Precision;
	UInt<row_precision> rows[WordCount];

	for (size_t r = 0; r < WordCount; ++r) {
	for (size_t i = 0; i < row_words; ++i) {
	if (i < WordCount)
	rows[r][i] = man[i];
	else
	rows[r][i] = 0;
	}
	rows[r].mul(other.man[r]);
	rows[r].shift_left(r * 64);
	}

	for (size_t r = 1; r < WordCount; ++r) {
	rows[0].add(rows[r]);
	}
	int result_exp = exp + other.exp;
	uint64_t carry = rows[0][row_words - 1];
	if (carry) {
	size_t carry_width = bit_width(carry);
	rows[0].shift_right(carry_width);
	rows[0][row_words - 2] =
	rows[0][row_words - 2] + (carry << (64 - carry_width));
	result_exp += carry_width;
	}

	if (rows[0][row_words - 2] & OneMask) {
	++result_exp;
	} else {
	rows[0].shift_left(1);
	}

	UInt<result_bits> result_man;
	for (size_t i = 0; i < result_bits / 64; ++i)
	result_man[i] = rows[0][i];
	XFloat<result_bits> result(result_exp, result_man);
	result.normalize();
	return double(result);
	}

	explicit operator double() {
	normalize();

	constexpr uint64_t one = uint64_t(1) << 10;
	constexpr uint64_t excess_mask = (one << 1) - 1;
	uint64_t excess = man[WordCount - 1] & excess_mask;
	uint64_t new_man = man[WordCount - 1] >> 11;
	if (excess > one) {
	// We have to round up.
	++new_man;
	} else if (excess == one) {
	bool greater_than_one = false;
	for (size_t i = 0; i < WordCount - 1; ++i) {
	greater_than_one = (man[i] != 0);
	if (greater_than_one)
	break;
	}
	if (greater_than_one \|\| (new_man & 1) != 0) {
	++new_man;
	}
	}

	if (new_man == (uint64_t(1) << 53))
	++exp;

	// We use NormalFloat as it can produce subnormal numbers or underflow to 0
	// if necessary.
	NormalFloat<double> d(exp, new_man, 0);
	return double(d);
	}

	// Normalizes this number.
	void normalize() {
	uint64_t man_bits = 0;
	for (size_t i = 0; i < WordCount; ++i)
	man_bits \|= man[i];

	if (man_bits == 0)
	return;

	while ((man[WordCount - 1] & OneMask) == 0) {
	man.shift_left(1);
	--exp;
	}
	}
	};

	} // namespace fputil
	} // namespace __llvm_libc