blob: ddeb7c0f97ed2ba8082a9fea7eb120dda2ea8d5f [file] [log] [blame]
//===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "PlatformDefs.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/string.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/builtin_wrappers.h"
#include "src/__support/common.h"
#include "src/__support/integer_to_string.h"
#include "FloatProperties.h"
#include <stdint.h>
namespace __llvm_libc {
namespace fputil {
// Exposes the mantissa width that FloatProperties<T> defines for the
// floating point type T as a compile-time constant.
template <typename T> struct MantissaWidth {
  static constexpr unsigned VALUE = FloatProperties<T>::MANTISSA_WIDTH;
};
// Exposes the exponent width that FloatProperties<T> defines for the
// floating point type T as a compile-time constant.
template <typename T> struct ExponentWidth {
  static constexpr unsigned VALUE = FloatProperties<T>::EXPONENT_WIDTH;
};
// A generic class to represent single precision, double precision, and quad
// precision IEEE 754 floating point formats.
// On most platforms, the 'float' type corresponds to single precision floating
// point numbers, the 'double' type corresponds to double precision floating
// point numbers, and the 'long double' type corresponds to the quad precision
// floating numbers. On x86 platforms however, the 'long double' type maps to
// an x87 floating point format. This format is an IEEE 754 extension format.
// It is handled as an explicit specialization of this class.
template <typename T> struct FPBits {
"FPBits instantiated with invalid type.");
// Reinterpreting bits as an integer value and interpreting the bits of an
// integer value as a floating point value is used in tests. So, a convenient
// type is provided for such reinterpretations.
using FloatProp = FloatProperties<T>;
// TODO: Change UintType name to BitsType for consistency.
using UIntType = typename FloatProp::BitsType;
UIntType bits;
LIBC_INLINE void set_mantissa(UIntType mantVal) {
mantVal &= (FloatProp::MANTISSA_MASK);
bits &= ~(FloatProp::MANTISSA_MASK);
bits |= mantVal;
LIBC_INLINE UIntType get_mantissa() const {
return bits & FloatProp::MANTISSA_MASK;
LIBC_INLINE void set_unbiased_exponent(UIntType expVal) {
expVal = (expVal << (FloatProp::MANTISSA_WIDTH)) & FloatProp::EXPONENT_MASK;
bits &= ~(FloatProp::EXPONENT_MASK);
bits |= expVal;
LIBC_INLINE uint16_t get_unbiased_exponent() const {
return uint16_t((bits & FloatProp::EXPONENT_MASK) >>
// The function return mantissa with the implicit bit set iff the current
// value is a valid normal number.
LIBC_INLINE constexpr UIntType get_explicit_mantissa() {
return ((get_unbiased_exponent() > 0 && !is_inf_or_nan())
? (FloatProp::MANTISSA_MASK + 1)
: 0) |
(FloatProp::MANTISSA_MASK & bits);
LIBC_INLINE void set_sign(bool signVal) {
bits |= FloatProp::SIGN_MASK;
if (!signVal)
bits -= FloatProp::SIGN_MASK;
LIBC_INLINE bool get_sign() const {
return (bits & FloatProp::SIGN_MASK) != 0;
static_assert(sizeof(T) == sizeof(UIntType),
"Data type and integral representation have different sizes.");
static constexpr int EXPONENT_BIAS = (1 << (ExponentWidth<T>::VALUE - 1)) - 1;
static constexpr int MAX_EXPONENT = (1 << ExponentWidth<T>::VALUE) - 1;
static constexpr UIntType MIN_SUBNORMAL = UIntType(1);
static constexpr UIntType MAX_SUBNORMAL =
(UIntType(1) << MantissaWidth<T>::VALUE) - 1;
static constexpr UIntType MIN_NORMAL =
(UIntType(1) << MantissaWidth<T>::VALUE);
static constexpr UIntType MAX_NORMAL =
((UIntType(MAX_EXPONENT) - 1) << MantissaWidth<T>::VALUE) | MAX_SUBNORMAL;
// We don't want accidental type promotions/conversions, so we require exact
// type match.
template <typename XType, cpp::enable_if_t<cpp::is_same_v<T, XType>, int> = 0>
constexpr explicit FPBits(XType x) : bits(cpp::bit_cast<UIntType>(x)) {}
template <typename XType,
cpp::enable_if_t<cpp::is_same_v<XType, UIntType>, int> = 0>
constexpr explicit FPBits(XType x) : bits(x) {}
FPBits() : bits(0) {}
LIBC_INLINE T get_val() const { return cpp::bit_cast<T>(bits); }
LIBC_INLINE void set_val(T value) { bits = cpp::bit_cast<UIntType>(value); }
LIBC_INLINE explicit operator T() const { return get_val(); }
LIBC_INLINE UIntType uintval() const { return bits; }
LIBC_INLINE int get_exponent() const {
return int(get_unbiased_exponent()) - EXPONENT_BIAS;
LIBC_INLINE bool is_zero() const {
// Remove sign bit by shift
return (bits << 1) == 0;
LIBC_INLINE bool is_inf() const {
return (bits & FloatProp::EXP_MANT_MASK) == FloatProp::EXPONENT_MASK;
LIBC_INLINE bool is_nan() const {
return (bits & FloatProp::EXP_MANT_MASK) > FloatProp::EXPONENT_MASK;
LIBC_INLINE bool is_quiet_nan() const {
return (bits & FloatProp::EXP_MANT_MASK) ==
LIBC_INLINE bool is_inf_or_nan() const {
return (bits & FloatProp::EXPONENT_MASK) == FloatProp::EXPONENT_MASK;
LIBC_INLINE static constexpr FPBits<T> zero(bool sign = false) {
return FPBits(sign ? FloatProp::SIGN_MASK : UIntType(0));
LIBC_INLINE static constexpr FPBits<T> neg_zero() { return zero(true); }
LIBC_INLINE static constexpr FPBits<T> inf(bool sign = false) {
FPBits<T> bits(sign ? FloatProp::SIGN_MASK : UIntType(0));
return bits;
LIBC_INLINE static constexpr FPBits<T> neg_inf() {
FPBits<T> bits = inf();
return bits;
LIBC_INLINE static constexpr T build_nan(UIntType v) {
FPBits<T> bits = inf();
return T(bits);
LIBC_INLINE static constexpr T build_quiet_nan(UIntType v) {
return build_nan(FloatProp::QUIET_NAN_MASK | v);
// The function convert integer number and unbiased exponent to proper float
// T type:
// Result = number * 2^(ep+1 - exponent_bias)
// Be careful!
// 1) "ep" is raw exponent value.
// 2) The function add to +1 to ep for seamless normalized to denormalized
// transition.
// 3) The function did not check exponent high limit.
// 4) "number" zero value is not processed correctly.
// 5) Number is unsigned, so the result can be only positive.
LIBC_INLINE static constexpr FPBits<T> make_value(UIntType number, int ep) {
FPBits<T> result;
// offset: +1 for sign, but -1 for implicit first bit
int lz = unsafe_clz(number) - FloatProp::EXPONENT_WIDTH;
number <<= lz;
ep -= lz;
if (LIBC_LIKELY(ep >= 0)) {
// Implicit number bit will be removed by mask
result.set_unbiased_exponent(ep + 1);
} else {
result.set_mantissa(number >> -ep);
return result;
LIBC_INLINE static FPBits<T> create_value(bool sign, UIntType unbiased_exp,
UIntType mantissa) {
FPBits<T> result;
return result;
// Converts the bits to a string in the following format:
// "0x<NNN...N> = S: N, E: 0xNNNN, M:0xNNN...N"
// 1. N is a hexadecimal digit.
// 2. The hexadecimal number on the LHS is the raw numerical representation
// of the bits.
// 3. The exponent is always 16 bits wide irrespective of the type of the
// floating encoding.
LIBC_INLINE cpp::string str() const {
if (is_nan())
return "(NaN)";
if (is_inf())
return get_sign() ? "(-Infinity)" : "(+Infinity)";
auto zerofill = [](char *arr, size_t n) {
for (size_t i = 0; i < n; ++i)
arr[i] = '0';
cpp::string s("0x");
char bitsbuf[IntegerToString::hex_bufsize<UIntType>()];
zerofill(bitsbuf, sizeof(bitsbuf));
IntegerToString::hex(bits, bitsbuf, false);
s += cpp::string(bitsbuf, sizeof(bitsbuf));
s += " = (";
s += cpp::string("S: ") + (get_sign() ? "1" : "0");
char expbuf[IntegerToString::hex_bufsize<uint16_t>()];
zerofill(expbuf, sizeof(expbuf));
IntegerToString::hex(get_unbiased_exponent(), expbuf, false);
s += cpp::string(", E: 0x") + cpp::string(expbuf, sizeof(expbuf));
char mantbuf[IntegerToString::hex_bufsize<UIntType>()] = {'0'};
zerofill(mantbuf, sizeof(mantbuf));
IntegerToString::hex(get_mantissa(), mantbuf, false);
s += cpp::string(", M: 0x") + cpp::string(mantbuf, sizeof(mantbuf));
s += ")";
return s;
} // namespace fputil
} // namespace __llvm_libc
#include "x86_64/LongDoubleBits.h"