| //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "big-radix-floating-point.h" |
| #include "flang/Common/bit-population-count.h" |
| #include "flang/Common/leading-zero-bit-count.h" |
| #include "flang/Decimal/binary-floating-point.h" |
| #include "flang/Decimal/decimal.h" |
| #include "flang/Runtime/freestanding-tools.h" |
| #include <cinttypes> |
| #include <cstring> |
| #include <utility> |
| |
| // Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE |
| // to leak out of <math.h>. |
| #undef HUGE |
| |
| namespace Fortran::decimal { |
| |
| template <int PREC, int LOG10RADIX> |
| bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( |
| const char *&p, bool &inexact, const char *end) { |
| SetToZero(); |
| if (end && p >= end) { |
| return false; |
| } |
| // Skip leading spaces |
| for (; p != end && *p == ' '; ++p) { |
| } |
| if (p == end) { |
| return false; |
| } |
| const char *q{p}; |
| isNegative_ = *q == '-'; |
| if (*q == '-' || *q == '+') { |
| ++q; |
| } |
| const char *start{q}; |
| for (; q != end && *q == '0'; ++q) { |
| } |
| const char *firstDigit{q}; |
| for (; q != end && *q >= '0' && *q <= '9'; ++q) { |
| } |
| const char *point{nullptr}; |
| if (q != end && *q == '.') { |
| point = q; |
| for (++q; q != end && *q >= '0' && *q <= '9'; ++q) { |
| } |
| } |
| if (q == start || (q == start + 1 && start == point)) { |
| return false; // require at least one digit |
| } |
| // There's a valid number here; set the reference argument to point to |
| // the first character afterward, which might be an exponent part. |
| p = q; |
| // Strip off trailing zeroes |
| if (point) { |
| while (q[-1] == '0') { |
| --q; |
| } |
| if (q[-1] == '.') { |
| point = nullptr; |
| --q; |
| } |
| } |
| if (!point) { |
| while (q > firstDigit && q[-1] == '0') { |
| --q; |
| ++exponent_; |
| } |
| } |
| // Trim any excess digits |
| const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; |
| if (q > limit) { |
| inexact = true; |
| if (point >= limit) { |
| q = point; |
| point = nullptr; |
| } |
| if (!point) { |
| exponent_ += q - limit; |
| } |
| q = limit; |
| } |
| if (point) { |
| exponent_ -= static_cast<int>(q - point - 1); |
| } |
| if (q == firstDigit) { |
| exponent_ = 0; // all zeros |
| } |
| // Rack the decimal digits up into big Digits. |
| for (auto times{radix}; q-- > firstDigit;) { |
| if (*q != '.') { |
| if (times == radix) { |
| digit_[digits_++] = *q - '0'; |
| times = 10; |
| } else { |
| digit_[digits_ - 1] += times * (*q - '0'); |
| times *= 10; |
| } |
| } |
| } |
| // Look for an optional exponent field. |
| if (p == end) { |
| return true; |
| } |
| q = p; |
| switch (*q) { |
| case 'e': |
| case 'E': |
| case 'd': |
| case 'D': |
| case 'q': |
| case 'Q': { |
| if (++q == end) { |
| break; |
| } |
| bool negExpo{*q == '-'}; |
| if (*q == '-' || *q == '+') { |
| ++q; |
| } |
| if (q != end && *q >= '0' && *q <= '9') { |
| int expo{0}; |
| for (; q != end && *q == '0'; ++q) { |
| } |
| const char *expDig{q}; |
| for (; q != end && *q >= '0' && *q <= '9'; ++q) { |
| expo = 10 * expo + *q - '0'; |
| } |
| if (q >= expDig + 8) { |
| // There's a ridiculous number of nonzero exponent digits. |
| // The decimal->binary conversion routine will cope with |
| // returning 0 or Inf, but we must ensure that "expo" didn't |
| // overflow back around to something legal. |
| expo = 10 * Real::decimalRange; |
| exponent_ = 0; |
| } |
| p = q; // exponent is valid; advance the termination pointer |
| if (negExpo) { |
| exponent_ -= expo; |
| } else { |
| exponent_ += expo; |
| } |
| } |
| } break; |
| default: |
| break; |
| } |
| return true; |
| } |
| |
| template <int PREC, int LOG10RADIX> |
| void BigRadixFloatingPointNumber<PREC, |
| LOG10RADIX>::LoseLeastSignificantDigit() { |
| Digit LSD{digit_[0]}; |
| for (int j{0}; j < digits_ - 1; ++j) { |
| digit_[j] = digit_[j + 1]; |
| } |
| digit_[digits_ - 1] = 0; |
| bool incr{false}; |
| switch (rounding_) { |
| case RoundNearest: |
| incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); |
| break; |
| case RoundUp: |
| incr = LSD > 0 && !isNegative_; |
| break; |
| case RoundDown: |
| incr = LSD > 0 && isNegative_; |
| break; |
| case RoundToZero: |
| break; |
| case RoundCompatible: |
| incr = LSD >= radix / 2; |
| break; |
| } |
| for (int j{0}; (digit_[j] += incr) == radix; ++j) { |
| digit_[j] = 0; |
| } |
| } |
| |
| // This local utility class represents an unrounded nonnegative |
| // binary floating-point value with an unbiased (i.e., signed) |
| // binary exponent, an integer value (not a fraction) with an implied |
| // binary point to its *right*, and some guard bits for rounding. |
| template <int PREC> class IntermediateFloat { |
| public: |
| static constexpr int precision{PREC}; |
| using IntType = common::HostUnsignedIntType<precision>; |
| static constexpr IntType topBit{IntType{1} << (precision - 1)}; |
| static constexpr IntType mask{topBit + (topBit - 1)}; |
| |
| RT_API_ATTRS IntermediateFloat() {} |
| IntermediateFloat(const IntermediateFloat &) = default; |
| |
| // Assumes that exponent_ is valid on entry, and may increment it. |
| // Returns the number of guard_ bits that have been determined. |
| template <typename UINT> RT_API_ATTRS bool SetTo(UINT n) { |
| static constexpr int nBits{CHAR_BIT * sizeof n}; |
| if constexpr (precision >= nBits) { |
| value_ = n; |
| guard_ = 0; |
| return 0; |
| } else { |
| int shift{common::BitsNeededFor(n) - precision}; |
| if (shift <= 0) { |
| value_ = n; |
| guard_ = 0; |
| return 0; |
| } else { |
| value_ = n >> shift; |
| exponent_ += shift; |
| n <<= nBits - shift; |
| guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); |
| return shift; |
| } |
| } |
| } |
| |
| RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } |
| RT_API_ATTRS bool IsFull() const { return value_ >= topBit; } |
| RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; } |
| RT_API_ATTRS void SetGuard(int g) { |
| guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); |
| } |
| |
| RT_API_ATTRS ConversionToBinaryResult<PREC> ToBinary( |
| bool isNegative, FortranRounding) const; |
| |
| private: |
| static constexpr int guardBits{3}; // guard, round, sticky |
| using GuardType = int; |
| static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; |
| |
| IntType value_{0}; |
| GuardType guard_{0}; |
| int exponent_{0}; |
| }; |
| |
| // The standard says that these overflow cases round to "representable" |
| // numbers, and some popular compilers interpret that to mean +/-HUGE() |
| // rather than +/-Inf. |
| static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge( |
| enum FortranRounding rounding, bool isNegative) { |
| return rounding == RoundToZero || (!isNegative && rounding == RoundDown) || |
| (isNegative && rounding == RoundUp); |
| } |
| |
| template <int PREC> |
| ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( |
| bool isNegative, FortranRounding rounding) const { |
| using Binary = BinaryFloatingPointNumber<PREC>; |
| // Create a fraction with a binary point to the left of the integer |
| // value_, and bias the exponent. |
| IntType fraction{value_}; |
| GuardType guard{guard_}; |
| int expo{exponent_ + Binary::exponentBias + (precision - 1)}; |
| while (expo < 1 && (fraction > 0 || guard > oneHalf)) { |
| guard = (guard & 1) | (guard >> 1) | |
| ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); |
| fraction >>= 1; |
| ++expo; |
| } |
| int flags{Exact}; |
| if (guard != 0) { |
| flags |= Inexact; |
| } |
| if (fraction == 0) { |
| if (guard <= oneHalf) { |
| if ((!isNegative && rounding == RoundUp) || |
| (isNegative && rounding == RoundDown)) { |
| // round to least nonzero value |
| expo = 0; |
| } else { // round to zero |
| if (guard != 0) { |
| flags |= Underflow; |
| } |
| Binary zero; |
| if (isNegative) { |
| zero.Negate(); |
| } |
| return { |
| std::move(zero), static_cast<enum ConversionResultFlags>(flags)}; |
| } |
| } |
| } else { |
| // The value is nonzero; normalize it. |
| while (fraction < topBit && expo > 1) { |
| --expo; |
| fraction = fraction * 2 + (guard >> (guardBits - 2)); |
| guard = |
| (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); |
| } |
| } |
| // Apply rounding |
| bool incr{false}; |
| switch (rounding) { |
| case RoundNearest: |
| incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); |
| break; |
| case RoundUp: |
| incr = guard != 0 && !isNegative; |
| break; |
| case RoundDown: |
| incr = guard != 0 && isNegative; |
| break; |
| case RoundToZero: |
| break; |
| case RoundCompatible: |
| incr = guard >= oneHalf; |
| break; |
| } |
| if (incr) { |
| if (fraction == mask) { |
| // rounding causes a carry |
| ++expo; |
| fraction = topBit; |
| } else { |
| ++fraction; |
| } |
| } |
| if (expo == 1 && fraction < topBit) { |
| expo = 0; // subnormal |
| flags |= Underflow; |
| } else if (expo == 0) { |
| flags |= Underflow; |
| } else if (expo >= Binary::maxExponent) { |
| if (RoundOverflowToHuge(rounding, isNegative)) { |
| expo = Binary::maxExponent - 1; |
| fraction = mask; |
| } else { // Inf |
| expo = Binary::maxExponent; |
| flags |= Overflow; |
| if constexpr (Binary::bits == 80) { // x87 |
| fraction = IntType{1} << 63; |
| } else { |
| fraction = 0; |
| } |
| } |
| } |
| using Raw = typename Binary::RawType; |
| Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); |
| raw |= static_cast<Raw>(expo) << Binary::significandBits; |
| if constexpr (Binary::isImplicitMSB) { |
| fraction &= ~topBit; |
| } |
| raw |= fraction; |
| return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; |
| } |
| |
| template <int PREC, int LOG10RADIX> |
| ConversionToBinaryResult<PREC> |
| BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { |
| // On entry, *this holds a multi-precision integer value in a radix of a |
| // large power of ten. Its radix point is defined to be to the right of its |
| // digits, and "exponent_" is the power of ten by which it is to be scaled. |
| Normalize(); |
| if (digits_ == 0) { // zero value |
| return {Real{SignBit()}}; |
| } |
| // The value is not zero: x = D. * 10.**E |
| // Shift our perspective on the radix (& decimal) point so that |
| // it sits to the *left* of the digits: i.e., x = .D * 10.**E |
| exponent_ += digits_ * log10Radix; |
| // Sanity checks for ridiculous exponents |
| static constexpr int crazy{2 * Real::decimalRange + log10Radix}; |
| if (exponent_ < -crazy) { |
| enum ConversionResultFlags flags { |
| static_cast<enum ConversionResultFlags>(Inexact | Underflow) |
| }; |
| if ((!isNegative_ && rounding_ == RoundUp) || |
| (isNegative_ && rounding_ == RoundDown)) { |
| // return least nonzero value |
| return {Real{Raw{1} | SignBit()}, flags}; |
| } else { // underflow to +/-0. |
| return {Real{SignBit()}, flags}; |
| } |
| } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf |
| if (RoundOverflowToHuge(rounding_, isNegative_)) { |
| return {Real{HUGE()}}; |
| } else { |
| return {Real{Infinity()}, Overflow}; |
| } |
| } |
| // Apply any negative decimal exponent by multiplication |
| // by a power of two, adjusting the binary exponent to compensate. |
| IntermediateFloat<PREC> f; |
| while (exponent_ < log10Radix) { |
| // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) |
| f.AdjustExponent(-9); |
| digitLimit_ = digits_; |
| if (int carry{MultiplyWithoutNormalization<512>()}) { |
| // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) |
| PushCarry(carry); |
| exponent_ += log10Radix; |
| } |
| } |
| // Apply any positive decimal exponent greater than |
| // is needed to treat the topmost digit as an integer |
| // part by multiplying by 10 or 10000 repeatedly. |
| while (exponent_ > log10Radix) { |
| digitLimit_ = digits_; |
| int carry; |
| if (exponent_ >= log10Radix + 4) { |
| // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) |
| exponent_ -= 4; |
| carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); |
| f.AdjustExponent(4); |
| } else { |
| // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) |
| --exponent_; |
| carry = MultiplyWithoutNormalization<5>(); |
| f.AdjustExponent(1); |
| } |
| if (carry != 0) { |
| // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) |
| PushCarry(carry); |
| exponent_ += log10Radix; |
| } |
| } |
| // So exponent_ is now log10Radix, meaning that the |
| // MSD can be taken as an integer part and transferred |
| // to the binary result. |
| // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) |
| int guardShift{f.SetTo(digit_[--digits_])}; |
| // Transfer additional bits until the result is normal. |
| digitLimit_ = digits_; |
| while (!f.IsFull()) { |
| // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) |
| f.AdjustExponent(-1); |
| std::uint32_t carry = MultiplyWithoutNormalization<2>(); |
| f.ShiftIn(carry); |
| } |
| // Get the next few bits for rounding. Allow for some guard bits |
| // that may have already been set in f.SetTo() above. |
| int guard{0}; |
| if (guardShift == 0) { |
| guard = MultiplyWithoutNormalization<4>(); |
| } else if (guardShift == 1) { |
| guard = MultiplyWithoutNormalization<2>(); |
| } |
| guard = guard + guard + !IsZero(); |
| f.SetGuard(guard); |
| return f.ToBinary(isNegative_, rounding_); |
| } |
| |
| template <int PREC, int LOG10RADIX> |
| ConversionToBinaryResult<PREC> |
| BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary( |
| const char *&p, const char *limit) { |
| bool inexact{false}; |
| if (ParseNumber(p, inexact, limit)) { |
| auto result{ConvertToBinary()}; |
| if (inexact) { |
| result.flags = |
| static_cast<enum ConversionResultFlags>(result.flags | Inexact); |
| } |
| return result; |
| } else { |
| // Could not parse a decimal floating-point number. p has been |
| // advanced over any leading spaces. Most Fortran compilers set |
| // the sign bit for -NaN. |
| const char *q{p}; |
| if (!limit || q < limit) { |
| isNegative_ = *q == '-'; |
| if (isNegative_ || *q == '+') { |
| ++q; |
| } |
| } |
| if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'N' && |
| runtime::toupper(q[1]) == 'A' && runtime::toupper(q[2]) == 'N') { |
| // NaN |
| p = q + 3; |
| bool isQuiet{true}; |
| if ((!limit || p < limit) && *p == '(') { |
| int depth{1}; |
| do { |
| ++p; |
| if (limit && p >= limit) { |
| // Invalid input |
| return {Real{NaN(false)}, Invalid}; |
| } else if (*p == '(') { |
| ++depth; |
| } else if (*p == ')') { |
| --depth; |
| } else if (*p != ' ') { |
| // Implementation dependent, but other compilers |
| // all return quiet NaNs. |
| } |
| } while (depth > 0); |
| ++p; |
| } |
| return {Real{NaN(isQuiet)}}; |
| } else { // Inf? |
| if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'I' && |
| runtime::toupper(q[1]) == 'N' && runtime::toupper(q[2]) == 'F') { |
| if ((!limit || limit >= q + 8) && runtime::toupper(q[3]) == 'I' && |
| runtime::toupper(q[4]) == 'N' && runtime::toupper(q[5]) == 'I' && |
| runtime::toupper(q[6]) == 'T' && runtime::toupper(q[7]) == 'Y') { |
| p = q + 8; |
| } else { |
| p = q + 3; |
| } |
| return {Real{Infinity()}}; |
| } else { |
| // Invalid input |
| return {Real{NaN()}, Invalid}; |
| } |
| } |
| } |
| } |
| |
| template <int PREC> |
| ConversionToBinaryResult<PREC> ConvertToBinary( |
| const char *&p, enum FortranRounding rounding, const char *end) { |
| return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end); |
| } |
| |
| template ConversionToBinaryResult<8> ConvertToBinary<8>( |
| const char *&, enum FortranRounding, const char *end); |
| template ConversionToBinaryResult<11> ConvertToBinary<11>( |
| const char *&, enum FortranRounding, const char *end); |
| template ConversionToBinaryResult<24> ConvertToBinary<24>( |
| const char *&, enum FortranRounding, const char *end); |
| template ConversionToBinaryResult<53> ConvertToBinary<53>( |
| const char *&, enum FortranRounding, const char *end); |
| template ConversionToBinaryResult<64> ConvertToBinary<64>( |
| const char *&, enum FortranRounding, const char *end); |
| template ConversionToBinaryResult<113> ConvertToBinary<113>( |
| const char *&, enum FortranRounding, const char *end); |
| |
| extern "C" { |
| RT_EXT_API_GROUP_BEGIN |
| |
| enum ConversionResultFlags ConvertDecimalToFloat( |
| const char **p, float *f, enum FortranRounding rounding) { |
| auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; |
| std::memcpy(reinterpret_cast<void *>(f), |
| reinterpret_cast<const void *>(&result.binary), sizeof *f); |
| return result.flags; |
| } |
| enum ConversionResultFlags ConvertDecimalToDouble( |
| const char **p, double *d, enum FortranRounding rounding) { |
| auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; |
| std::memcpy(reinterpret_cast<void *>(d), |
| reinterpret_cast<const void *>(&result.binary), sizeof *d); |
| return result.flags; |
| } |
| enum ConversionResultFlags ConvertDecimalToLongDouble( |
| const char **p, long double *ld, enum FortranRounding rounding) { |
| auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; |
| std::memcpy(reinterpret_cast<void *>(ld), |
| reinterpret_cast<const void *>(&result.binary), sizeof *ld); |
| return result.flags; |
| } |
| |
| RT_EXT_API_GROUP_END |
| } // extern "C" |
| } // namespace Fortran::decimal |