| //===-- APFloat.cpp - Implement APFloat class -----------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a class to represent arbitrary precision floating |
| // point values and provide a variety of arithmetic operations on them. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APSInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/FoldingSet.h" |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Config/llvm-config.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <cstring> |
| #include <limits.h> |
| |
/// Expands to a statement that forwards METHOD_CALL to `U.IEEE` when
/// getSemantics() uses the IEEEFloat layout, or to `U.Double` when it uses the
/// DoubleAPFloat layout, and returns the result from the enclosing function.
/// Any other layout reaches llvm_unreachable. The do/while(false) wrapper lets
/// the expansion be used as a single statement followed by a semicolon.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
| |
| using namespace llvm; |
| |
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is computed as `_lhs * 4 + _rhs`.
/// NOTE(review): keys are only collision-free if every category value is
/// below 4; the enum is declared outside this file, so confirm there.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
| |
| /* Assumed in hexadecimal significand parsing, and conversion to |
| hexadecimal strings. */ |
| static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); |
| |
| namespace llvm { |
| /* Represents floating point arithmetic semantics. */ |
| struct fltSemantics { |
| /* The largest E such that 2^E is representable; this matches the |
| definition of IEEE 754. */ |
| APFloatBase::ExponentType maxExponent; |
| |
| /* The smallest E such that 2^E is a normalized number; this |
| matches the definition of IEEE 754. */ |
| APFloatBase::ExponentType minExponent; |
| |
| /* Number of bits in the significand. This includes the integer |
| bit. */ |
| unsigned int precision; |
| |
| /* Number of bits actually used in the semantics. */ |
| unsigned int sizeInBits; |
| |
| // Returns true if any number described by this semantics can be precisely |
| // represented by the specified semantics. |
| bool isRepresentableBy(const fltSemantics &S) const { |
| return maxExponent <= S.maxExponent && minExponent >= S.minExponent && |
| precision <= S.precision; |
| } |
| }; |
| |
| static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; |
| static const fltSemantics semBFloat = {127, -126, 8, 16}; |
| static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; |
| static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; |
| static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; |
| static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; |
| static const fltSemantics semBogus = {0, 0, 0, 0}; |
| |
| /* The IBM double-double semantics. Such a number consists of a pair of IEEE |
| 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, |
| (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. |
| Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent |
| to each other, and two 11-bit exponents. |
| |
| Note: we need to make the value different from semBogus as otherwise |
| an unsafe optimization may collapse both values to a single address, |
| and we heavily rely on them having distinct addresses. */ |
| static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; |
| |
| /* These are legacy semantics for the fallback, inaccrurate implementation of |
| IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the |
| operation. It's equivalent to having an IEEE number with consecutive 106 |
| bits of mantissa and 11 bits of exponent. |
| |
| It's not equivalent to IBM double-double. For example, a legit IBM |
| double-double, 1 + epsilon: |
| |
| 1 + epsilon = 1 + (1 >> 1076) |
| |
| is not representable by a consecutive 106 bits of mantissa. |
| |
| Currently, these semantics are used in the following way: |
| |
| semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> |
| (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> |
| semPPCDoubleDoubleLegacy -> IEEE operations |
| |
| We use bitcastToAPInt() to get the bit representation (in APInt) of the |
| underlying IEEEdouble, then use the APInt constructor to construct the |
| legacy IEEE float. |
| |
| TODO: Implement all operations in semPPCDoubleDouble, and delete these |
| semantics. */ |
| static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, |
| 53 + 53, 128}; |
| |
| const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { |
| switch (S) { |
| case S_IEEEhalf: |
| return IEEEhalf(); |
| case S_BFloat: |
| return BFloat(); |
| case S_IEEEsingle: |
| return IEEEsingle(); |
| case S_IEEEdouble: |
| return IEEEdouble(); |
| case S_x87DoubleExtended: |
| return x87DoubleExtended(); |
| case S_IEEEquad: |
| return IEEEquad(); |
| case S_PPCDoubleDouble: |
| return PPCDoubleDouble(); |
| } |
| llvm_unreachable("Unrecognised floating semantics"); |
| } |
| |
| APFloatBase::Semantics |
| APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { |
| if (&Sem == &llvm::APFloat::IEEEhalf()) |
| return S_IEEEhalf; |
| else if (&Sem == &llvm::APFloat::BFloat()) |
| return S_BFloat; |
| else if (&Sem == &llvm::APFloat::IEEEsingle()) |
| return S_IEEEsingle; |
| else if (&Sem == &llvm::APFloat::IEEEdouble()) |
| return S_IEEEdouble; |
| else if (&Sem == &llvm::APFloat::x87DoubleExtended()) |
| return S_x87DoubleExtended; |
| else if (&Sem == &llvm::APFloat::IEEEquad()) |
| return S_IEEEquad; |
| else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) |
| return S_PPCDoubleDouble; |
| else |
| llvm_unreachable("Unknown floating semantics"); |
| } |
| |
| const fltSemantics &APFloatBase::IEEEhalf() { |
| return semIEEEhalf; |
| } |
| const fltSemantics &APFloatBase::BFloat() { |
| return semBFloat; |
| } |
| const fltSemantics &APFloatBase::IEEEsingle() { |
| return semIEEEsingle; |
| } |
| const fltSemantics &APFloatBase::IEEEdouble() { |
| return semIEEEdouble; |
| } |
| const fltSemantics &APFloatBase::IEEEquad() { |
| return semIEEEquad; |
| } |
| const fltSemantics &APFloatBase::x87DoubleExtended() { |
| return semX87DoubleExtended; |
| } |
| const fltSemantics &APFloatBase::Bogus() { |
| return semBogus; |
| } |
| const fltSemantics &APFloatBase::PPCDoubleDouble() { |
| return semPPCDoubleDouble; |
| } |
| |
// Namespace-scope definitions of the rounding-mode constants that are members
// of APFloatBase.
// NOTE(review): presumably these exist so the constants can be odr-used when
// building as pre-C++17, where static constexpr members are not implicitly
// inline; confirm before removing.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
| |
| /* A tight upper bound on number of parts required to hold the value |
| pow(5, power) is |
| |
| power * 815 / (351 * integerPartWidth) + 1 |
| |
| However, whilst the result may require only this many parts, |
| because we are multiplying two values to get it, the |
| multiplication may require an extra part with the excess part |
| being zero (consider the trivial case of 1 * 1, tcFullMultiply |
| requires two parts to hold the single-part result). So we add an |
| extra one to guarantee enough space whilst multiplying. */ |
| const unsigned int maxExponent = 16383; |
| const unsigned int maxPrecision = 113; |
| const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; |
| const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); |
| |
| unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { |
| return semantics.precision; |
| } |
| APFloatBase::ExponentType |
| APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { |
| return semantics.maxExponent; |
| } |
| APFloatBase::ExponentType |
| APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { |
| return semantics.minExponent; |
| } |
| unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { |
| return semantics.sizeInBits; |
| } |
| |
| unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { |
| return Sem.sizeInBits; |
| } |
| |
| /* A bunch of private, handy routines. */ |
| |
| static inline Error createError(const Twine &Err) { |
| return make_error<StringError>(Err, inconvertibleErrorCode()); |
| } |
| |
| static inline unsigned int |
| partCountForBits(unsigned int bits) |
| { |
| return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; |
| } |
| |
/* Value of the decimal digit character C, i.e. 0U-9U. Any result of 10U or
   more means C was not a digit; characters below '0' wrap round to a large
   unsigned value. */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
| |
| /* Return the value of a decimal exponent of the form |
| [+-]ddddddd. |
| |
| If the exponent overflows, returns a large exponent with the |
| appropriate sign. */ |
| static Expected<int> readExponent(StringRef::iterator begin, |
| StringRef::iterator end) { |
| bool isNegative; |
| unsigned int absExponent; |
| const unsigned int overlargeExponent = 24000; /* FIXME. */ |
| StringRef::iterator p = begin; |
| |
| // Treat no exponent as 0 to match binutils |
| if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { |
| return 0; |
| } |
| |
| isNegative = (*p == '-'); |
| if (*p == '-' || *p == '+') { |
| p++; |
| if (p == end) |
| return createError("Exponent has no digits"); |
| } |
| |
| absExponent = decDigitValue(*p++); |
| if (absExponent >= 10U) |
| return createError("Invalid character in exponent"); |
| |
| for (; p != end; ++p) { |
| unsigned int value; |
| |
| value = decDigitValue(*p); |
| if (value >= 10U) |
| return createError("Invalid character in exponent"); |
| |
| absExponent = absExponent * 10U + value; |
| if (absExponent >= overlargeExponent) { |
| absExponent = overlargeExponent; |
| break; |
| } |
| } |
| |
| if (isNegative) |
| return -(int) absExponent; |
| else |
| return (int) absExponent; |
| } |
| |
| /* This is ugly and needs cleaning up, but I don't immediately see |
| how whilst remaining safe. */ |
| static Expected<int> totalExponent(StringRef::iterator p, |
| StringRef::iterator end, |
| int exponentAdjustment) { |
| int unsignedExponent; |
| bool negative, overflow; |
| int exponent = 0; |
| |
| if (p == end) |
| return createError("Exponent has no digits"); |
| |
| negative = *p == '-'; |
| if (*p == '-' || *p == '+') { |
| p++; |
| if (p == end) |
| return createError("Exponent has no digits"); |
| } |
| |
| unsignedExponent = 0; |
| overflow = false; |
| for (; p != end; ++p) { |
| unsigned int value; |
| |
| value = decDigitValue(*p); |
| if (value >= 10U) |
| return createError("Invalid character in exponent"); |
| |
| unsignedExponent = unsignedExponent * 10 + value; |
| if (unsignedExponent > 32767) { |
| overflow = true; |
| break; |
| } |
| } |
| |
| if (exponentAdjustment > 32767 || exponentAdjustment < -32768) |
| overflow = true; |
| |
| if (!overflow) { |
| exponent = unsignedExponent; |
| if (negative) |
| exponent = -exponent; |
| exponent += exponentAdjustment; |
| if (exponent > 32767 || exponent < -32768) |
| overflow = true; |
| } |
| |
| if (overflow) |
| exponent = negative ? -32768: 32767; |
| |
| return exponent; |
| } |
| |
| static Expected<StringRef::iterator> |
| skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, |
| StringRef::iterator *dot) { |
| StringRef::iterator p = begin; |
| *dot = end; |
| while (p != end && *p == '0') |
| p++; |
| |
| if (p != end && *p == '.') { |
| *dot = p++; |
| |
| if (end - begin == 1) |
| return createError("Significand has no digits"); |
| |
| while (p != end && *p == '0') |
| p++; |
| } |
| |
| return p; |
| } |
| |
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  // Position reached after skipping leading zeroes and any leading dot.
  const char *firstSigDigit;
  // Position of the last digit kept once trailing zeroes are dropped.
  const char *lastSigDigit;
  // Exponent when the significand digits are read as an integer.
  int exponent;
  // Exponent when the point follows the first significant digit.
  int normalizedExponent;
};
| |
| static Error interpretDecimal(StringRef::iterator begin, |
| StringRef::iterator end, decimalInfo *D) { |
| StringRef::iterator dot = end; |
| |
| auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); |
| if (!PtrOrErr) |
| return PtrOrErr.takeError(); |
| StringRef::iterator p = *PtrOrErr; |
| |
| D->firstSigDigit = p; |
| D->exponent = 0; |
| D->normalizedExponent = 0; |
| |
| for (; p != end; ++p) { |
| if (*p == '.') { |
| if (dot != end) |
| return createError("String contains multiple dots"); |
| dot = p++; |
| if (p == end) |
| break; |
| } |
| if (decDigitValue(*p) >= 10U) |
| break; |
| } |
| |
| if (p != end) { |
| if (*p != 'e' && *p != 'E') |
| return createError("Invalid character in significand"); |
| if (p == begin) |
| return createError("Significand has no digits"); |
| if (dot != end && p - begin == 1) |
| return createError("Significand has no digits"); |
| |
| /* p points to the first non-digit in the string */ |
| auto ExpOrErr = readExponent(p + 1, end); |
| if (!ExpOrErr) |
| return ExpOrErr.takeError(); |
| D->exponent = *ExpOrErr; |
| |
| /* Implied decimal point? */ |
| if (dot == end) |
| dot = p; |
| } |
| |
| /* If number is all zeroes accept any exponent. */ |
| if (p != D->firstSigDigit) { |
| /* Drop insignificant trailing zeroes. */ |
| if (p != begin) { |
| do |
| do |
| p--; |
| while (p != begin && *p == '0'); |
| while (p != begin && *p == '.'); |
| } |
| |
| /* Adjust the exponents for any decimal point. */ |
| D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); |
| D->normalizedExponent = (D->exponent + |
| static_cast<APFloat::ExponentType>((p - D->firstSigDigit) |
| - (dot > D->firstSigDigit && dot < p))); |
| } |
| |
| D->lastSigDigit = p; |
| return Error::success(); |
| } |
| |
| /* Return the trailing fraction of a hexadecimal number. |
| DIGITVALUE is the first hex digit of the fraction, P points to |
| the next digit. */ |
| static Expected<lostFraction> |
| trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, |
| unsigned int digitValue) { |
| unsigned int hexDigit; |
| |
| /* If the first trailing digit isn't 0 or 8 we can work out the |
| fraction immediately. */ |
| if (digitValue > 8) |
| return lfMoreThanHalf; |
| else if (digitValue < 8 && digitValue > 0) |
| return lfLessThanHalf; |
| |
| // Otherwise we need to find the first non-zero digit. |
| while (p != end && (*p == '0' || *p == '.')) |
| p++; |
| |
| if (p == end) |
| return createError("Invalid trailing hexadecimal fraction!"); |
| |
| hexDigit = hexDigitValue(*p); |
| |
| /* If we ran off the end it is exactly zero or one-half, otherwise |
| a little more. */ |
| if (hexDigit == -1U) |
| return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; |
| else |
| return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; |
| } |
| |
| /* Return the fraction lost were a bignum truncated losing the least |
| significant BITS bits. */ |
| static lostFraction |
| lostFractionThroughTruncation(const APFloatBase::integerPart *parts, |
| unsigned int partCount, |
| unsigned int bits) |
| { |
| unsigned int lsb; |
| |
| lsb = APInt::tcLSB(parts, partCount); |
| |
| /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ |
| if (bits <= lsb) |
| return lfExactlyZero; |
| if (bits == lsb + 1) |
| return lfExactlyHalf; |
| if (bits <= partCount * APFloatBase::integerPartWidth && |
| APInt::tcExtractBit(parts, bits - 1)) |
| return lfMoreThanHalf; |
| |
| return lfLessThanHalf; |
| } |
| |
| /* Shift DST right BITS bits noting lost fraction. */ |
| static lostFraction |
| shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) |
| { |
| lostFraction lost_fraction; |
| |
| lost_fraction = lostFractionThroughTruncation(dst, parts, bits); |
| |
| APInt::tcShiftRight(dst, parts, bits); |
| |
| return lost_fraction; |
| } |
| |
| /* Combine the effect of two lost fractions. */ |
| static lostFraction |
| combineLostFractions(lostFraction moreSignificant, |
| lostFraction lessSignificant) |
| { |
| if (lessSignificant != lfExactlyZero) { |
| if (moreSignificant == lfExactlyZero) |
| moreSignificant = lfLessThanHalf; |
| else if (moreSignificant == lfExactlyHalf) |
| moreSignificant = lfMoreThanHalf; |
| } |
| |
| return moreSignificant; |
| } |
| |
/* When two floating point numbers that differ from the values they
   approximate by at most HUERR1 and HUERR2 half-ulps are multiplied, the
   error of the product from the true value, in half-ulps, is strictly
   less than the returned value. INEXACTMULTIPLY says whether the
   multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  if (inputError == 0)
    return multiplyError * 2; /* <= inexactMultiply half-ulps. */
  return multiplyError + 2 * inputError;
}
| |
| /* The number of ulps from the boundary (zero, or half if ISNEAREST) |
| when the least significant BITS are truncated. BITS cannot be |
| zero. */ |
| static APFloatBase::integerPart |
| ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, |
| bool isNearest) { |
| unsigned int count, partBits; |
| APFloatBase::integerPart part, boundary; |
| |
| assert(bits != 0); |
| |
| bits--; |
| count = bits / APFloatBase::integerPartWidth; |
| partBits = bits % APFloatBase::integerPartWidth + 1; |
| |
| part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); |
| |
| if (isNearest) |
| boundary = (APFloatBase::integerPart) 1 << (partBits - 1); |
| else |
| boundary = 0; |
| |
| if (count == 0) { |
| if (part - boundary <= boundary - part) |
| return part - boundary; |
| else |
| return boundary - part; |
| } |
| |
| if (part == boundary) { |
| while (--count) |
| if (parts[count]) |
| return ~(APFloatBase::integerPart) 0; /* A lot. */ |
| |
| return parts[0]; |
| } else if (part == boundary - 1) { |
| while (--count) |
| if (~parts[count]) |
| return ~(APFloatBase::integerPart) 0; /* A lot. */ |
| |
| return -parts[0]; |
| } |
| |
| return ~(APFloatBase::integerPart) 0; /* A lot. */ |
| } |
| |
| /* Place pow(5, power) in DST, and return the number of parts used. |
| DST must be at least one part larger than size of the answer. */ |
| static unsigned int |
| powerOf5(APFloatBase::integerPart *dst, unsigned int power) { |
| static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; |
| APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; |
| pow5s[0] = 78125 * 5; |
| |
| unsigned int partsCount[16] = { 1 }; |
| APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; |
| unsigned int result; |
| assert(power <= maxExponent); |
| |
| p1 = dst; |
| p2 = scratch; |
| |
| *p1 = firstEightPowers[power & 7]; |
| power >>= 3; |
| |
| result = 1; |
| pow5 = pow5s; |
| |
| for (unsigned int n = 0; power; power >>= 1, n++) { |
| unsigned int pc; |
| |
| pc = partsCount[n]; |
| |
| /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ |
| if (pc == 0) { |
| pc = partsCount[n - 1]; |
| APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); |
| pc *= 2; |
| if (pow5[pc - 1] == 0) |
| pc--; |
| partsCount[n] = pc; |
| } |
| |
| if (power & 1) { |
| APFloatBase::integerPart *tmp; |
| |
| APInt::tcFullMultiply(p2, p1, pow5, result, pc); |
| result += pc; |
| if (p2[result - 1] == 0) |
| result--; |
| |
| /* Now result is in p1 with partsCount parts and p2 is scratch |
| space. */ |
| tmp = p1; |
| p1 = p2; |
| p2 = tmp; |
| } |
| |
| pow5 += pc; |
| } |
| |
| if (p1 != dst) |
| APInt::tcAssign(dst, p1, result); |
| |
| return result; |
| } |
| |
/* Digit tables for hexadecimal output. Each carries an extra '0' after 'f'
   (or 'F') so that adding one to a digit when rounding up needs no modular
   arithmetic. */
static constexpr char hexDigitsLower[] = "0123456789abcdef0";
static constexpr char hexDigitsUpper[] = "0123456789ABCDEF0";

/* Spellings of the special values, in lower and upper case. */
static constexpr char infinityL[] = "infinity";
static constexpr char infinityU[] = "INFINITY";
static constexpr char NaNL[] = "nan";
static constexpr char NaNU[] = "NAN";
| |
| /* Write out an integerPart in hexadecimal, starting with the most |
| significant nibble. Write out exactly COUNT hexdigits, return |
| COUNT. */ |
| static unsigned int |
| partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, |
| const char *hexDigitChars) |
| { |
| unsigned int result = count; |
| |
| assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); |
| |
| part >>= (APFloatBase::integerPartWidth - 4 * count); |
| while (count--) { |
| dst[count] = hexDigitChars[part & 0xf]; |
| part >>= 4; |
| } |
| |
| return result; |
| } |
| |
/* Write out N as an unsigned decimal integer at DST and return a pointer
   just past the last character written. No terminator is appended. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char reversed[40];
  unsigned int length = 0;

  // Collect the digits least significant first; zero still yields one digit.
  do {
    reversed[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  // Copy them out in the opposite order.
  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
| |
| /* Write out a signed decimal integer. */ |
| static char * |
| writeSignedDecimal (char *dst, int value) |
| { |
| if (value < 0) { |
| *dst++ = '-'; |
| dst = writeUnsignedDecimal(dst, -(unsigned) value); |
| } else |
| dst = writeUnsignedDecimal(dst, value); |
| |
| return dst; |
| } |
| |
| namespace detail { |
| /* Constructors. */ |
| void IEEEFloat::initialize(const fltSemantics *ourSemantics) { |
| unsigned int count; |
| |
| semantics = ourSemantics; |
| count = partCount(); |
| if (count > 1) |
| significand.parts = new integerPart[count]; |
| } |
| |
| void IEEEFloat::freeSignificand() { |
| if (needsCleanup()) |
| delete [] significand.parts; |
| } |
| |
| void IEEEFloat::assign(const IEEEFloat &rhs) { |
| assert(semantics == rhs.semantics); |
| |
| sign = rhs.sign; |
| category = rhs.category; |
| exponent = rhs.exponent; |
| if (isFiniteNonZero() || category == fcNaN) |
| copySignificand(rhs); |
| } |
| |
| void IEEEFloat::copySignificand(const IEEEFloat &rhs) { |
| assert(isFiniteNonZero() || category == fcNaN); |
| assert(rhs.partCount() >= partCount()); |
| |
| APInt::tcAssign(significandParts(), rhs.significandParts(), |
| partCount()); |
| } |
| |
| /* Make this number a NaN, with an arbitrary but deterministic value |
| for the significand. If double or longer, this is a signalling NaN, |
| which may not be ideal. If float, this is QNaN(0). */ |
| void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { |
| category = fcNaN; |
| sign = Negative; |
| exponent = exponentNaN(); |
| |
| integerPart *significand = significandParts(); |
| unsigned numParts = partCount(); |
| |
| // Set the significand bits to the fill. |
| if (!fill || fill->getNumWords() < numParts) |
| APInt::tcSet(significand, 0, numParts); |
| if (fill) { |
| APInt::tcAssign(significand, fill->getRawData(), |
| std::min(fill->getNumWords(), numParts)); |
| |
| // Zero out the excess bits of the significand. |
| unsigned bitsToPreserve = semantics->precision - 1; |
| unsigned part = bitsToPreserve / 64; |
| bitsToPreserve %= 64; |
| significand[part] &= ((1ULL << bitsToPreserve) - 1); |
| for (part++; part != numParts; ++part) |
| significand[part] = 0; |
| } |
| |
| unsigned QNaNBit = semantics->precision - 2; |
| |
| if (SNaN) { |
| // We always have to clear the QNaN bit to make it an SNaN. |
| APInt::tcClearBit(significand, QNaNBit); |
| |
| // If there are no bits set in the payload, we have to set |
| // *something* to make it a NaN instead of an infinity; |
| // conventionally, this is the next bit down from the QNaN bit. |
| if (APInt::tcIsZero(significand, numParts)) |
| APInt::tcSetBit(significand, QNaNBit - 1); |
| } else { |
| // We always have to set the QNaN bit to make it a QNaN. |
| APInt::tcSetBit(significand, QNaNBit); |
| } |
| |
| // For x87 extended precision, we want to make a NaN, not a |
| // pseudo-NaN. Maybe we should expose the ability to make |
| // pseudo-NaNs? |
| if (semantics == &semX87DoubleExtended) |
| APInt::tcSetBit(significand, QNaNBit + 1); |
| } |
| |
| IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { |
| if (this != &rhs) { |
| if (semantics != rhs.semantics) { |
| freeSignificand(); |
| initialize(rhs.semantics); |
| } |
| assign(rhs); |
| } |
| |
| return *this; |
| } |
| |
| IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { |
| freeSignificand(); |
| |
| semantics = rhs.semantics; |
| significand = rhs.significand; |
| exponent = rhs.exponent; |
| category = rhs.category; |
| sign = rhs.sign; |
| |
| rhs.semantics = &semBogus; |
| return *this; |
| } |
| |
| bool IEEEFloat::isDenormal() const { |
| return isFiniteNonZero() && (exponent == semantics->minExponent) && |
| (APInt::tcExtractBit(significandParts(), |
| semantics->precision - 1) == 0); |
| } |
| |
| bool IEEEFloat::isSmallest() const { |
| // The smallest number by magnitude in our format will be the smallest |
| // denormal, i.e. the floating point number with exponent being minimum |
| // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). |
| return isFiniteNonZero() && exponent == semantics->minExponent && |
| significandMSB() == 0; |
| } |
| |
| bool IEEEFloat::isSignificandAllOnes() const { |
| // Test if the significand excluding the integral bit is all ones. This allows |
| // us to test for binade boundaries. |
| const integerPart *Parts = significandParts(); |
| const unsigned PartCount = partCountForBits(semantics->precision); |
| for (unsigned i = 0; i < PartCount - 1; i++) |
| if (~Parts[i]) |
| return false; |
| |
| // Set the unused high bits to all ones when we compare. |
| const unsigned NumHighBits = |
| PartCount*integerPartWidth - semantics->precision + 1; |
| assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && |
| "Can not have more high bits to fill than integerPartWidth"); |
| const integerPart HighBitFill = |
| ~integerPart(0) << (integerPartWidth - NumHighBits); |
| if (~(Parts[PartCount - 1] | HighBitFill)) |
| return false; |
| |
| return true; |
| } |
| |
| bool IEEEFloat::isSignificandAllZeros() const { |
| // Test if the significand excluding the integral bit is all zeros. This |
| // allows us to test for binade boundaries. |
| const integerPart *Parts = significandParts(); |
| const unsigned PartCount = partCountForBits(semantics->precision); |
| |
| for (unsigned i = 0; i < PartCount - 1; i++) |
| if (Parts[i]) |
| return false; |
| |
| // Compute how many bits are used in the final word. |
| const unsigned NumHighBits = |
| PartCount*integerPartWidth - semantics->precision + 1; |
| assert(NumHighBits < integerPartWidth && "Can not have more high bits to " |
| "clear than integerPartWidth"); |
| const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; |
| |
| if (Parts[PartCount - 1] & HighBitMask) |
| return false; |
| |
| return true; |
| } |
| |
| bool IEEEFloat::isLargest() const { |
| // The largest number by magnitude in our format will be the floating point |
| // number with maximum exponent and with significand that is all ones. |
| return isFiniteNonZero() && exponent == semantics->maxExponent |
| && isSignificandAllOnes(); |
| } |
| |
| bool IEEEFloat::isInteger() const { |
| // This could be made more efficient; I'm going for obviously correct. |
| if (!isFinite()) return false; |
| IEEEFloat truncated = *this; |
| truncated.roundToIntegral(rmTowardZero); |
| return compare(truncated) == cmpEqual; |
| } |
| |
| bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { |
| if (this == &rhs) |
| return true; |
| if (semantics != rhs.semantics || |
| category != rhs.category || |
| sign != rhs.sign) |
| return false; |
| if (category==fcZero || category==fcInfinity) |
| return true; |
| |
| if (isFiniteNonZero() && exponent != rhs.exponent) |
| return false; |
| |
| return std::equal(significandParts(), significandParts() + partCount(), |
| rhs.significandParts()); |
| } |
| |
| IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { |
| initialize(&ourSemantics); |
| sign = 0; |
| category = fcNormal; |
| zeroSignificand(); |
| exponent = ourSemantics.precision - 1; |
| significandParts()[0] = value; |
| normalize(rmNearestTiesToEven, lfExactlyZero); |
| } |
| |
| IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { |
| initialize(&ourSemantics); |
| makeZero(false); |
| } |
| |
| // Delegate to the previous constructor, because later copy constructor may |
| // actually inspects category, which can't be garbage. |
| IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) |
| : IEEEFloat(ourSemantics) {} |
| |
| IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { |
| initialize(rhs.semantics); |
| assign(rhs); |
| } |
| |
| IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { |
| *this = std::move(rhs); |
| } |
| |
| IEEEFloat::~IEEEFloat() { freeSignificand(); } |
| |
| unsigned int IEEEFloat::partCount() const { |
| return partCountForBits(semantics->precision + 1); |
| } |
| |
| const IEEEFloat::integerPart *IEEEFloat::significandParts() const { |
| return const_cast<IEEEFloat *>(this)->significandParts(); |
| } |
| |
| IEEEFloat::integerPart *IEEEFloat::significandParts() { |
| if (partCount() > 1) |
| return significand.parts; |
| else |
| return &significand.part; |
| } |
| |
| void IEEEFloat::zeroSignificand() { |
| APInt::tcSet(significandParts(), 0, partCount()); |
| } |
| |
| /* Increment an fcNormal floating point number's significand. */ |
| void IEEEFloat::incrementSignificand() { |
| integerPart carry; |
| |
| carry = APInt::tcIncrement(significandParts(), partCount()); |
| |
| /* Our callers should never cause us to overflow. */ |
| assert(carry == 0); |
| (void)carry; |
| } |
| |
| /* Add the significand of the RHS. Returns the carry flag. */ |
| IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { |
| integerPart *parts; |
| |
| parts = significandParts(); |
| |
| assert(semantics == rhs.semantics); |
| assert(exponent == rhs.exponent); |
| |
| return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); |
| } |
| |
| /* Subtract the significand of the RHS with a borrow flag. Returns |
| the borrow flag. */ |
| IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, |
| integerPart borrow) { |
| integerPart *parts; |
| |
| parts = significandParts(); |
| |
| assert(semantics == rhs.semantics); |
| assert(exponent == rhs.exponent); |
| |
| return APInt::tcSubtract(parts, rhs.significandParts(), borrow, |
| partCount()); |
| } |
| |
| /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it |
| on to the full-precision result of the multiplication. Returns the |
| lost fraction. */ |
| lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, |
| IEEEFloat addend) { |
| unsigned int omsb; // One, not zero, based MSB. |
| unsigned int partsCount, newPartsCount, precision; |
| integerPart *lhsSignificand; |
| integerPart scratch[4]; |
| integerPart *fullSignificand; |
| lostFraction lost_fraction; |
| bool ignored; |
| |
| assert(semantics == rhs.semantics); |
| |
| precision = semantics->precision; |
| |
| // Allocate space for twice as many bits as the original significand, plus one |
| // extra bit for the addition to overflow into. |
| newPartsCount = partCountForBits(precision * 2 + 1); |
| |
| if (newPartsCount > 4) |
| fullSignificand = new integerPart[newPartsCount]; |
| else |
| fullSignificand = scratch; |
| |
| lhsSignificand = significandParts(); |
| partsCount = partCount(); |
| |
| APInt::tcFullMultiply(fullSignificand, lhsSignificand, |
| rhs.significandParts(), partsCount, partsCount); |
| |
| lost_fraction = lfExactlyZero; |
| omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; |
| exponent += rhs.exponent; |
| |
| // Assume the operands involved in the multiplication are single-precision |
| // FP, and the two multiplicants are: |
| // *this = a23 . a22 ... a0 * 2^e1 |
| // rhs = b23 . b22 ... b0 * 2^e2 |
| // the result of multiplication is: |
| // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) |
| // Note that there are three significant bits at the left-hand side of the |
| // radix point: two for the multiplication, and an overflow bit for the |
| // addition (that will always be zero at this point). Move the radix point |
| // toward left by two bits, and adjust exponent accordingly. |
| exponent += 2; |
| |
| if (addend.isNonZero()) { |
| // The intermediate result of the multiplication has "2 * precision" |
| // signicant bit; adjust the addend to be consistent with mul result. |
| // |
| Significand savedSignificand = significand; |
| const fltSemantics *savedSemantics = semantics; |
| fltSemantics extendedSemantics; |
| opStatus status; |
| unsigned int extendedPrecision; |
| |
| // Normalize our MSB to one below the top bit to allow for overflow. |
| extendedPrecision = 2 * precision + 1; |
| if (omsb != extendedPrecision - 1) { |
| assert(extendedPrecision > omsb); |
| APInt::tcShiftLeft(fullSignificand, newPartsCount, |
| (extendedPrecision - 1) - omsb); |
| exponent -= (extendedPrecision - 1) - omsb; |
| } |
| |
| /* Create new semantics. */ |
| extendedSemantics = *semantics; |
| extendedSemantics.precision = extendedPrecision; |
| |
| if (newPartsCount == 1) |
| significand.part = fullSignificand[0]; |
| else |
| significand.parts = fullSignificand; |
| semantics = &extendedSemantics; |
| |
| // Make a copy so we can convert it to the extended semantics. |
| // Note that we cannot convert the addend directly, as the extendedSemantics |
| // is a local variable (which we take a reference to). |
| IEEEFloat extendedAddend(addend); |
| status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); |
| assert(status == opOK); |
| (void)status; |
| |
| // Shift the significand of the addend right by one bit. This guarantees |
| // that the high bit of the significand is zero (same as fullSignificand), |
| // so the addition will overflow (if it does overflow at all) into the top bit. |
| lost_fraction = extendedAddend.shiftSignificandRight(1); |
| assert(lost_fraction == lfExactlyZero && |
| "Lost precision while shifting addend for fused-multiply-add."); |
| |
| lost_fraction = addOrSubtractSignificand(extendedAddend, false); |
| |
| /* Restore our state. */ |
| if (newPartsCount == 1) |
| fullSignificand[0] = significand.part; |
| significand = savedSignificand; |
| semantics = savedSemantics; |
| |
| omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; |
| } |
| |
| // Convert the result having "2 * precision" significant-bits back to the one |
| // having "precision" significant-bits. First, move the radix point from |
| // poision "2*precision - 1" to "precision - 1". The exponent need to be |
| // adjusted by "2*precision - 1" - "precision - 1" = "precision". |
| exponent -= precision + 1; |
| |
| // In case MSB resides at the left-hand side of radix point, shift the |
| // mantissa right by some amount to make sure the MSB reside right before |
| // the radix point (i.e. "MSB . rest-significant-bits"). |
| // |
| // Note that the result is not normalized when "omsb < precision". So, the |
| // caller needs to call IEEEFloat::normalize() if normalized value is |
| // expected. |
| if (omsb > precision) { |
| unsigned int bits, significantParts; |
| lostFraction lf; |
| |
| bits = omsb - precision; |
| significantParts = partCountForBits(omsb); |
| lf = shiftRight(fullSignificand, significantParts, bits); |
| lost_fraction = combineLostFractions(lf, lost_fraction); |
| exponent += bits; |
| } |
| |
| APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); |
| |
| if (newPartsCount > 4) |
| delete [] fullSignificand; |
| |
| return lost_fraction; |
| } |
| |
| lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { |
| return multiplySignificand(rhs, IEEEFloat(*semantics)); |
| } |
| |
| /* Multiply the significands of LHS and RHS to DST. */ |
| lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { |
| unsigned int bit, i, partsCount; |
| const integerPart *rhsSignificand; |
| integerPart *lhsSignificand, *dividend, *divisor; |
| integerPart scratch[4]; |
| lostFraction lost_fraction; |
| |
| assert(semantics == rhs.semantics); |
| |
| lhsSignificand = significandParts(); |
| rhsSignificand = rhs.significandParts(); |
| partsCount = partCount(); |
| |
| if (partsCount > 2) |
| dividend = new integerPart[partsCount * 2]; |
| else |
| dividend = scratch; |
| |
| divisor = dividend + partsCount; |
| |
| /* Copy the dividend and divisor as they will be modified in-place. */ |
| for (i = 0; i < partsCount; i++) { |
| dividend[i] = lhsSignificand[i]; |
| divisor[i] = rhsSignificand[i]; |
| lhsSignificand[i] = 0; |
| } |
| |
| exponent -= rhs.exponent; |
| |
| unsigned int precision = semantics->precision; |
| |
| /* Normalize the divisor. */ |
| bit = precision - APInt::tcMSB(divisor, partsCount) - 1; |
| if (bit) { |
| exponent += bit; |
| APInt::tcShiftLeft(divisor, partsCount, bit); |
| } |
| |
| /* Normalize the dividend. */ |
| bit = precision - APInt::tcMSB(dividend, partsCount) - 1; |
| if (bit) { |
| exponent -= bit; |
| APInt::tcShiftLeft(dividend, partsCount, bit); |
| } |
| |
| /* Ensure the dividend >= divisor initially for the loop below. |
| Incidentally, this means that the division loop below is |
| guaranteed to set the integer bit to one. */ |
| if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { |
| exponent--; |
| APInt::tcShiftLeft(dividend, partsCount, 1); |
| assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); |
| } |
| |
| /* Long division. */ |
| for (bit = precision; bit; bit -= 1) { |
| if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { |
| APInt::tcSubtract(dividend, divisor, 0, partsCount); |
| APInt::tcSetBit(lhsSignificand, bit - 1); |
| } |
| |
| APInt::tcShiftLeft(dividend, partsCount, 1); |
| } |
| |
| /* Figure out the lost fraction. */ |
| int cmp = APInt::tcCompare(dividend, divisor, partsCount); |
| |
| if (cmp > 0) |
| lost_fraction = lfMoreThanHalf; |
| else if (cmp == 0) |
| lost_fraction = lfExactlyHalf; |
| else if (APInt::tcIsZero(dividend, partsCount)) |
| lost_fraction = lfExactlyZero; |
| else |
| lost_fraction = lfLessThanHalf; |
| |
| if (partsCount > 2) |
| delete [] dividend; |
| |
| return lost_fraction; |
| } |
| |
| unsigned int IEEEFloat::significandMSB() const { |
| return APInt::tcMSB(significandParts(), partCount()); |
| } |
| |
| unsigned int IEEEFloat::significandLSB() const { |
| return APInt::tcLSB(significandParts(), partCount()); |
| } |
| |
| /* Note that a zero result is NOT normalized to fcZero. */ |
| lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { |
| /* Our exponent should not overflow. */ |
| assert((ExponentType) (exponent + bits) >= exponent); |
| |
| exponent += bits; |
| |
| return shiftRight(significandParts(), partCount(), bits); |
| } |
| |
| /* Shift the significand left BITS bits, subtract BITS from its exponent. */ |
| void IEEEFloat::shiftSignificandLeft(unsigned int bits) { |
| assert(bits < semantics->precision); |
| |
| if (bits) { |
| unsigned int partsCount = partCount(); |
| |
| APInt::tcShiftLeft(significandParts(), partsCount, bits); |
| exponent -= bits; |
| |
| assert(!APInt::tcIsZero(significandParts(), partsCount)); |
| } |
| } |
| |
| IEEEFloat::cmpResult |
| IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { |
| int compare; |
| |
| assert(semantics == rhs.semantics); |
| assert(isFiniteNonZero()); |
| assert(rhs.isFiniteNonZero()); |
| |
| compare = exponent - rhs.exponent; |
| |
| /* If exponents are equal, do an unsigned bignum comparison of the |
| significands. */ |
| if (compare == 0) |
| compare = APInt::tcCompare(significandParts(), rhs.significandParts(), |
| partCount()); |
| |
| if (compare > 0) |
| return cmpGreaterThan; |
| else if (compare < 0) |
| return cmpLessThan; |
| else |
| return cmpEqual; |
| } |
| |
| /* Set the least significant BITS bits of a bignum, clear the |
| rest. */ |
| static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, |
| unsigned bits) { |
| unsigned i = 0; |
| while (bits > APInt::APINT_BITS_PER_WORD) { |
| dst[i++] = ~(APInt::WordType)0; |
| bits -= APInt::APINT_BITS_PER_WORD; |
| } |
| |
| if (bits) |
| dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); |
| |
| while (i < parts) |
| dst[i++] = 0; |
| } |
| |
| /* Handle overflow. Sign is preserved. We either become infinity or |
| the largest finite number. */ |
| IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { |
| /* Infinity? */ |
| if (rounding_mode == rmNearestTiesToEven || |
| rounding_mode == rmNearestTiesToAway || |
| (rounding_mode == rmTowardPositive && !sign) || |
| (rounding_mode == rmTowardNegative && sign)) { |
| category = fcInfinity; |
| return (opStatus) (opOverflow | opInexact); |
| } |
| |
| /* Otherwise we become the largest finite number. */ |
| category = fcNormal; |
| exponent = semantics->maxExponent; |
| tcSetLeastSignificantBits(significandParts(), partCount(), |
| semantics->precision); |
| |
| return opInexact; |
| } |
| |
| /* Returns TRUE if, when truncating the current number, with BIT the |
| new LSB, with the given lost fraction and rounding mode, the result |
| would need to be rounded away from zero (i.e., by increasing the |
| signficand). This routine must work for fcZero of both signs, and |
| fcNormal numbers. */ |
| bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, |
| lostFraction lost_fraction, |
| unsigned int bit) const { |
| /* NaNs and infinities should not have lost fractions. */ |
| assert(isFiniteNonZero() || category == fcZero); |
| |
| /* Current callers never pass this so we don't handle it. */ |
| assert(lost_fraction != lfExactlyZero); |
| |
| switch (rounding_mode) { |
| case rmNearestTiesToAway: |
| return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; |
| |
| case rmNearestTiesToEven: |
| if (lost_fraction == lfMoreThanHalf) |
| return true; |
| |
| /* Our zeroes don't have a significand to test. */ |
| if (lost_fraction == lfExactlyHalf && category != fcZero) |
| return APInt::tcExtractBit(significandParts(), bit); |
| |
| return false; |
| |
| case rmTowardZero: |
| return false; |
| |
| case rmTowardPositive: |
| return !sign; |
| |
| case rmTowardNegative: |
| return sign; |
| |
| default: |
| break; |
| } |
| llvm_unreachable("Invalid rounding mode found"); |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, |
| lostFraction lost_fraction) { |
| unsigned int omsb; /* One, not zero, based MSB. */ |
| int exponentChange; |
| |
| if (!isFiniteNonZero()) |
| return opOK; |
| |
| /* Before rounding normalize the exponent of fcNormal numbers. */ |
| omsb = significandMSB() + 1; |
| |
| if (omsb) { |
| /* OMSB is numbered from 1. We want to place it in the integer |
| bit numbered PRECISION if possible, with a compensating change in |
| the exponent. */ |
| exponentChange = omsb - semantics->precision; |
| |
| /* If the resulting exponent is too high, overflow according to |
| the rounding mode. */ |
| if (exponent + exponentChange > semantics->maxExponent) |
| return handleOverflow(rounding_mode); |
| |
| /* Subnormal numbers have exponent minExponent, and their MSB |
| is forced based on that. */ |
| if (exponent + exponentChange < semantics->minExponent) |
| exponentChange = semantics->minExponent - exponent; |
| |
| /* Shifting left is easy as we don't lose precision. */ |
| if (exponentChange < 0) { |
| assert(lost_fraction == lfExactlyZero); |
| |
| shiftSignificandLeft(-exponentChange); |
| |
| return opOK; |
| } |
| |
| if (exponentChange > 0) { |
| lostFraction lf; |
| |
| /* Shift right and capture any new lost fraction. */ |
| lf = shiftSignificandRight(exponentChange); |
| |
| lost_fraction = combineLostFractions(lf, lost_fraction); |
| |
| /* Keep OMSB up-to-date. */ |
| if (omsb > (unsigned) exponentChange) |
| omsb -= exponentChange; |
| else |
| omsb = 0; |
| } |
| } |
| |
| /* Now round the number according to rounding_mode given the lost |
| fraction. */ |
| |
| /* As specified in IEEE 754, since we do not trap we do not report |
| underflow for exact results. */ |
| if (lost_fraction == lfExactlyZero) { |
| /* Canonicalize zeroes. */ |
| if (omsb == 0) |
| category = fcZero; |
| |
| return opOK; |
| } |
| |
| /* Increment the significand if we're rounding away from zero. */ |
| if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { |
| if (omsb == 0) |
| exponent = semantics->minExponent; |
| |
| incrementSignificand(); |
| omsb = significandMSB() + 1; |
| |
| /* Did the significand increment overflow? */ |
| if (omsb == (unsigned) semantics->precision + 1) { |
| /* Renormalize by incrementing the exponent and shifting our |
| significand right one. However if we already have the |
| maximum exponent we overflow to infinity. */ |
| if (exponent == semantics->maxExponent) { |
| category = fcInfinity; |
| |
| return (opStatus) (opOverflow | opInexact); |
| } |
| |
| shiftSignificandRight(1); |
| |
| return opInexact; |
| } |
| } |
| |
| /* The normal case - we were and are not denormal, and any |
| significand increment above didn't overflow. */ |
| if (omsb == semantics->precision) |
| return opInexact; |
| |
| /* We have a non-zero denormal. */ |
| assert(omsb < semantics->precision); |
| |
| /* Canonicalize zeroes. */ |
| if (omsb == 0) |
| category = fcZero; |
| |
| /* The fcZero case is a denormal that underflowed to zero. */ |
| return (opStatus) (opUnderflow | opInexact); |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, |
| bool subtract) { |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| assign(rhs); |
| LLVM_FALLTHROUGH; |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| if (isSignaling()) { |
| makeQuiet(); |
| return opInvalidOp; |
| } |
| return rhs.isSignaling() ? opInvalidOp : opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| category = fcInfinity; |
| sign = rhs.sign ^ subtract; |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| assign(rhs); |
| sign = rhs.sign ^ subtract; |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| /* Sign depends on rounding mode; handled by caller. */ |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| /* Differently signed infinities can only be validly |
| subtracted. */ |
| if (((sign ^ rhs.sign)!=0) != subtract) { |
| makeNaN(); |
| return opInvalidOp; |
| } |
| |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| return opDivByZero; |
| } |
| } |
| |
| /* Add or subtract two normal numbers. */ |
| lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, |
| bool subtract) { |
| integerPart carry; |
| lostFraction lost_fraction; |
| int bits; |
| |
| /* Determine if the operation on the absolute values is effectively |
| an addition or subtraction. */ |
| subtract ^= static_cast<bool>(sign ^ rhs.sign); |
| |
| /* Are we bigger exponent-wise than the RHS? */ |
| bits = exponent - rhs.exponent; |
| |
| /* Subtraction is more subtle than one might naively expect. */ |
| if (subtract) { |
| IEEEFloat temp_rhs(rhs); |
| |
| if (bits == 0) |
| lost_fraction = lfExactlyZero; |
| else if (bits > 0) { |
| lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); |
| shiftSignificandLeft(1); |
| } else { |
| lost_fraction = shiftSignificandRight(-bits - 1); |
| temp_rhs.shiftSignificandLeft(1); |
| } |
| |
| // Should we reverse the subtraction. |
| if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { |
| carry = temp_rhs.subtractSignificand |
| (*this, lost_fraction != lfExactlyZero); |
| copySignificand(temp_rhs); |
| sign = !sign; |
| } else { |
| carry = subtractSignificand |
| (temp_rhs, lost_fraction != lfExactlyZero); |
| } |
| |
| /* Invert the lost fraction - it was on the RHS and |
| subtracted. */ |
| if (lost_fraction == lfLessThanHalf) |
| lost_fraction = lfMoreThanHalf; |
| else if (lost_fraction == lfMoreThanHalf) |
| lost_fraction = lfLessThanHalf; |
| |
| /* The code above is intended to ensure that no borrow is |
| necessary. */ |
| assert(!carry); |
| (void)carry; |
| } else { |
| if (bits > 0) { |
| IEEEFloat temp_rhs(rhs); |
| |
| lost_fraction = temp_rhs.shiftSignificandRight(bits); |
| carry = addSignificand(temp_rhs); |
| } else { |
| lost_fraction = shiftSignificandRight(-bits); |
| carry = addSignificand(rhs); |
| } |
| |
| /* We have a guard bit; generating a carry cannot happen. */ |
| assert(!carry); |
| (void)carry; |
| } |
| |
| return lost_fraction; |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| assign(rhs); |
| sign = false; |
| LLVM_FALLTHROUGH; |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| sign ^= rhs.sign; // restore the original sign |
| if (isSignaling()) { |
| makeQuiet(); |
| return opInvalidOp; |
| } |
| return rhs.isSignaling() ? opInvalidOp : opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| category = fcInfinity; |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| category = fcZero; |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| makeNaN(); |
| return opInvalidOp; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| return opOK; |
| } |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| assign(rhs); |
| sign = false; |
| LLVM_FALLTHROUGH; |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| sign ^= rhs.sign; // restore the original sign |
| if (isSignaling()) { |
| makeQuiet(); |
| return opInvalidOp; |
| } |
| return rhs.isSignaling() ? opInvalidOp : opOK; |
| |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| category = fcZero; |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| category = fcInfinity; |
| return opDivByZero; |
| |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| makeNaN(); |
| return opInvalidOp; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| return opOK; |
| } |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| assign(rhs); |
| LLVM_FALLTHROUGH; |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| if (isSignaling()) { |
| makeQuiet(); |
| return opInvalidOp; |
| } |
| return rhs.isSignaling() ? opInvalidOp : opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| makeNaN(); |
| return opInvalidOp; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| return opOK; |
| } |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| assign(rhs); |
| LLVM_FALLTHROUGH; |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| if (isSignaling()) { |
| makeQuiet(); |
| return opInvalidOp; |
| } |
| return rhs.isSignaling() ? opInvalidOp : opOK; |
| |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| return opOK; |
| |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| makeNaN(); |
| return opInvalidOp; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| return opDivByZero; // fake status, indicating this is not a special case |
| } |
| } |
| |
| /* Change sign. */ |
| void IEEEFloat::changeSign() { |
| /* Look mummy, this one's easy. */ |
| sign = !sign; |
| } |
| |
| /* Normalized addition or subtraction. */ |
| IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, |
| roundingMode rounding_mode, |
| bool subtract) { |
| opStatus fs; |
| |
| fs = addOrSubtractSpecials(rhs, subtract); |
| |
| /* This return code means it was not a simple case. */ |
| if (fs == opDivByZero) { |
| lostFraction lost_fraction; |
| |
| lost_fraction = addOrSubtractSignificand(rhs, subtract); |
| fs = normalize(rounding_mode, lost_fraction); |
| |
| /* Can only be zero if we lost no fraction. */ |
| assert(category != fcZero || lost_fraction == lfExactlyZero); |
| } |
| |
| /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
| positive zero unless rounding to minus infinity, except that |
| adding two like-signed zeroes gives that zero. */ |
| if (category == fcZero) { |
| if (rhs.category != fcZero || (sign == rhs.sign) == subtract) |
| sign = (rounding_mode == rmTowardNegative); |
| } |
| |
| return fs; |
| } |
| |
| /* Normalized addition. */ |
| IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, |
| roundingMode rounding_mode) { |
| return addOrSubtract(rhs, rounding_mode, false); |
| } |
| |
| /* Normalized subtraction. */ |
| IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, |
| roundingMode rounding_mode) { |
| return addOrSubtract(rhs, rounding_mode, true); |
| } |
| |
| /* Normalized multiply. */ |
| IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, |
| roundingMode rounding_mode) { |
| opStatus fs; |
| |
| sign ^= rhs.sign; |
| fs = multiplySpecials(rhs); |
| |
| if (isFiniteNonZero()) { |
| lostFraction lost_fraction = multiplySignificand(rhs); |
| fs = normalize(rounding_mode, lost_fraction); |
| if (lost_fraction != lfExactlyZero) |
| fs = (opStatus) (fs | opInexact); |
| } |
| |
| return fs; |
| } |
| |
| /* Normalized divide. */ |
| IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, |
| roundingMode rounding_mode) { |
| opStatus fs; |
| |
| sign ^= rhs.sign; |
| fs = divideSpecials(rhs); |
| |
| if (isFiniteNonZero()) { |
| lostFraction lost_fraction = divideSignificand(rhs); |
| fs = normalize(rounding_mode, lost_fraction); |
| if (lost_fraction != lfExactlyZero) |
| fs = (opStatus) (fs | opInexact); |
| } |
| |
| return fs; |
| } |
| |
| /* Normalized remainder. */ |
| IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { |
| opStatus fs; |
| unsigned int origSign = sign; |
| |
| // First handle the special cases. |
| fs = remainderSpecials(rhs); |
| if (fs != opDivByZero) |
| return fs; |
| |
| fs = opOK; |
| |
| // Make sure the current value is less than twice the denom. If the addition |
| // did not succeed (an overflow has happened), which means that the finite |
| // value we currently posses must be less than twice the denom (as we are |
| // using the same semantics). |
| IEEEFloat P2 = rhs; |
| if (P2.add(rhs, rmNearestTiesToEven) == opOK) { |
| fs = mod(P2); |
| assert(fs == opOK); |
| } |
| |
| // Lets work with absolute numbers. |
| IEEEFloat P = rhs; |
| P.sign = false; |
| sign = false; |
| |
| // |
| // To calculate the remainder we use the following scheme. |
| // |
| // The remainder is defained as follows: |
| // |
| // remainder = numer - rquot * denom = x - r * p |
| // |
| // Where r is the result of: x/p, rounded toward the nearest integral value |
| // (with halfway cases rounded toward the even number). |
| // |
| // Currently, (after x mod 2p): |
| // r is the number of 2p's present inside x, which is inherently, an even |
| // number of p's. |
| // |
| // We may split the remaining calculation into 4 options: |
| // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
| // - if x == 0.5p then we round to the nearest even number which is 0, and we |
| // are done as well. |
| // - if 0.5p < x < p then we round to nearest number which is 1, and we have |
| // to subtract 1p at least once. |
| // - if x >= p then we must subtract p at least once, as x must be a |
| // remainder. |
| // |
| // By now, we were done, or we added 1 to r, which in turn, now an odd number. |
| // |
| // We can now split the remaining calculation to the following 3 options: |
| // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
| // - if x == 0.5p then we round to the nearest even number. As r is odd, we |
| // must round up to the next even number. so we must subtract p once more. |
| // - if x > 0.5p (and inherently x < p) then we must round r up to the next |
| // integral, and subtract p once more. |
| // |
| |
| // Extend the semantics to prevent an overflow/underflow or inexact result. |
| bool losesInfo; |
| fltSemantics extendedSemantics = *semantics; |
| extendedSemantics.maxExponent++; |
| extendedSemantics.minExponent--; |
| extendedSemantics.precision += 2; |
| |
| IEEEFloat VEx = *this; |
| fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); |
| assert(fs == opOK && !losesInfo); |
| IEEEFloat PEx = P; |
| fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); |
| assert(fs == opOK && !losesInfo); |
| |
| // It is simpler to work with 2x instead of 0.5p, and we do not need to lose |
| // any fraction. |
| fs = VEx.add(VEx, rmNearestTiesToEven); |
| assert(fs == opOK); |
| |
| if (VEx.compare(PEx) == cmpGreaterThan) { |
| fs = subtract(P, rmNearestTiesToEven); |
| assert(fs == opOK); |
| |
| // Make VEx = this.add(this), but because we have different semantics, we do |
| // not want to `convert` again, so we just subtract PEx twice (which equals |
| // to the desired value). |
| fs = VEx.subtract(PEx, rmNearestTiesToEven); |
| assert(fs == opOK); |
| fs = VEx.subtract(PEx, rmNearestTiesToEven); |
| assert(fs == opOK); |
| |
| cmpResult result = VEx.compare(PEx); |
| if (result == cmpGreaterThan || result == cmpEqual) { |
| fs = subtract(P, rmNearestTiesToEven); |
| assert(fs == opOK); |
| } |
| } |
| |
| if (isZero()) |
| sign = origSign; // IEEE754 requires this |
| else |
| sign ^= origSign; |
| return fs; |
| } |
| |
| /* Normalized llvm frem (C fmod). */ |
| IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { |
| opStatus fs; |
| fs = modSpecials(rhs); |
| unsigned int origSign = sign; |
| |
| while (isFiniteNonZero() && rhs.isFiniteNonZero() && |
| compareAbsoluteValue(rhs) != cmpLessThan) { |
| IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); |
| if (compareAbsoluteValue(V) == cmpLessThan) |
| V = scalbn(V, -1, rmNearestTiesToEven); |
| V.sign = sign; |
| |
| fs = subtract(V, rmNearestTiesToEven); |
| assert(fs==opOK); |
| } |
| if (isZero()) |
| sign = origSign; // fmod requires this |
| return fs; |
| } |
| |
| /* Normalized fused-multiply-add. */ |
| IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, |
| const IEEEFloat &addend, |
| roundingMode rounding_mode) { |
| opStatus fs; |
| |
| /* Post-multiplication sign, before addition. */ |
| sign ^= multiplicand.sign; |
| |
| /* If and only if all arguments are normal do we need to do an |
| extended-precision calculation. */ |
| if (isFiniteNonZero() && |
| multiplicand.isFiniteNonZero() && |
| addend.isFinite()) { |
| lostFraction lost_fraction; |
| |
| lost_fraction = multiplySignificand(multiplicand, addend); |
| fs = normalize(rounding_mode, lost_fraction); |
| if (lost_fraction != lfExactlyZero) |
| fs = (opStatus) (fs | opInexact); |
| |
| /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
| positive zero unless rounding to minus infinity, except that |
| adding two like-signed zeroes gives that zero. */ |
| if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) |
| sign = (rounding_mode == rmTowardNegative); |
| } else { |
| fs = multiplySpecials(multiplicand); |
| |
| /* FS can only be opOK or opInvalidOp. There is no more work |
| to do in the latter case. The IEEE-754R standard says it is |
| implementation-defined in this case whether, if ADDEND is a |
| quiet NaN, we raise invalid op; this implementation does so. |
| |
| If we need to do the addition we can do so with normal |
| precision. */ |
| if (fs == opOK) |
| fs = addOrSubtract(addend, rounding_mode, false); |
| } |
| |
| return fs; |
| } |
| |
| /* Rounding-mode correct round to integral value. */ |
| IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { |
| opStatus fs; |
| |
| if (isInfinity()) |
| // [IEEE Std 754-2008 6.1]: |
| // The behavior of infinity in floating-point arithmetic is derived from the |
| // limiting cases of real arithmetic with operands of arbitrarily |
| // large magnitude, when such a limit exists. |
| // ... |
| // Operations on infinite operands are usually exact and therefore signal no |
| // exceptions ... |
| return opOK; |
| |
| if (isNaN()) { |
| if (isSignaling()) { |
| // [IEEE Std 754-2008 6.2]: |
| // Under default exception handling, any operation signaling an invalid |
| // operation exception and for which a floating-point result is to be |
| // delivered shall deliver a quiet NaN. |
| makeQuiet(); |
| // [IEEE Std 754-2008 6.2]: |
| // Signaling NaNs shall be reserved operands that, under default exception |
| // handling, signal the invalid operation exception(see 7.2) for every |
| // general-computational and signaling-computational operation except for |
| // the conversions described in 5.12. |
| return opInvalidOp; |
| } else { |
| // [IEEE Std 754-2008 6.2]: |
| // For an operation with quiet NaN inputs, other than maximum and minimum |
| // operations, if a floating-point result is to be delivered the result |
| // shall be a quiet NaN which should be one of the input NaNs. |
| // ... |
| // Every general-computational and quiet-computational operation involving |
| // one or more input NaNs, none of them signaling, shall signal no |
| // exception, except fusedMultiplyAdd might signal the invalid operation |
| // exception(see 7.2). |
| return opOK; |
| } |
| } |
| |
| if (isZero()) { |
| // [IEEE Std 754-2008 6.3]: |
| // ... the sign of the result of conversions, the quantize operation, the |
| // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is |
| // the sign of the first or only operand. |
| return opOK; |
| } |
| |
| // If the exponent is large enough, we know that this value is already |
| // integral, and the arithmetic below would potentially cause it to saturate |
| // to +/-Inf. Bail out early instead. |
| if (exponent+1 >= (int)semanticsPrecision(*semantics)) |
| return opOK; |
| |
| // The algorithm here is quite simple: we add 2^(p-1), where p is the |
| // precision of our format, and then subtract it back off again. The choice |
| // of rounding modes for the addition/subtraction determines the rounding mode |
| // for our integral rounding as well. |
| // NOTE: When the input value is negative, we do subtraction followed by |
| // addition instead. |
| APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); |
| IntegerConstant <<= semanticsPrecision(*semantics)-1; |
| IEEEFloat MagicConstant(*semantics); |
| fs = MagicConstant.convertFromAPInt(IntegerConstant, false, |
| rmNearestTiesToEven); |
| assert(fs == opOK); |
| MagicConstant.sign = sign; |
| |
| // Preserve the input sign so that we can handle the case of zero result |
| // correctly. |
| bool inputSign = isNegative(); |
| |
| fs = add(MagicConstant, rounding_mode); |
| |
| // Current value and 'MagicConstant' are both integers, so the result of the |
| // subtraction is always exact according to Sterbenz' lemma. |
| subtract(MagicConstant, rounding_mode); |
| |
| // Restore the input sign. |
| if (inputSign != isNegative()) |
| changeSign(); |
| |
| return fs; |
| } |
| |
| |
| /* Comparison requires normalized numbers. */ |
| IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { |
| cmpResult result; |
| |
| assert(semantics == rhs.semantics); |
| |
| switch (PackCategoriesIntoKey(category, rhs.category)) { |
| default: |
| llvm_unreachable(nullptr); |
| |
| case PackCategoriesIntoKey(fcNaN, fcZero): |
| case PackCategoriesIntoKey(fcNaN, fcNormal): |
| case PackCategoriesIntoKey(fcNaN, fcInfinity): |
| case PackCategoriesIntoKey(fcNaN, fcNaN): |
| case PackCategoriesIntoKey(fcZero, fcNaN): |
| case PackCategoriesIntoKey(fcNormal, fcNaN): |
| case PackCategoriesIntoKey(fcInfinity, fcNaN): |
| return cmpUnordered; |
| |
| case PackCategoriesIntoKey(fcInfinity, fcNormal): |
| case PackCategoriesIntoKey(fcInfinity, fcZero): |
| case PackCategoriesIntoKey(fcNormal, fcZero): |
| if (sign) |
| return cmpLessThan; |
| else |
| return cmpGreaterThan; |
| |
| case PackCategoriesIntoKey(fcNormal, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcInfinity): |
| case PackCategoriesIntoKey(fcZero, fcNormal): |
| if (rhs.sign) |
| return cmpGreaterThan; |
| else |
| return cmpLessThan; |
| |
| case PackCategoriesIntoKey(fcInfinity, fcInfinity): |
| if (sign == rhs.sign) |
| return cmpEqual; |
| else if (sign) |
| return cmpLessThan; |
| else |
| return cmpGreaterThan; |
| |
| case PackCategoriesIntoKey(fcZero, fcZero): |
| return cmpEqual; |
| |
| case PackCategoriesIntoKey(fcNormal, fcNormal): |
| break; |
| } |
| |
| /* Two normal numbers. Do they have the same sign? */ |
| if (sign != rhs.sign) { |
| if (sign) |
| result = cmpLessThan; |
| else |
| result = cmpGreaterThan; |
| } else { |
| /* Compare absolute values; invert result if negative. */ |
| result = compareAbsoluteValue(rhs); |
| |
| if (sign) { |
| if (result == cmpLessThan) |
| result = cmpGreaterThan; |
| else if (result == cmpGreaterThan) |
| result = cmpLessThan; |
| } |
| } |
| |
| return result; |
| } |
| |
/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// \param toSemantics   the format to convert *this into, in place.
/// \param rounding_mode rounding applied when normalizing a finite non-zero
///                      value in the new format.
/// \param losesInfo     output flag; written unconditionally on every path,
///                      so it must not be null.
/// \returns the status from normalize() for finite non-zero values,
///          opInvalidOp when a signaling NaN was quieted, opOK otherwise.

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive shift: widening (significand moves left).  Negative shift:
  // narrowing (significand moves right and may lose bits).
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  if (shift < 0 && isFiniteNonZero()) {
    int exponentChange = significandMSB() + 1 - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    // Only finite non-zero values and NaNs have their significand carried
    // over; for the other categories the new parts stay zeroed.
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    // A NaN loses information if payload bits were shifted out or if it was
    // one of the x87-only encodings noted above.
    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (isSignaling()) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else {
    // Remaining categories (zero, infinity) convert without loss.
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}
| |
| /* Convert a floating point number to an integer according to the |
| rounding mode. If the rounded integer value is out of range this |
| returns an invalid operation exception and the contents of the |
| destination parts are unspecified. If the rounded value is in |
| range but the floating point number is not the exact integer, the C |
| standard doesn't require an inexact exception to be raised. IEEE |
| 854 does require it so we do that. |
| |
| Note that for conversions to integer type the C standard requires |
| round-to-zero to always be used. */ |
| IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( |
| MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, |
| roundingMode rounding_mode, bool *isExact) const { |
| lostFraction lost_fraction; |
| const integerPart *src; |
| unsigned int dstPartsCount, truncatedBits; |
| |
| *isExact = false; |
| |
| /* Handle the three special cases first. */ |
| if (category == fcInfinity || category == fcNaN) |
| return opInvalidOp; |
| |
| dstPartsCount = partCountForBits(width); |
| assert(dstPartsCount <= parts.size() && "Integer too big"); |
| |
| if (category == fcZero) { |
| APInt::tcSet(parts.data(), 0, dstPartsCount); |
| // Negative zero can't be represented as an int. |
| *isExact = !sign; |
| return opOK; |
| } |
| |
| src = significandParts(); |
| |
| /* Step 1: place our absolute value, with any fraction truncated, in |
| the destination. */ |
| if (exponent < 0) { |
| /* Our absolute value is less than one; truncate everything. */ |
| APInt::tcSet(parts.data(), 0, dstPartsCount); |
| /* For exponent -1 the integer bit represents .5, look at that. |
| For smaller exponents leftmost truncated bit is 0. */ |
| truncatedBits = semantics->precision -1U - exponent; |
| } else { |
| /* We want the most significant (exponent + 1) bits; the rest are |
| truncated. */ |
| unsigned int bits = exponent + 1U; |
| |
| /* Hopelessly large in magnitude? */ |
| if (bits > width) |
| return opInvalidOp; |
| |
| if (bits < semantics->precision) { |
| /* We truncate (semantics->precision - bits) bits. */ |
| truncatedBits = semantics->precision - bits; |
| APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); |
| } else { |
| /* We want at least as many bits as are available. */ |
| APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, |
| 0); |
| APInt::tcShiftLeft(parts.data(), dstPartsCount, |
| bits - semantics->precision); |
| truncatedBits = 0; |
| } |
| } |
| |
| /* Step 2: work out any lost fraction, and increment the absolute |
| value if we would round away from zero. */ |
| if (truncatedBits) { |
| lost_fraction = lostFractionThroughTruncation(src, partCount(), |
| truncatedBits); |
| if (lost_fraction != lfExactlyZero && |
| roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { |
| if (APInt::tcIncrement(parts.data(), dstPartsCount)) |
| return opInvalidOp; /* Overflow. */ |
| } |
| } else { |
| lost_fraction = lfExactlyZero; |
| } |
| |
| /* Step 3: check if we fit in the destination. */ |
| unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; |
| |
| if (sign) { |
| if (!isSigned) { |
| /* Negative numbers cannot be represented as unsigned. */ |
| if (omsb != 0) |
| return opInvalidOp; |
| } else { |
| /* It takes omsb bits to represent the unsigned integer value. |
| We lose a bit for the sign, but care is needed as the |
| maximally negative integer is a special case. */ |
| if (omsb == width && |
| APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) |
| return opInvalidOp; |
| |
| /* This case can happen because of rounding. */ |
| if (omsb > width) |
| return opInvalidOp; |
| } |
| |
| APInt::tcNegate (parts.data(), dstPartsCount); |
| } else { |
| if (omsb >= width + !isSigned) |
| return opInvalidOp; |
| } |
| |
| if (lost_fraction == lfExactlyZero) { |
| *isExact = true; |
| return opOK; |
| } else |
| return opInexact; |
| } |
| |
| /* Same as convertToSignExtendedInteger, except we provide |
| deterministic values in case of an invalid operation exception, |
| namely zero for NaNs and the minimal or maximal value respectively |
| for underflow or overflow. |
| The *isExact output tells whether the result is exact, in the sense |
| that converting it back to the original floating point type produces |
| the original value. This is almost equivalent to result==opOK, |
| except for negative zeroes. |
| */ |
| IEEEFloat::opStatus |
| IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, |
| unsigned int width, bool isSigned, |
| roundingMode rounding_mode, bool *isExact) const { |
| opStatus fs; |
| |
| fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, |
| isExact); |
| |
| if (fs == opInvalidOp) { |
| unsigned int bits, dstPartsCount; |
| |
| dstPartsCount = partCountForBits(width); |
| assert(dstPartsCount <= parts.size() && "Integer too big"); |
| |
| if (category == fcNaN) |
| bits = 0; |
| else if (sign) |
| bits = isSigned; |
| else |
| bits = width - isSigned; |
| |
| tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); |
| if (sign && isSigned) |
| APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); |
| } |
| |
| return fs; |
| } |
| |
| /* Convert an unsigned integer SRC to a floating point number, |
| rounding according to ROUNDING_MODE. The sign of the floating |
| point number is not modified. */ |
| IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( |
| const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { |
| unsigned int omsb, precision, dstCount; |
| integerPart *dst; |
| lostFraction lost_fraction; |
| |
| category = fcNormal; |
| omsb = APInt::tcMSB(src, srcCount) + 1; |
| dst = significandParts(); |
| dstCount = partCount(); |
| precision = semantics->precision; |
| |
| /* We want the most significant PRECISION bits of SRC. There may not |
| be that many; extract what we can. */ |
| if (precision <= omsb) { |
| exponent = omsb - 1; |
| lost_fraction = lostFractionThroughTruncation(src, srcCount, |
| omsb - precision); |
| APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); |
| } else { |
| exponent = precision - 1; |
| lost_fraction = lfExactlyZero; |
| APInt::tcExtract(dst, dstCount, src, omsb, 0); |
| } |
| |
| return normalize(rounding_mode, lost_fraction); |
| } |
| |
| IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, |
| roundingMode rounding_mode) { |
| unsigned int partCount = Val.getNumWords(); |
| APInt api = Val; |
| |
| sign = false; |
| if (isSigned && api.isNegative()) { |
| sign = true; |
| api = -api; |
| } |
| |
| return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); |
| } |
| |
| /* Convert a two's complement integer SRC to a floating point number, |
| rounding according to ROUNDING_MODE. ISSIGNED is true if the |
| integer is signed, in which case it must be sign-extended. */ |
| IEEEFloat::opStatus |
| IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, |
| unsigned int srcCount, bool isSigned, |
| roundingMode rounding_mode) { |
| opStatus status; |
| |
| if (isSigned && |
| APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { |
| integerPart *copy; |
| |
| /* If we're signed and negative negate a copy. */ |
| sign = true; |
| copy = new integerPart[srcCount]; |
| APInt::tcAssign(copy, src, srcCount); |
| APInt::tcNegate(copy, srcCount); |
| status = convertFromUnsignedParts(copy, srcCount, rounding_mode); |
| delete [] copy; |
| } else { |
| sign = false; |
| status = convertFromUnsignedParts(src, srcCount, rounding_mode); |
| } |
| |
| return status; |
| } |
| |
| /* FIXME: should this just take a const APInt reference? */ |
| IEEEFloat::opStatus |
| IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, |
| unsigned int width, bool isSigned, |
| roundingMode rounding_mode) { |
| unsigned int partCount = partCountForBits(width); |
| APInt api = APInt(width, makeArrayRef(parts, partCount)); |
| |
| sign = false; |
| if (isSigned && APInt::tcExtractBit(parts, width - 1)) { |
| sign = true; |
| api = -api; |
| } |
| |
| return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); |
| } |
| |
| Expected<IEEEFloat::opStatus> |
| IEEEFloat::convertFromHexadecimalString(StringRef s, |
| roundingMode rounding_mode) { |
| lostFraction lost_fraction = lfExactlyZero; |
| |
| category = fcNormal; |
| zeroSignificand(); |
| exponent = 0; |
| |
| integerPart *significand = significandParts(); |
| unsigned partsCount = partCount(); |
| unsigned bitPos = partsCount * integerPartWidth; |
| bool computedTrailingFraction = false; |
| |
| // Skip leading zeroes and any (hexa)decimal point. |
| StringRef::iterator begin = s.begin(); |
| StringRef::iterator end = s.end(); |
| StringRef::iterator dot; |
| auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); |
| if (!PtrOrErr) |
| return PtrOrErr.takeError(); |
| StringRef::iterator p = *PtrOrErr; |
| StringRef::iterator firstSignificantDigit = p; |
| |
| while (p != end) { |
| integerPart hex_value; |
| |
| if (*p == '.') { |
| if (dot != end) |
| return createError("String contains multiple dots"); |
| dot = p++; |
| continue; |
| } |
| |
| hex_value = hexDigitValue(*p); |
| if (hex_value == -1U) |
| break; |
| |
| p++; |
| |
| // Store the number while we have space. |
| if (bitPos) { |
| bitPos -= 4; |
| hex_value <<= bitPos % integerPartWidth; |
| significand[bitPos / integerPartWidth] |= hex_value; |
| } else if (!computedTrailingFraction) { |
| auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); |
| if (!FractOrErr) |
| return FractOrErr.takeError(); |
| lost_fraction = *FractOrErr; |
| computedTrailingFraction = true; |
| } |
| } |
| |
| /* Hex floats require an exponent but not a hexadecimal point. */ |
| if (p == end) |
| return createError("Hex strings require an exponent"); |
| if (*p != 'p' && *p != 'P') |
| return createError("Invalid character in significand"); |
| if (p == begin) |
| return createError("Significand has no digits"); |
| if (dot != end && p - begin == 1) |
| return createError("Significand has no digits"); |
| |
| /* Ignore the exponent if we are zero. */ |
| if (p != firstSignificantDigit) { |
| int expAdjustment; |
| |
| /* Implicit hexadecimal point? */ |
| if (dot == end) |
| dot = p; |
| |
| /* Calculate the exponent adjustment implicit in the number of |
| significant digits. */ |
| expAdjustment = static_cast<int>(dot - firstSignificantDigit); |
| if (expAdjustment < 0) |
| expAdjustment++; |
| expAdjustment = expAdjustment * 4 - 1; |
| |
| /* Adjust for writing the significand starting at the most |
| significant nibble. */ |
| expAdjustment += semantics->precision; |
| expAdjustment -= partsCount * integerPartWidth; |
| |
| /* Adjust for the given exponent. */ |
| auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); |
| if (!ExpOrErr) |
| return ExpOrErr.takeError(); |
| exponent = *ExpOrErr; |
| } |
| |
| return normalize(rounding_mode, lost_fraction); |
| } |
| |
/* Set *this to the value DECSIGPARTS * 10^EXP, where DECSIGPARTS is an
   unsigned integer of SIGPARTCOUNT parts, rounded according to
   ROUNDING_MODE.  The computation is carried out in a wider scratch format
   as significand * 5^|EXP| (multiplying or dividing depending on the sign
   of EXP) with the power of two folded into the exponent.  The scratch
   precision is doubled until the accumulated half-ulp error bound no longer
   exceeds the distance of the result from a rounding boundary, at which
   point truncating to the target precision is known to round correctly.
   Returns the status from normalize().  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics; its precision field is filled in on each pass of
     the loop below. */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  /* Initial working width: the target precision plus 11 extra bits. */
  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  /* Retry with twice the working precision until the result is provably
     correctly rounded. */
  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        /* excessPrecision is clamped for the extraction below, while
           truncatedBits keeps the unclamped count for the lost-fraction
           computation. */
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    /* Error bound and distance to the nearest rounding boundary, both in
       half-ulps. */
    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
| |
/* Parse STR as a decimal floating point literal (any sign has already been
   consumed by the caller) and store the value, rounded according to
   ROUNDING_MODE.  Zero inputs and exponents that are certain to overflow or
   underflow are handled without a full conversion; everything else has its
   digits accumulated into a multi-part integer and is handed to
   roundSignificandWithExponent.  Returns an Error if the text cannot be
   interpreted, otherwise the resulting status.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;

  /* Check whether the normalized exponent is high enough to overflow
     max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

  /* If it wasn't, then it also wasn't high enough to overflow max
     during the log-rebasing in the min-exponent check.  Check that it
     won't overflow min in either check, then perform the min-exponent
     check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

  /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount tracks how many parts are in use so far. */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      /* Gather as many digits as fit in one integerPart: val holds their
         value and multiplier the matching power of ten. */
      do {
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* Release the buffer before reporting the error. */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}
| |
| bool IEEEFloat::convertFromStringSpecials(StringRef str) { |
| const size_t MIN_NAME_SIZE = 3; |
| |
| if (str.size() < MIN_NAME_SIZE) |
| return false; |
| |
| if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { |
| makeInf(false); |
| return true; |
| } |
| |
| bool IsNegative = str.front() == '-'; |
| if (IsNegative) { |
| str = str.drop_front(); |
| if (str.size() < MIN_NAME_SIZE) |
| return false; |
| |
| if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { |
| makeInf(true); |
| return true; |
| } |
| } |
| |
| // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. |
| bool IsSignaling = str.front() == 's' || str.front() == 'S'; |
| if (IsSignaling) { |
| str = str.drop_front(); |
| if (str.size() < MIN_NAME_SIZE) |
| return false; |
| } |
| |
| if (str.startswith("nan") || str.startswith("NaN")) { |
| str = str.drop_front(3); |
| |
| // A NaN without payload. |
| if (str.empty()) { |
| makeNaN(IsSignaling, IsNegative); |
| return true; |
| } |
| |
| // Allow the payload to be inside parentheses. |
| if (str.front() == '(') { |
| // Parentheses should be balanced (and not empty). |
| if (str.size() <= 2 || str.back() != ')') |
| return false; |
| |
| str = str.slice(1, str.size() - 1); |
| } |
| |
| // Determine the payload number's radix. |
| unsigned Radix = 10; |
| if (str[0] == '0') { |
| if (str.size() > 1 && tolower(str[1]) == 'x') { |
| str = str.drop_front(2); |
| Radix = 16; |
| } else |
| Radix = 8; |
| } |
| |
| // Parse the payload and make the NaN. |
| APInt Payload; |
| if (!str.getAsInteger(Radix, Payload)) { |
| makeNaN(IsSignaling, IsNegative, &Payload); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| Expected<IEEEFloat::opStatus> |
| IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { |
| if (str.empty()) |
| return createError("Invalid string length"); |
| |
| // Handle special cases. |
| if (convertFromStringSpecials(str)) |
| return opOK; |
| |
| /* Handle a leading minus sign. */ |
| StringRef::iterator p = str.begin(); |
| size_t slen = str.size(); |
| sign = *p == '-' ? 1 : 0; |
| if (*p == '-' || *p == '+') { |
| p++; |
| slen--; |
| if (!slen) |
| return createError("String has no digits"); |
| } |
| |
| if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { |
| if (slen == 2) |
| return createError("Invalid string"); |
| return convertFromHexadecimalString(StringRef(p + 2, slen - 2), |
| rounding_mode); |
| } |
| |
| return convertFromDecimalString(StringRef(p, slen), rounding_mode); |
| } |
| |
| /* Write out a hexadecimal representation of the floating point value |
| to DST, which must be of sufficient size, in the C99 form |
| [-]0xh.hhhhp[+-]d. Return the number of characters written, |
| excluding the terminating NUL. |
| |
| If UPPERCASE, the output is in upper case, otherwise in lower case. |
| |
| HEXDIGITS digits appear altogether, rounding the value if |
| necessary. If HEXDIGITS is 0, the minimal precision to display the |
| number precisely is used instead. If nothing would appear after |
| the decimal point it is suppressed. |
| |
| The decimal exponent is always printed and has at least one digit. |
| Zero values display an exponent of zero. Infinities and NaNs |
| appear as "infinity" or "nan" respectively. |
| |
| The above rules are as specified by C99. There is ambiguity about |
| what the leading hexadecimal digit should be. This implementation |
| uses whatever is necessary so that the exponent is displayed as |
| stored. This implies the exponent will fall within the IEEE format |
| range, and the leading hexadecimal digit will be 0 (for denormals), |
| 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with |
| any other digits zero). |
| */ |
| unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, |
| bool upperCase, |
| roundingMode rounding_mode) const { |
| char *p; |
| |
| p = dst; |
| if (sign) |
| *dst++ = '-'; |
| |
| switch (category) { |
| case fcInfinity: |
| memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); |
| dst += sizeof infinityL - 1; |
| break; |
| |
| case fcNaN: |
| memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); |
| dst += sizeof NaNU - 1; |
| break; |
| |
| case fcZero: |
| *dst++ = '0'; |
| *dst++ = upperCase ? 'X': 'x'; |
| *dst++ = '0'; |
| if (hexDigits > 1) { |
| *dst++ = '.'; |
| memset (dst, '0', hexDigits - 1); |
| dst += hexDigits - 1; |
| } |
| *dst++ = upperCase ? 'P': 'p'; |
| *dst++ = '0'; |
| break; |
| |
| case fcNormal: |
| dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); |
| break; |
| } |
| |
| *dst = 0; |
| |
| return static_cast<unsigned int>(dst - p); |
| } |
| |
| /* Does the hard work of outputting the correctly rounded hexadecimal |
| form of a normal floating point number with the specified number of |
| hexadecimal digits. If HEXDIGITS is zero the minimum number of |
| digits necessary to print the value precisely is output. */ |
| char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, |
| bool upperCase, |
| roundingMode rounding_mode) const { |
| unsigned int count, valueBits, shift, partsCount, outputDigits; |
| const char *hexDigitChars; |
| const integerPart *significand; |
| char *p; |
| bool roundUp; |
| |
| *dst++ = '0'; |
| *dst++ = upperCase ? 'X': 'x'; |
| |
| |