| //===- llvm/unittest/ADT/APFloat.cpp - APFloat unit tests ---------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APSInt.h" |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/ADT/SmallString.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "gtest/gtest.h" |
| #include <cmath> |
| #include <limits> |
| #include <ostream> |
| #include <string> |
| #include <tuple> |
| #include <type_traits> |
| |
| using namespace llvm; |
| |
| /// Parses \p Str into a scratch APFloat with round-to-nearest-even and returns |
| /// the resulting error rendered as a string. The parse is expected to fail; a |
| /// gtest failure is recorded if it yields a status instead of an error. |
| static std::string convertToErrorFromString(StringRef Str) { |
|   llvm::APFloat Scratch(0.0); |
|   auto Parsed = |
|       Scratch.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven); |
|   EXPECT_TRUE(!Parsed); |
|   return toString(Parsed.takeError()); |
| } |
| |
| /// Parses \p Str into an APFloat with round-to-nearest-even and returns the |
| /// parsed value as a double. The parse is expected to succeed; a gtest failure |
| /// is recorded otherwise, and any error is consumed before returning. |
| static double convertToDoubleFromString(StringRef Str) { |
|   llvm::APFloat Value(0.0); |
|   auto Parsed = |
|       Value.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven); |
|   EXPECT_FALSE(!Parsed); |
|   // Take the error unconditionally and discard it. |
|   consumeError(Parsed.takeError()); |
|   return Value.convertToDouble(); |
| } |
| |
| /// Renders \p d through APFloat::toString, forwarding \p Prec, \p Pad and |
| /// \p Tr unchanged, and returns the produced characters as a std::string. |
| static std::string convertToString(double d, unsigned Prec, unsigned Pad, |
|                                    bool Tr = true) { |
|   llvm::SmallVector<char, 100> Chars; |
|   const llvm::APFloat Value(d); |
|   Value.toString(Chars, Prec, Pad, Tr); |
|   return std::string(Chars.begin(), Chars.end()); |
| } |
| |
| namespace llvm { |
| namespace detail { |
| /// Test-only shim that drives `IEEEFloat::addOrSubtractSignificand` with |
| /// hand-picked operand fields. It reads and writes the `sign`, `exponent` and |
| /// `significand.part` members of IEEEFloat directly (presumably permitted by a |
| /// friend declaration in APFloat.h, which is not visible here -- TODO confirm). |
| class IEEEFloatUnitTestHelper { |
|   /// Overwrites the three fields of \p Value that the operation under test |
|   /// consumes; nothing else in \p Value is touched. |
|   static void overwriteParts(IEEEFloat &Value, bool Sign, |
|                              APFloat::ExponentType Exponent, |
|                              APFloat::integerPart Significand) { |
|     Value.sign = Sign; |
|     Value.exponent = Exponent; |
|     Value.significand.part = Significand; |
|   } |
| |
| public: |
|   /// Builds lhs and rhs from the given sign/exponent/significand triples, runs |
|   /// `lhs.addOrSubtractSignificand(rhs, subtract)` and checks the returned |
|   /// lost fraction together with the sign, exponent and significand left in |
|   /// lhs against the expected values. |
|   static void runTest(bool subtract, bool lhsSign, |
|                       APFloat::ExponentType lhsExponent, |
|                       APFloat::integerPart lhsSignificand, bool rhsSign, |
|                       APFloat::ExponentType rhsExponent, |
|                       APFloat::integerPart rhsSignificand, bool expectedSign, |
|                       APFloat::ExponentType expectedExponent, |
|                       APFloat::integerPart expectedSignificand, |
|                       lostFraction expectedLoss) { |
|     // `addOrSubtractSignificand` only uses the sign, exponent and significand, |
|     // so both operands start out as 1.0 and get just those fields replaced. |
|     IEEEFloat Lhs(1.0); |
|     IEEEFloat Rhs(1.0); |
|     overwriteParts(Lhs, lhsSign, lhsExponent, lhsSignificand); |
|     overwriteParts(Rhs, rhsSign, rhsExponent, rhsSignificand); |
| |
|     const lostFraction resultLoss = Lhs.addOrSubtractSignificand(Rhs, subtract); |
|     EXPECT_EQ(resultLoss, expectedLoss); |
|     EXPECT_EQ(Lhs.sign, expectedSign); |
|     EXPECT_EQ(Lhs.exponent, expectedExponent); |
|     EXPECT_EQ(Lhs.significand.part, expectedSignificand); |
|   } |
| }; |
| } // namespace detail |
| } // namespace llvm |
| |
| namespace { |
| |
| TEST(APFloatTest, isSignaling) { |
|   // Covers qNaN, -qNaN, +sNaN and -sNaN, each with and without a payload. The |
|   // positive/negative split exists only because the getQNaN/getSNaN API takes |
|   // a sign argument. |
|   const fltSemantics &Single = APFloat::IEEEsingle(); |
|   const APInt Payload = APInt::getOneBitSet(4, 2); |
| |
|   // Quiet NaNs must never report as signaling. |
|   const APFloat PosQNaN = APFloat::getQNaN(Single, false); |
|   EXPECT_FALSE(PosQNaN.isSignaling()); |
|   EXPECT_EQ(fcQNan, PosQNaN.classify()); |
| |
|   EXPECT_FALSE(APFloat::getQNaN(Single, true).isSignaling()); |
|   for (bool Negative : {false, true}) |
|     EXPECT_FALSE(APFloat::getQNaN(Single, Negative, &Payload).isSignaling()); |
| |
|   // Signaling NaNs must always report as signaling. |
|   const APFloat PosSNaN = APFloat::getSNaN(Single, false); |
|   EXPECT_TRUE(PosSNaN.isSignaling()); |
|   EXPECT_EQ(fcSNan, PosSNaN.classify()); |
| |
|   EXPECT_TRUE(APFloat::getSNaN(Single, true).isSignaling()); |
|   for (bool Negative : {false, true}) |
|     EXPECT_TRUE(APFloat::getSNaN(Single, Negative, &Payload).isSignaling()); |
| } |
| |
| TEST(APFloatTest, next) { |
| |
| APFloat test(APFloat::IEEEquad(), APFloat::uninitialized); |
| APFloat expected(APFloat::IEEEquad(), APFloat::uninitialized); |
| |
| // Exercises next() on IEEEquad; next(false) is nextUp, next(true) nextDown. |
| // 1. Test Special Case Values. |
| // Test all special values for nextUp and nextDown prescribed by IEEE-754R |
| // 2008. These are: |
| // 1. +inf |
| // 2. -inf |
| // 3. getLargest() |
| // 4. -getLargest() |
| // 5. getSmallest() |
| // 6. -getSmallest() |
| // 7. qNaN |
| // 8. sNaN |
| // 9. +0 |
| // 10. -0 |
| |
| // nextUp(+inf) = +inf. |
| test = APFloat::getInf(APFloat::IEEEquad(), false); |
| expected = APFloat::getInf(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isInfinity()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+inf) = -nextUp(-inf) = -(-getLargest()) = getLargest() |
| test = APFloat::getInf(APFloat::IEEEquad(), false); |
| expected = APFloat::getLargest(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-inf) = -getLargest() |
| test = APFloat::getInf(APFloat::IEEEquad(), true); |
| expected = APFloat::getLargest(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf. |
| test = APFloat::getInf(APFloat::IEEEquad(), true); |
| expected = APFloat::getInf(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isInfinity() && test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(getLargest()) = +inf |
| test = APFloat::getLargest(APFloat::IEEEquad(), false); |
| expected = APFloat::getInf(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isInfinity() && !test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(getLargest()) = -nextUp(-getLargest()) |
| // = -(-getLargest() + inc) |
| // = getLargest() - inc. |
| test = APFloat::getLargest(APFloat::IEEEquad(), false); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.fffffffffffffffffffffffffffep+16383"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(!test.isInfinity() && !test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-getLargest()) = -getLargest() + inc. |
| test = APFloat::getLargest(APFloat::IEEEquad(), true); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.fffffffffffffffffffffffffffep+16383"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-getLargest()) = -nextUp(getLargest()) = -(inf) = -inf. |
| test = APFloat::getLargest(APFloat::IEEEquad(), true); |
| expected = APFloat::getInf(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isInfinity() && test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(getSmallest()) = getSmallest() + inc. |
| test = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x0.0000000000000000000000000002p-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(getSmallest()) = -nextUp(-getSmallest()) = -(-0) = +0. |
| test = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382"); |
| expected = APFloat::getZero(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isPosZero()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-getSmallest()) = -0. |
| test = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382"); |
| expected = APFloat::getZero(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isNegZero()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-getSmallest()) = -nextUp(getSmallest()) = -getSmallest() - inc. |
| test = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x0.0000000000000000000000000002p-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(qNaN) = qNaN |
| test = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| expected = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(qNaN) = qNaN |
| test = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| expected = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(sNaN) = qNaN, and the operation reports opInvalidOp. |
| test = APFloat::getSNaN(APFloat::IEEEquad(), false); |
| expected = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opInvalidOp); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(sNaN) = qNaN, and the operation reports opInvalidOp. |
| test = APFloat::getSNaN(APFloat::IEEEquad(), false); |
| expected = APFloat::getQNaN(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(true), APFloat::opInvalidOp); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(+0) = +getSmallest() |
| test = APFloat::getZero(APFloat::IEEEquad(), false); |
| expected = APFloat::getSmallest(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+0) = -nextUp(-0) = -getSmallest() |
| test = APFloat::getZero(APFloat::IEEEquad(), false); |
| expected = APFloat::getSmallest(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-0) = +getSmallest() |
| test = APFloat::getZero(APFloat::IEEEquad(), true); |
| expected = APFloat::getSmallest(APFloat::IEEEquad(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-0) = -nextUp(0) = -getSmallest() |
| test = APFloat::getZero(APFloat::IEEEquad(), true); |
| expected = APFloat::getSmallest(APFloat::IEEEquad(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2. Binade Boundary Tests. |
| |
| // 2a. Test denormal <-> normal binade boundaries. |
| // * nextUp(+Largest Denormal) -> +Smallest Normal. |
| // * nextDown(-Largest Denormal) -> -Smallest Normal. |
| // * nextUp(-Smallest Normal) -> -Largest Denormal. |
| // * nextDown(+Smallest Normal) -> +Largest Denormal. |
| |
| // nextUp(+Largest Denormal) -> +Smallest Normal. |
| test = APFloat(APFloat::IEEEquad(), "0x0.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.0000000000000000000000000000p-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-Largest Denormal) -> -Smallest Normal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.0000000000000000000000000000p-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-Smallest Normal) -> -Largest Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x1.0000000000000000000000000000p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffffffffp-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+Smallest Normal) -> +Largest Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "+0x1.0000000000000000000000000000p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "+0x0.ffffffffffffffffffffffffffffp-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2b. Test normal <-> normal binade boundaries. |
| // * nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1. |
| // * nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1. |
| // * nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary. |
| // * nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary. |
| |
| // nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1. |
| test = APFloat(APFloat::IEEEquad(), "-0x1p+1"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffffffffp+0"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1. |
| test = APFloat(APFloat::IEEEquad(), "0x1p+1"); |
| expected = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp+0"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary. |
| test = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp+0"); |
| expected = APFloat(APFloat::IEEEquad(), "0x1p+1"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary. |
| test = APFloat(APFloat::IEEEquad(), "-0x1.ffffffffffffffffffffffffffffp+0"); |
| expected = APFloat(APFloat::IEEEquad(), "-0x1p+1"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2c. Test using next at binade boundaries with a direction away from the |
| // binade boundary. Away from denormal <-> normal boundaries. |
| // |
| // This is to make sure that even though we are at a binade boundary, since |
| // we are rounding away, we do not trigger the binade boundary code. Thus we |
| // test: |
| // * nextUp(-Largest Denormal) -> -Largest Denormal + inc. |
| // * nextDown(+Largest Denormal) -> +Largest Denormal - inc. |
| // * nextUp(+Smallest Normal) -> +Smallest Normal + inc. |
| // * nextDown(-Smallest Normal) -> -Smallest Normal - inc. |
| |
| // nextUp(-Largest Denormal) -> -Largest Denormal + inc. |
| test = APFloat(APFloat::IEEEquad(), "-0x0.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x0.fffffffffffffffffffffffffffep-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+Largest Denormal) -> +Largest Denormal - inc. |
| test = APFloat(APFloat::IEEEquad(), "0x0.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x0.fffffffffffffffffffffffffffep-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(+Smallest Normal) -> +Smallest Normal + inc. |
| test = APFloat(APFloat::IEEEquad(), "0x1.0000000000000000000000000000p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.0000000000000000000000000001p-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-Smallest Normal) -> -Smallest Normal - inc. |
| test = APFloat(APFloat::IEEEquad(), "-0x1.0000000000000000000000000000p-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.0000000000000000000000000001p-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2d. Test values which cause our exponent to go to min exponent. This |
| // is to ensure that guards in the code to check for min exponent |
| // trigger properly. |
| // * nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382 |
| // * nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) -> |
| // -0x1p-16381 |
| // * nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381 |
| // * nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382 |
| |
| // nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382 |
| test = APFloat(APFloat::IEEEquad(), "-0x1p-16381"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffffffffp-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) -> |
| // -0x1p-16381 |
| test = APFloat(APFloat::IEEEquad(), "-0x1.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), "-0x1p-16381"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381 |
| test = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), "0x1p-16381"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382 |
| test = APFloat(APFloat::IEEEquad(), "0x1p-16381"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffffffffp-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 3. Now we test both denormal/normal computation which will not cause us |
| // to go across binade boundaries. Specifically we test: |
| // * nextUp(+Denormal) -> +Denormal. |
| // * nextDown(+Denormal) -> +Denormal. |
| // * nextUp(-Denormal) -> -Denormal. |
| // * nextDown(-Denormal) -> -Denormal. |
| // * nextUp(+Normal) -> +Normal. |
| // * nextDown(+Normal) -> +Normal. |
| // * nextUp(-Normal) -> -Normal. |
| // * nextDown(-Normal) -> -Normal. |
| |
| // nextUp(+Denormal) -> +Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "0x0.ffffffffffffffffffffffff000cp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x0.ffffffffffffffffffffffff000dp-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+Denormal) -> +Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "0x0.ffffffffffffffffffffffff000cp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x0.ffffffffffffffffffffffff000bp-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-Denormal) -> -Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffff000cp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffff000bp-16382"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-Denormal) -> -Denormal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffff000cp-16382"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x0.ffffffffffffffffffffffff000dp-16382"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(+Normal) -> +Normal. |
| test = APFloat(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffff000cp-16000"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffff000dp-16000"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(+Normal) -> +Normal. |
| test = APFloat(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffff000cp-16000"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffff000bp-16000"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp(-Normal) -> -Normal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffff000cp-16000"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffff000bp-16000"); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown(-Normal) -> -Normal. |
| test = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffff000cp-16000"); |
| expected = APFloat(APFloat::IEEEquad(), |
| "-0x1.ffffffffffffffffffffffff000dp-16000"); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(!test.isDenormal()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| TEST(APFloatTest, FMA) { |
| APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven; |
| |
| { |
| APFloat f1(14.5f); |
| APFloat f2(-14.5f); |
| APFloat f3(225.0f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(14.75f, f1.convertToFloat()); |
| } |
| |
| { |
| APFloat Val2(2.0f); |
| APFloat f1((float)1.17549435e-38F); |
| APFloat f2((float)1.17549435e-38F); |
| f1.divide(Val2, rdmd); |
| f2.divide(Val2, rdmd); |
| APFloat f3(12.0f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(12.0f, f1.convertToFloat()); |
| } |
| |
| // Test for correct zero sign when answer is exactly zero. |
| // fma(1.0, -1.0, 1.0) -> +ve 0. |
| { |
| APFloat f1(1.0); |
| APFloat f2(-1.0); |
| APFloat f3(1.0); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(!f1.isNegative() && f1.isZero()); |
| } |
| |
| // Test for correct zero sign when answer is exactly zero and rounding towards |
| // negative. |
| // fma(1.0, -1.0, 1.0) -> -ve 0. |
| { |
| APFloat f1(1.0); |
| APFloat f2(-1.0); |
| APFloat f3(1.0); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmTowardNegative); |
| EXPECT_TRUE(f1.isNegative() && f1.isZero()); |
| } |
| |
| // Test for correct (in this case -ve) sign when adding like signed zeros. |
| // Test fma(0.0, -0.0, -0.0) -> -ve 0. |
| { |
| APFloat f1(0.0); |
| APFloat f2(-0.0); |
| APFloat f3(-0.0); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(f1.isNegative() && f1.isZero()); |
| } |
| |
| // Test -ve sign preservation when small negative results underflow. |
| { |
| APFloat f1(APFloat::IEEEdouble(), "-0x1p-1074"); |
| APFloat f2(APFloat::IEEEdouble(), "+0x1p-1074"); |
| APFloat f3(0.0); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(f1.isNegative() && f1.isZero()); |
| } |
| |
| // Test x87 extended precision case from http://llvm.org/PR20728. |
| { |
| APFloat M1(APFloat::x87DoubleExtended(), 1); |
| APFloat M2(APFloat::x87DoubleExtended(), 1); |
| APFloat A(APFloat::x87DoubleExtended(), 3); |
| |
| bool losesInfo = false; |
| M1.fusedMultiplyAdd(M1, A, APFloat::rmNearestTiesToEven); |
| M1.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(4.0f, M1.convertToFloat()); |
| } |
| |
| // Regression test that failed an assertion. |
| { |
| APFloat f1(-8.85242279E-41f); |
| APFloat f2(2.0f); |
| APFloat f3(8.85242279E-41f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(-8.85242279E-41f, f1.convertToFloat()); |
| } |
| |
| // The `addOrSubtractSignificand` can be considered to have 9 possible cases |
| // when subtracting: all combinations of {cmpLessThan, cmpGreaterThan, |
| // cmpEqual} and {no loss, loss from lhs, loss from rhs}. Test each reachable |
| // case here. |
| |
| // Regression test for failing the `assert(!carry)` in |
| // `addOrSubtractSignificand` and normalizing the exponent even when the |
| // significand is zero if there is a lost fraction. |
| // This tests cmpEqual, loss from lhs |
| { |
| APFloat f1(-1.4728589E-38f); |
| APFloat f2(3.7105144E-6f); |
| APFloat f3(5.5E-44f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(-0.0f, f1.convertToFloat()); |
| } |
| |
| // Test cmpGreaterThan, no loss |
| { |
| APFloat f1(2.0f); |
| APFloat f2(2.0f); |
| APFloat f3(-3.5f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(0.5f, f1.convertToFloat()); |
| } |
| |
| // Test cmpLessThan, no loss |
| { |
| APFloat f1(2.0f); |
| APFloat f2(2.0f); |
| APFloat f3(-4.5f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(-0.5f, f1.convertToFloat()); |
| } |
| |
| // Test cmpEqual, no loss |
| { |
| APFloat f1(2.0f); |
| APFloat f2(2.0f); |
| APFloat f3(-4.0f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(0.0f, f1.convertToFloat()); |
| } |
| |
| // Test cmpLessThan, loss from lhs |
| { |
| APFloat f1(2.0000002f); |
| APFloat f2(2.0000002f); |
| APFloat f3(-32.0f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(-27.999998f, f1.convertToFloat()); |
| } |
| |
| // Test cmpGreaterThan, loss from rhs |
| { |
| APFloat f1(1e10f); |
| APFloat f2(1e10f); |
| APFloat f3(-2.0000002f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(1e20f, f1.convertToFloat()); |
| } |
| |
| // Test cmpGreaterThan, loss from lhs |
| { |
| APFloat f1(1e-36f); |
| APFloat f2(0.0019531252f); |
| APFloat f3(-1e-45f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(1.953124e-39f, f1.convertToFloat()); |
| } |
| |
| // {cmpEqual, cmpLessThan} with loss from rhs can't occur for the usage in |
| // `fusedMultiplyAdd` as `multiplySignificand` normalises the MSB of lhs to |
| // one bit below the top. |
| |
| // Test cases from #104984 |
| { |
| APFloat f1(0.24999998f); |
| APFloat f2(2.3509885e-38f); |
| APFloat f3(-1e-45f); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(5.87747e-39f, f1.convertToFloat()); |
| } |
| { |
| APFloat f1(4.4501477170144023e-308); |
| APFloat f2(0.24999999999999997); |
| APFloat f3(-8.475904604373977e-309); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(2.64946468816203e-309, f1.convertToDouble()); |
| } |
| { |
| APFloat f1(APFloat::IEEEhalf(), APInt(16, 0x8fffu)); |
| APFloat f2(APFloat::IEEEhalf(), APInt(16, 0x2bffu)); |
| APFloat f3(APFloat::IEEEhalf(), APInt(16, 0x0172u)); |
| f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(0x808eu, f1.bitcastToAPInt().getZExtValue()); |
| } |
| |
| // Test using only a single instance of APFloat. |
| { |
| APFloat F(1.5); |
| |
| F.fusedMultiplyAdd(F, F, APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(3.75, F.convertToDouble()); |
| } |
| } |
| |
| TEST(APFloatTest, MinNum) { |
|   // Checks minnum() on ordinary values, on signed zeros, against quiet NaNs |
|   // (the numeric operand is expected back unchanged) and against signaling |
|   // NaNs (a quiet NaN with the same sign and payload is expected). |
|   APFloat f1(1.0); |
|   APFloat f2(2.0); |
|   APFloat nan = APFloat::getNaN(APFloat::IEEEdouble()); |
| |
|   EXPECT_EQ(1.0, minnum(f1, f2).convertToDouble()); |
|   EXPECT_EQ(1.0, minnum(f2, f1).convertToDouble()); |
|   EXPECT_EQ(1.0, minnum(f1, nan).convertToDouble()); |
|   EXPECT_EQ(1.0, minnum(nan, f1).convertToDouble()); |
| |
|   // Signed zeros: check both operand orders, as the MaxNum test does for |
|   // maxnum(). |
|   // NOTE(review): these compare doubles by value and -0.0 == 0.0, so the sign |
|   // of the returned zero is not actually verified by these two checks. |
|   APFloat zp(0.0); |
|   APFloat zn(-0.0); |
|   EXPECT_EQ(-0.0, minnum(zp, zn).convertToDouble()); |
|   EXPECT_EQ(-0.0, minnum(zn, zp).convertToDouble()); |
| |
|   // Each array holds {operand, expected result}: element 0 is the NaN handed |
|   // to minnum(), element 1 is the quiet NaN with the same sign and payload. |
|   APInt intPayload_89ab(64, 0x89ab); |
|   APInt intPayload_cdef(64, 0xcdef); |
|   APFloat nan_0123[2] = {APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123), |
|                          APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123)}; |
|   APFloat mnan_4567[2] = {APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567), |
|                           APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567)}; |
|   APFloat nan_89ab[2] = { |
|       APFloat::getSNaN(APFloat::IEEEdouble(), false, &intPayload_89ab), |
|       APFloat::getNaN(APFloat::IEEEdouble(), false, 0x89ab)}; |
|   APFloat mnan_cdef[2] = { |
|       APFloat::getSNaN(APFloat::IEEEdouble(), true, &intPayload_cdef), |
|       APFloat::getNaN(APFloat::IEEEdouble(), true, 0xcdef)}; |
| |
|   // Number vs quiet NaN: the number comes back bit-for-bit, in either order. |
|   for (const APFloat &n : {nan_0123[0], mnan_4567[0]}) { |
|     for (const APFloat &f : {f1, f2, zn, zp}) { |
|       APFloat res = minnum(f, n); |
|       EXPECT_FALSE(res.isNaN()); |
|       EXPECT_TRUE(res.bitwiseIsEqual(f)); |
|       res = minnum(n, f); |
|       EXPECT_FALSE(res.isNaN()); |
|       EXPECT_TRUE(res.bitwiseIsEqual(f)); |
|     } |
|   } |
| |
|   // Number vs signaling NaN: the quieted NaN comes back, in either order. |
|   for (auto n : {nan_89ab, mnan_cdef}) { |
|     for (const APFloat &f : {f1, f2, zn, zp}) { |
|       APFloat res = minnum(f, n[0]); |
|       EXPECT_TRUE(res.isNaN()); |
|       EXPECT_TRUE(res.bitwiseIsEqual(n[1])); |
|       res = minnum(n[0], f); |
|       EXPECT_TRUE(res.isNaN()); |
|       EXPECT_TRUE(res.bitwiseIsEqual(n[1])); |
|     } |
|   } |
| |
|   // When NaN vs NaN, we should keep payload/sign of either one. |
|   for (auto n1 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) { |
|     for (auto n2 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) { |
|       APFloat res = minnum(n1[0], n2[0]); |
|       EXPECT_TRUE(res.bitwiseIsEqual(n1[1]) || res.bitwiseIsEqual(n2[1])); |
|       EXPECT_FALSE(res.isSignaling()); |
|     } |
|   } |
| } |
| |
| TEST(APFloatTest, MaxNum) { |
|   // Checks maxnum() on ordinary values, on signed zeros, against quiet NaNs |
|   // (the numeric operand is expected back unchanged) and against signaling |
|   // NaNs (a quiet NaN with the same sign and payload is expected). |
|   const fltSemantics &Dbl = APFloat::IEEEdouble(); |
|   const APFloat One(1.0); |
|   const APFloat Two(2.0); |
|   const APFloat QNaN = APFloat::getNaN(Dbl); |
| |
|   EXPECT_EQ(2.0, maxnum(One, Two).convertToDouble()); |
|   EXPECT_EQ(2.0, maxnum(Two, One).convertToDouble()); |
|   EXPECT_EQ(1.0, maxnum(One, QNaN).convertToDouble()); |
|   EXPECT_EQ(1.0, maxnum(QNaN, One).convertToDouble()); |
| |
|   const APFloat PosZero(0.0); |
|   const APFloat NegZero(-0.0); |
|   EXPECT_EQ(0.0, maxnum(PosZero, NegZero).convertToDouble()); |
|   EXPECT_EQ(0.0, maxnum(NegZero, PosZero).convertToDouble()); |
| |
|   // One row per NaN flavour under test. |
|   struct NaNCase { |
|     APFloat Operand;  // NaN handed to maxnum(). |
|     APFloat Quieted;  // Quiet NaN with the same sign and payload. |
|     bool IsSignaling; // True when Operand was built with getSNaN(). |
|   }; |
|   const APInt Payload89ab(64, 0x89ab); |
|   const APInt PayloadCdef(64, 0xcdef); |
|   const NaNCase Cases[] = { |
|       {APFloat::getNaN(Dbl, false, 0x0123), |
|        APFloat::getNaN(Dbl, false, 0x0123), false}, |
|       {APFloat::getNaN(Dbl, true, 0x4567), APFloat::getNaN(Dbl, true, 0x4567), |
|        false}, |
|       {APFloat::getSNaN(Dbl, false, &Payload89ab), |
|        APFloat::getNaN(Dbl, false, 0x89ab), true}, |
|       {APFloat::getSNaN(Dbl, true, &PayloadCdef), |
|        APFloat::getNaN(Dbl, true, 0xcdef), true}, |
|   }; |
|   const APFloat Numbers[] = {One, Two, NegZero, PosZero}; |
| |
|   // Number vs NaN: a quiet NaN yields the number bit-for-bit, a signaling |
|   // NaN yields its quieted counterpart. |
|   const auto ExpectNumberVsNaN = [](const APFloat &Res, const APFloat &Num, |
|                                     const NaNCase &Case) { |
|     if (Case.IsSignaling) { |
|       EXPECT_TRUE(Res.isNaN()); |
|       EXPECT_TRUE(Res.bitwiseIsEqual(Case.Quieted)); |
|     } else { |
|       EXPECT_FALSE(Res.isNaN()); |
|       EXPECT_TRUE(Res.bitwiseIsEqual(Num)); |
|     } |
|   }; |
|   for (const NaNCase &Case : Cases) { |
|     for (const APFloat &Num : Numbers) { |
|       ExpectNumberVsNaN(maxnum(Num, Case.Operand), Num, Case); |
|       ExpectNumberVsNaN(maxnum(Case.Operand, Num), Num, Case); |
|     } |
|   } |
| |
|   // When NaN vs NaN, we should keep payload/sign of either one. |
|   for (const NaNCase &Lhs : Cases) { |
|     for (const NaNCase &Rhs : Cases) { |
|       const APFloat Res = maxnum(Lhs.Operand, Rhs.Operand); |
|       EXPECT_TRUE(Res.bitwiseIsEqual(Lhs.Quieted) || |
|                   Res.bitwiseIsEqual(Rhs.Quieted)); |
|       EXPECT_FALSE(Res.isSignaling()); |
|     } |
|   } |
| } |
| |
| TEST(APFloatTest, Minimum) { |
|   // Checks minimum() on ordinary values, on signed zeros, and with quiet and |
|   // signaling NaN operands: a NaN operand is expected to yield a NaN result, |
|   // and a signaling NaN operand must not yield a signaling result. |
|   APFloat f1(1.0); |
|   APFloat f2(2.0); |
|   APFloat zp(0.0); |
|   APFloat zn(-0.0); |
|   APFloat nan = APFloat::getNaN(APFloat::IEEEdouble()); |
|   APFloat snan = APFloat::getSNaN(APFloat::IEEEdouble()); |
| |
|   EXPECT_EQ(1.0, minimum(f1, f2).convertToDouble()); |
|   EXPECT_EQ(1.0, minimum(f2, f1).convertToDouble()); |
|   EXPECT_EQ(-0.0, minimum(zp, zn).convertToDouble()); |
|   EXPECT_EQ(-0.0, minimum(zn, zp).convertToDouble()); |
|   EXPECT_TRUE(std::isnan(minimum(f1, nan).convertToDouble())); |
|   EXPECT_TRUE(std::isnan(minimum(nan, f1).convertToDouble())); |
|   // The sNaN checks must exercise minimum() itself; maximum() has its own |
|   // test. |
|   EXPECT_TRUE(minimum(snan, f1).isNaN()); |
|   EXPECT_TRUE(minimum(f1, snan).isNaN()); |
|   EXPECT_FALSE(minimum(snan, f1).isSignaling()); |
|   EXPECT_FALSE(minimum(f1, snan).isSignaling()); |
| } |
| |
| TEST(APFloatTest, Maximum) { |
| APFloat f1(1.0); |
| APFloat f2(2.0); |
| APFloat zp(0.0); |
| APFloat zn(-0.0); |
| APFloat nan = APFloat::getNaN(APFloat::IEEEdouble()); |
| APFloat snan = APFloat::getSNaN(APFloat::IEEEdouble()); |
| |
| EXPECT_EQ(2.0, maximum(f1, f2).convertToDouble()); |
| EXPECT_EQ(2.0, maximum(f2, f1).convertToDouble()); |
| EXPECT_EQ(0.0, maximum(zp, zn).convertToDouble()); |
| EXPECT_EQ(0.0, maximum(zn, zp).convertToDouble()); |
| EXPECT_TRUE(std::isnan(maximum(f1, nan).convertToDouble())); |
| EXPECT_TRUE(std::isnan(maximum(nan, f1).convertToDouble())); |
| EXPECT_TRUE(maximum(snan, f1).isNaN()); |
| EXPECT_TRUE(maximum(f1, snan).isNaN()); |
| EXPECT_FALSE(maximum(snan, f1).isSignaling()); |
| EXPECT_FALSE(maximum(f1, snan).isSignaling()); |
| } |
| |
| TEST(APFloatTest, MinimumNumber) { |
| APFloat f1(1.0); |
| APFloat f2(2.0); |
| APFloat zp(0.0); |
| APFloat zn(-0.0); |
| APInt intPayload_89ab(64, 0x89ab); |
| APInt intPayload_cdef(64, 0xcdef); |
| APFloat nan_0123[2] = {APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123), |
| APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123)}; |
| APFloat mnan_4567[2] = {APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567), |
| APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567)}; |
| APFloat nan_89ab[2] = { |
| APFloat::getSNaN(APFloat::IEEEdouble(), false, &intPayload_89ab), |
| APFloat::getNaN(APFloat::IEEEdouble(), false, 0x89ab)}; |
| APFloat mnan_cdef[2] = { |
| APFloat::getSNaN(APFloat::IEEEdouble(), true, &intPayload_cdef), |
| APFloat::getNaN(APFloat::IEEEdouble(), true, 0xcdef)}; |
| |
| EXPECT_TRUE(f1.bitwiseIsEqual(minimumnum(f1, f2))); |
| EXPECT_TRUE(f1.bitwiseIsEqual(minimumnum(f2, f1))); |
| EXPECT_TRUE(zn.bitwiseIsEqual(minimumnum(zp, zn))); |
| EXPECT_TRUE(zn.bitwiseIsEqual(minimumnum(zn, zp))); |
| |
| EXPECT_TRUE(minimumnum(zn, zp).isNegative()); |
| EXPECT_TRUE(minimumnum(zp, zn).isNegative()); |
| EXPECT_TRUE(minimumnum(zn, zn).isNegative()); |
| EXPECT_FALSE(minimumnum(zp, zp).isNegative()); |
| |
| for (APFloat n : {nan_0123[0], mnan_4567[0], nan_89ab[0], mnan_cdef[0]}) |
| for (APFloat f : {f1, f2, zn, zp}) { |
| APFloat res = minimumnum(f, n); |
| EXPECT_FALSE(res.isNaN()); |
| EXPECT_TRUE(res.bitwiseIsEqual(f)); |
| res = minimumnum(n, f); |
| EXPECT_FALSE(res.isNaN()); |
| EXPECT_TRUE(res.bitwiseIsEqual(f)); |
| } |
| |
| // When NaN vs NaN, we should keep payload/sign of either one. |
| for (auto n1 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) |
| for (auto n2 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) { |
| APFloat res = minimumnum(n1[0], n2[0]); |
| EXPECT_TRUE(res.bitwiseIsEqual(n1[1]) || res.bitwiseIsEqual(n2[1])); |
| EXPECT_FALSE(res.isSignaling()); |
| } |
| } |
| |
| TEST(APFloatTest, MaximumNumber) { |
| APFloat f1(1.0); |
| APFloat f2(2.0); |
| APFloat zp(0.0); |
| APFloat zn(-0.0); |
| APInt intPayload_89ab(64, 0x89ab); |
| APInt intPayload_cdef(64, 0xcdef); |
| APFloat nan_0123[2] = {APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123), |
| APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123)}; |
| APFloat mnan_4567[2] = {APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567), |
| APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567)}; |
| APFloat nan_89ab[2] = { |
| APFloat::getSNaN(APFloat::IEEEdouble(), false, &intPayload_89ab), |
| APFloat::getNaN(APFloat::IEEEdouble(), false, 0x89ab)}; |
| APFloat mnan_cdef[2] = { |
| APFloat::getSNaN(APFloat::IEEEdouble(), true, &intPayload_cdef), |
| APFloat::getNaN(APFloat::IEEEdouble(), true, 0xcdef)}; |
| |
| EXPECT_TRUE(f2.bitwiseIsEqual(maximumnum(f1, f2))); |
| EXPECT_TRUE(f2.bitwiseIsEqual(maximumnum(f2, f1))); |
| EXPECT_TRUE(zp.bitwiseIsEqual(maximumnum(zp, zn))); |
| EXPECT_TRUE(zp.bitwiseIsEqual(maximumnum(zn, zp))); |
| |
| EXPECT_FALSE(maximumnum(zn, zp).isNegative()); |
| EXPECT_FALSE(maximumnum(zp, zn).isNegative()); |
| EXPECT_TRUE(maximumnum(zn, zn).isNegative()); |
| EXPECT_FALSE(maximumnum(zp, zp).isNegative()); |
| |
| for (APFloat n : {nan_0123[0], mnan_4567[0], nan_89ab[0], mnan_cdef[0]}) |
| for (APFloat f : {f1, f2, zn, zp}) { |
| APFloat res = maximumnum(f, n); |
| EXPECT_FALSE(res.isNaN()); |
| EXPECT_TRUE(res.bitwiseIsEqual(f)); |
| res = maximumnum(n, f); |
| EXPECT_FALSE(res.isNaN()); |
| EXPECT_TRUE(res.bitwiseIsEqual(f)); |
| } |
| |
| // When NaN vs NaN, we should keep payload/sign of either one. |
| for (auto n1 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) |
| for (auto n2 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) { |
| APFloat res = maximumnum(n1[0], n2[0]); |
| EXPECT_TRUE(res.bitwiseIsEqual(n1[1]) || res.bitwiseIsEqual(n2[1])); |
| EXPECT_FALSE(res.isSignaling()); |
| } |
| } |
| |
| TEST(APFloatTest, Denormal) { |
| APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven; |
| |
| // Test single precision |
| { |
| const char *MinNormalStr = "1.17549435082228750797e-38"; |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), MinNormalStr).isDenormal()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), 0).isDenormal()); |
| |
| APFloat Val2(APFloat::IEEEsingle(), 2); |
| APFloat T(APFloat::IEEEsingle(), MinNormalStr); |
| T.divide(Val2, rdmd); |
| EXPECT_TRUE(T.isDenormal()); |
| EXPECT_EQ(fcPosSubnormal, T.classify()); |
| |
| |
| const char *NegMinNormalStr = "-1.17549435082228750797e-38"; |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), NegMinNormalStr).isDenormal()); |
| APFloat NegT(APFloat::IEEEsingle(), NegMinNormalStr); |
| NegT.divide(Val2, rdmd); |
| EXPECT_TRUE(NegT.isDenormal()); |
| EXPECT_EQ(fcNegSubnormal, NegT.classify()); |
| } |
| |
| // Test double precision |
| { |
| const char *MinNormalStr = "2.22507385850720138309e-308"; |
| EXPECT_FALSE(APFloat(APFloat::IEEEdouble(), MinNormalStr).isDenormal()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEdouble(), 0).isDenormal()); |
| |
| APFloat Val2(APFloat::IEEEdouble(), 2); |
| APFloat T(APFloat::IEEEdouble(), MinNormalStr); |
| T.divide(Val2, rdmd); |
| EXPECT_TRUE(T.isDenormal()); |
| EXPECT_EQ(fcPosSubnormal, T.classify()); |
| } |
| |
| // Test Intel double-ext |
| { |
| const char *MinNormalStr = "3.36210314311209350626e-4932"; |
| EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended(), MinNormalStr).isDenormal()); |
| EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended(), 0).isDenormal()); |
| |
| APFloat Val2(APFloat::x87DoubleExtended(), 2); |
| APFloat T(APFloat::x87DoubleExtended(), MinNormalStr); |
| T.divide(Val2, rdmd); |
| EXPECT_TRUE(T.isDenormal()); |
| EXPECT_EQ(fcPosSubnormal, T.classify()); |
| } |
| |
| // Test quadruple precision |
| { |
| const char *MinNormalStr = "3.36210314311209350626267781732175260e-4932"; |
| EXPECT_FALSE(APFloat(APFloat::IEEEquad(), MinNormalStr).isDenormal()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEquad(), 0).isDenormal()); |
| |
| APFloat Val2(APFloat::IEEEquad(), 2); |
| APFloat T(APFloat::IEEEquad(), MinNormalStr); |
| T.divide(Val2, rdmd); |
| EXPECT_TRUE(T.isDenormal()); |
| EXPECT_EQ(fcPosSubnormal, T.classify()); |
| } |
| |
| // Test TF32 |
| { |
| const char *MinNormalStr = "1.17549435082228750797e-38"; |
| EXPECT_FALSE(APFloat(APFloat::FloatTF32(), MinNormalStr).isDenormal()); |
| EXPECT_FALSE(APFloat(APFloat::FloatTF32(), 0).isDenormal()); |
| |
| APFloat Val2(APFloat::FloatTF32(), 2); |
| APFloat T(APFloat::FloatTF32(), MinNormalStr); |
| T.divide(Val2, rdmd); |
| EXPECT_TRUE(T.isDenormal()); |
| EXPECT_EQ(fcPosSubnormal, T.classify()); |
| |
| const char *NegMinNormalStr = "-1.17549435082228750797e-38"; |
| EXPECT_FALSE(APFloat(APFloat::FloatTF32(), NegMinNormalStr).isDenormal()); |
| APFloat NegT(APFloat::FloatTF32(), NegMinNormalStr); |
| NegT.divide(Val2, rdmd); |
| EXPECT_TRUE(NegT.isDenormal()); |
| EXPECT_EQ(fcNegSubnormal, NegT.classify()); |
| } |
| } |
| |
| TEST(APFloatTest, IsSmallestNormalized) { |
| for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) { |
| const fltSemantics &Semantics = |
| APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I)); |
| |
| // For Float8E8M0FNU format, the below cases are tested |
| // through Float8E8M0FNUSmallest and Float8E8M0FNUNext tests. |
| if (I == APFloat::S_Float8E8M0FNU) |
| continue; |
| |
| EXPECT_FALSE(APFloat::getZero(Semantics, false).isSmallestNormalized()); |
| EXPECT_FALSE(APFloat::getZero(Semantics, true).isSmallestNormalized()); |
| |
| if (APFloat::semanticsHasNaN(Semantics)) { |
| // Types that do not support Inf will return NaN when asked for Inf. |
| // (But only if they support NaN.) |
| EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized()); |
| EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized()); |
| |
| EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized()); |
| EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized()); |
| } |
| |
| EXPECT_FALSE(APFloat::getLargest(Semantics).isSmallestNormalized()); |
| EXPECT_FALSE(APFloat::getLargest(Semantics, true).isSmallestNormalized()); |
| |
| EXPECT_FALSE(APFloat::getSmallest(Semantics).isSmallestNormalized()); |
| EXPECT_FALSE(APFloat::getSmallest(Semantics, true).isSmallestNormalized()); |
| |
| EXPECT_FALSE(APFloat::getAllOnesValue(Semantics).isSmallestNormalized()); |
| |
| APFloat PosSmallestNormalized = |
| APFloat::getSmallestNormalized(Semantics, false); |
| APFloat NegSmallestNormalized = |
| APFloat::getSmallestNormalized(Semantics, true); |
| EXPECT_TRUE(PosSmallestNormalized.isSmallestNormalized()); |
| EXPECT_TRUE(NegSmallestNormalized.isSmallestNormalized()); |
| EXPECT_EQ(fcPosNormal, PosSmallestNormalized.classify()); |
| EXPECT_EQ(fcNegNormal, NegSmallestNormalized.classify()); |
| |
| for (APFloat *Val : {&PosSmallestNormalized, &NegSmallestNormalized}) { |
| bool OldSign = Val->isNegative(); |
| |
| // Step down, make sure it's still not smallest normalized. |
| EXPECT_EQ(APFloat::opOK, Val->next(false)); |
| EXPECT_EQ(OldSign, Val->isNegative()); |
| EXPECT_FALSE(Val->isSmallestNormalized()); |
| EXPECT_EQ(OldSign, Val->isNegative()); |
| |
| // Step back up should restore it to being smallest normalized. |
| EXPECT_EQ(APFloat::opOK, Val->next(true)); |
| EXPECT_TRUE(Val->isSmallestNormalized()); |
| EXPECT_EQ(OldSign, Val->isNegative()); |
| |
| // Step beyond should no longer smallest normalized. |
| EXPECT_EQ(APFloat::opOK, Val->next(true)); |
| EXPECT_FALSE(Val->isSmallestNormalized()); |
| EXPECT_EQ(OldSign, Val->isNegative()); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, Zero) { |
| EXPECT_EQ(0.0f, APFloat(0.0f).convertToFloat()); |
| EXPECT_EQ(-0.0f, APFloat(-0.0f).convertToFloat()); |
| EXPECT_TRUE(APFloat(-0.0f).isNegative()); |
| |
| EXPECT_EQ(0.0, APFloat(0.0).convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(-0.0).convertToDouble()); |
| EXPECT_TRUE(APFloat(-0.0).isNegative()); |
| |
| EXPECT_EQ(fcPosZero, APFloat(0.0).classify()); |
| EXPECT_EQ(fcNegZero, APFloat(-0.0).classify()); |
| } |
| |
| TEST(APFloatTest, getOne) { |
| EXPECT_EQ(APFloat::getOne(APFloat::IEEEsingle(), false).convertToFloat(), |
| 1.0f); |
| EXPECT_EQ(APFloat::getOne(APFloat::IEEEsingle(), true).convertToFloat(), |
| -1.0f); |
| } |
| |
| TEST(APFloatTest, DecimalStringsWithoutNullTerminators) { |
| // Make sure that we can parse strings without null terminators. |
| // rdar://14323230. |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0.00", 3)), 0.0); |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0.01", 3)), 0.0); |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0.09", 3)), 0.0); |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0.095", 4)), 0.09); |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0.00e+3", 7)), 0.00); |
| EXPECT_EQ(convertToDoubleFromString(StringRef("0e+3", 4)), 0.00); |
| } |
| |
| TEST(APFloatTest, fromZeroDecimalString) { |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "00000.").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+00000.").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-00000.").convertToDouble()); |
| |
| EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), ".00000").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.00000").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.00000").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0000.00000").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0000.00000").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0000.00000").convertToDouble()); |
| } |
| |
| TEST(APFloatTest, fromZeroDecimalSingleExponentString) { |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e-1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "000.0000e1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+000.0000e+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-000.0000e+1").convertToDouble()); |
| } |
| |
| TEST(APFloatTest, fromZeroDecimalLargeExponentString) { |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e1234").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e1234").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e1234").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e+1234").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e+1234").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e+1234").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e-1234").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e-1234").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e-1234").convertToDouble()); |
| |
| EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), "000.0000e1234").convertToDouble()); |
| EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), "000.0000e-1234").convertToDouble()); |
| |
| EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), StringRef("0e1234" "\0" "2", 6)).convertToDouble()); |
| } |
| |
| TEST(APFloatTest, fromZeroHexadecimalString) { |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p+1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p+1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p+1").convertToDouble()); |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p-1").convertToDouble()); |
| EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p-1").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p-1").convertToDouble()); |
| |
| |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x00000.p1").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0000.00000p1").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.00000p1").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p1234").convertToDouble()); |
| EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p1234").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x00000.p1234").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0000.00000p1234").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.00000p1234").convertToDouble()); |
| EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1234").convertToDouble()); |
| } |
| |
| TEST(APFloatTest, fromDecimalString) { |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble(), "2.").convertToDouble()); |
| EXPECT_EQ(0.5, APFloat(APFloat::IEEEdouble(), ".5").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0").convertToDouble()); |
| EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble(), "-2").convertToDouble()); |
| EXPECT_EQ(-4.0, APFloat(APFloat::IEEEdouble(), "-4.").convertToDouble()); |
| EXPECT_EQ(-0.5, APFloat(APFloat::IEEEdouble(), "-.5").convertToDouble()); |
| EXPECT_EQ(-1.5, APFloat(APFloat::IEEEdouble(), "-1.5").convertToDouble()); |
| EXPECT_EQ(1.25e12, APFloat(APFloat::IEEEdouble(), "1.25e12").convertToDouble()); |
| EXPECT_EQ(1.25e+12, APFloat(APFloat::IEEEdouble(), "1.25e+12").convertToDouble()); |
| EXPECT_EQ(1.25e-12, APFloat(APFloat::IEEEdouble(), "1.25e-12").convertToDouble()); |
| EXPECT_EQ(1024.0, APFloat(APFloat::IEEEdouble(), "1024.").convertToDouble()); |
| EXPECT_EQ(1024.05, APFloat(APFloat::IEEEdouble(), "1024.05000").convertToDouble()); |
| EXPECT_EQ(0.05, APFloat(APFloat::IEEEdouble(), ".05000").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble(), "2.").convertToDouble()); |
| EXPECT_EQ(2.0e2, APFloat(APFloat::IEEEdouble(), "2.e2").convertToDouble()); |
| EXPECT_EQ(2.0e+2, APFloat(APFloat::IEEEdouble(), "2.e+2").convertToDouble()); |
| EXPECT_EQ(2.0e-2, APFloat(APFloat::IEEEdouble(), "2.e-2").convertToDouble()); |
| EXPECT_EQ(2.05e2, APFloat(APFloat::IEEEdouble(), "002.05000e2").convertToDouble()); |
| EXPECT_EQ(2.05e+2, APFloat(APFloat::IEEEdouble(), "002.05000e+2").convertToDouble()); |
| EXPECT_EQ(2.05e-2, APFloat(APFloat::IEEEdouble(), "002.05000e-2").convertToDouble()); |
| EXPECT_EQ(2.05e12, APFloat(APFloat::IEEEdouble(), "002.05000e12").convertToDouble()); |
| EXPECT_EQ(2.05e+12, APFloat(APFloat::IEEEdouble(), "002.05000e+12").convertToDouble()); |
| EXPECT_EQ(2.05e-12, APFloat(APFloat::IEEEdouble(), "002.05000e-12").convertToDouble()); |
| |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "+1e").convertToDouble()); |
| EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-1e").convertToDouble()); |
| |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.e").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "+1.e").convertToDouble()); |
| EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-1.e").convertToDouble()); |
| |
| EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e").convertToDouble()); |
| EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), "+.1e").convertToDouble()); |
| EXPECT_EQ(-0.1, APFloat(APFloat::IEEEdouble(), "-.1e").convertToDouble()); |
| |
| EXPECT_EQ(1.1, APFloat(APFloat::IEEEdouble(), "1.1e").convertToDouble()); |
| EXPECT_EQ(1.1, APFloat(APFloat::IEEEdouble(), "+1.1e").convertToDouble()); |
| EXPECT_EQ(-1.1, APFloat(APFloat::IEEEdouble(), "-1.1e").convertToDouble()); |
| |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e+").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e-").convertToDouble()); |
| |
| EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e").convertToDouble()); |
| EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e+").convertToDouble()); |
| EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e-").convertToDouble()); |
| |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e+").convertToDouble()); |
| EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e-").convertToDouble()); |
| |
| // These are "carefully selected" to overflow the fast log-base |
| // calculations in APFloat.cpp |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "99e99999").isInfinity()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-99e99999").isInfinity()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "1e-99999").isPosZero()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-1e-99999").isNegZero()); |
| |
| EXPECT_EQ(2.71828, convertToDoubleFromString("2.71828")); |
| } |
| |
| TEST(APFloatTest, fromStringSpecials) { |
| const fltSemantics &Sem = APFloat::IEEEdouble(); |
| const unsigned Precision = 53; |
| const unsigned PayloadBits = Precision - 2; |
| uint64_t PayloadMask = (uint64_t(1) << PayloadBits) - uint64_t(1); |
| |
| uint64_t NaNPayloads[] = { |
| 0, |
| 1, |
| 123, |
| 0xDEADBEEF, |
| uint64_t(-2), |
| uint64_t(1) << PayloadBits, // overflow bit |
| uint64_t(1) << (PayloadBits - 1), // signaling bit |
| uint64_t(1) << (PayloadBits - 2) // highest possible bit |
| }; |
| |
| // Convert payload integer to decimal string representation. |
| std::string NaNPayloadDecStrings[std::size(NaNPayloads)]; |
| for (size_t I = 0; I < std::size(NaNPayloads); ++I) |
| NaNPayloadDecStrings[I] = utostr(NaNPayloads[I]); |
| |
| // Convert payload integer to hexadecimal string representation. |
| std::string NaNPayloadHexStrings[std::size(NaNPayloads)]; |
| for (size_t I = 0; I < std::size(NaNPayloads); ++I) |
| NaNPayloadHexStrings[I] = "0x" + utohexstr(NaNPayloads[I]); |
| |
| // Fix payloads to expected result. |
| for (uint64_t &Payload : NaNPayloads) |
| Payload &= PayloadMask; |
| |
| // Signaling NaN must have a non-zero payload. In case a zero payload is |
| // requested, a default arbitrary payload is set instead. Save this payload |
| // for testing. |
| const uint64_t SNaNDefaultPayload = |
| APFloat::getSNaN(Sem).bitcastToAPInt().getZExtValue() & PayloadMask; |
| |
| // Negative sign prefix (or none - for positive). |
| const char Signs[] = {0, '-'}; |
| |
| // "Signaling" prefix (or none - for "Quiet"). |
| const char NaNTypes[] = {0, 's', 'S'}; |
| |
| const StringRef NaNStrings[] = {"nan", "NaN"}; |
| for (StringRef NaNStr : NaNStrings) |
| for (char TypeChar : NaNTypes) { |
| bool Signaling = (TypeChar == 's' || TypeChar == 'S'); |
| |
| for (size_t J = 0; J < std::size(NaNPayloads); ++J) { |
| uint64_t Payload = (Signaling && !NaNPayloads[J]) ? SNaNDefaultPayload |
| : NaNPayloads[J]; |
| std::string &PayloadDec = NaNPayloadDecStrings[J]; |
| std::string &PayloadHex = NaNPayloadHexStrings[J]; |
| |
| for (char SignChar : Signs) { |
| bool Negative = (SignChar == '-'); |
| |
| std::string TestStrings[5]; |
| size_t NumTestStrings = 0; |
| |
| std::string Prefix; |
| if (SignChar) |
| Prefix += SignChar; |
| if (TypeChar) |
| Prefix += TypeChar; |
| Prefix += NaNStr; |
| |
| // Test without any paylod. |
| if (!Payload) |
| TestStrings[NumTestStrings++] = Prefix; |
| |
| // Test with the payload as a suffix. |
| TestStrings[NumTestStrings++] = Prefix + PayloadDec; |
| TestStrings[NumTestStrings++] = Prefix + PayloadHex; |
| |
| // Test with the payload inside parentheses. |
| TestStrings[NumTestStrings++] = Prefix + '(' + PayloadDec + ')'; |
| TestStrings[NumTestStrings++] = Prefix + '(' + PayloadHex + ')'; |
| |
| for (size_t K = 0; K < NumTestStrings; ++K) { |
| StringRef TestStr = TestStrings[K]; |
| |
| APFloat F(Sem); |
| bool HasError = !F.convertFromString( |
| TestStr, llvm::APFloat::rmNearestTiesToEven); |
| EXPECT_FALSE(HasError); |
| EXPECT_TRUE(F.isNaN()); |
| EXPECT_EQ(Signaling, F.isSignaling()); |
| EXPECT_EQ(Negative, F.isNegative()); |
| uint64_t PayloadResult = |
| F.bitcastToAPInt().getZExtValue() & PayloadMask; |
| EXPECT_EQ(Payload, PayloadResult); |
| } |
| } |
| } |
| } |
| |
| const StringRef InfStrings[] = {"inf", "INFINITY", "+Inf", |
| "-inf", "-INFINITY", "-Inf"}; |
| for (StringRef InfStr : InfStrings) { |
| bool Negative = InfStr.front() == '-'; |
| |
| APFloat F(Sem); |
| bool HasError = |
| !F.convertFromString(InfStr, llvm::APFloat::rmNearestTiesToEven); |
| EXPECT_FALSE(HasError); |
| EXPECT_TRUE(F.isInfinity()); |
| EXPECT_EQ(Negative, F.isNegative()); |
| uint64_t PayloadResult = F.bitcastToAPInt().getZExtValue() & PayloadMask; |
| EXPECT_EQ(UINT64_C(0), PayloadResult); |
| } |
| } |
| |
| TEST(APFloatTest, fromToStringSpecials) { |
| auto expects = [] (const char *first, const char *second) { |
| std::string roundtrip = convertToString(convertToDoubleFromString(second), 0, 3); |
| EXPECT_STREQ(first, roundtrip.c_str()); |
| }; |
| expects("+Inf", "+Inf"); |
| expects("+Inf", "INFINITY"); |
| expects("+Inf", "inf"); |
| expects("-Inf", "-Inf"); |
| expects("-Inf", "-INFINITY"); |
| expects("-Inf", "-inf"); |
| expects("NaN", "NaN"); |
| expects("NaN", "nan"); |
| expects("NaN", "-NaN"); |
| expects("NaN", "-nan"); |
| } |
| |
| // Parses hexadecimal floating-point literals as IEEE doubles and compares the |
| // results against the equivalent decimal constants. |
| TEST(APFloatTest, fromHexadecimalString) { |
|   // Builds an IEEE double from Text and returns it as a native double. |
|   auto parse = [](StringRef Text) { |
|     return APFloat(APFloat::IEEEdouble(), Text).convertToDouble(); |
|   }; |
| |
|   // Checks the bare literal and the same literal with an explicit '+' and '-' |
|   // prefix. |
|   auto checkAllSigns = [&parse](double Magnitude, const std::string &Literal) { |
|     EXPECT_EQ(Magnitude, parse(Literal)); |
|     EXPECT_EQ(+Magnitude, parse("+" + Literal)); |
|     EXPECT_EQ(-Magnitude, parse("-" + Literal)); |
|   }; |
| |
|   // Each literal appears with an unsigned, '+'-signed and '-'-signed exponent. |
|   const struct { |
|     double Magnitude; |
|     const char *Literal; |
|   } SignedCases[] = { |
|       {1.0, "0x1p0"}, |
|       {1.0, "0x1p+0"}, |
|       {1.0, "0x1p-0"}, |
| |
|       {2.0, "0x1p1"}, |
|       {2.0, "0x1p+1"}, |
|       {0.5, "0x1p-1"}, |
| |
|       {3.0, "0x1.8p1"}, |
|       {3.0, "0x1.8p+1"}, |
|       {0.75, "0x1.8p-1"}, |
| |
|       {8192.0, "0x1000.000p1"}, |
|       {8192.0, "0x1000.000p+1"}, |
|       {2048.0, "0x1000.000p-1"}, |
| |
|       {8192.0, "0x1000p1"}, |
|       {8192.0, "0x1000p+1"}, |
|       {2048.0, "0x1000p-1"}, |
| |
|       {16384.0, "0x10p10"}, |
|       {16384.0, "0x10p+10"}, |
|       {0.015625, "0x10p-10"}, |
|   }; |
|   for (const auto &C : SignedCases) |
|     checkAllSigns(C.Magnitude, C.Literal); |
| |
|   // Unsigned-only spot checks. |
|   EXPECT_EQ(1.0625, parse("0x1.1p0")); |
|   EXPECT_EQ(1.0, parse("0x1p0")); |
| |
|   // Significands wider than a double's mantissa, going through the |
|   // string-conversion helper instead of the constructor. |
|   EXPECT_EQ(convertToDoubleFromString("0x1p-150"), |
|             convertToDoubleFromString("+0x800000000000000001.p-221")); |
|   EXPECT_EQ(2251799813685248.5, |
|             convertToDoubleFromString("0x80000000000004000000.010p-28")); |
| } |
| |
| // Exercises the convertToString helper (a wrapper around APFloat::toString) |
| // over combinations of its precision, padding and Tr arguments. |
| TEST(APFloatTest, toString) { |
|   // Prec, Pad and Tr are forwarded verbatim to convertToString. The first |
|   // half of the table uses Tr == true (the helper's default), the second |
|   // half repeats the same inputs with Tr == false. |
|   const struct { |
|     const char *Expected; |
|     double Value; |
|     unsigned Prec; |
|     unsigned Pad; |
|     bool Tr; |
|   } Cases[] = { |
|       {"10", 10.0, 6, 3, true}, |
|       {"1.0E+1", 10.0, 6, 0, true}, |
|       {"10100", 1.01E+4, 5, 2, true}, |
|       {"1.01E+4", 1.01E+4, 4, 2, true}, |
|       {"1.01E+4", 1.01E+4, 5, 1, true}, |
|       {"0.0101", 1.01E-2, 5, 2, true}, |
|       {"0.0101", 1.01E-2, 4, 2, true}, |
|       {"1.01E-2", 1.01E-2, 5, 1, true}, |
|       {"0.78539816339744828", 0.78539816339744830961, 0, 3, true}, |
|       {"4.9406564584124654E-324", 4.9406564584124654e-324, 0, 3, true}, |
|       {"873.18340000000001", 873.1834, 0, 1, true}, |
|       {"8.7318340000000001E+2", 873.1834, 0, 0, true}, |
|       {"1.7976931348623157E+308", 1.7976931348623157E+308, 0, 0, true}, |
| |
|       {"10", 10.0, 6, 3, false}, |
|       {"1.000000e+01", 10.0, 6, 0, false}, |
|       {"10100", 1.01E+4, 5, 2, false}, |
|       {"1.0100e+04", 1.01E+4, 4, 2, false}, |
|       {"1.01000e+04", 1.01E+4, 5, 1, false}, |
|       {"0.0101", 1.01E-2, 5, 2, false}, |
|       {"0.0101", 1.01E-2, 4, 2, false}, |
|       {"1.01000e-02", 1.01E-2, 5, 1, false}, |
|       {"0.78539816339744828", 0.78539816339744830961, 0, 3, false}, |
|       {"4.94065645841246540e-324", 4.9406564584124654e-324, 0, 3, false}, |
|       {"873.18340000000001", 873.1834, 0, 1, false}, |
|       {"8.73183400000000010e+02", 873.1834, 0, 0, false}, |
|       {"1.79769313486231570e+308", 1.7976931348623157E+308, 0, 0, false}, |
|   }; |
| |
|   // ASSERT (not EXPECT): the first mismatch ends the test. |
|   for (const auto &C : Cases) |
|     ASSERT_EQ(C.Expected, convertToString(C.Value, C.Prec, C.Pad, C.Tr)); |
| |
|   // An 80-bit x87 pattern whose low word is 0 and whose high word is 1 is |
|   // expected to print as "NaN". |
|   { |
|     SmallString<64> Rendered; |
|     APFloat UnnormalZero(APFloat::x87DoubleExtended(), APInt(80, {0, 1})); |
|     UnnormalZero.toString(Rendered); |
|     ASSERT_EQ("NaN", Rendered); |
|   } |
| } |
| |
| // Converts decimal strings to a 5-bit integer, first unsigned and then |
| // signed, checking the status, the exactness flag and the stored result. |
| TEST(APFloatTest, toInteger) { |
|   bool Exact = false; |
|   APSInt Out(5, /*isUnsigned=*/true); |
| |
|   // Parses Text as an IEEE double and truncates it toward zero into Out, |
|   // updating Exact as a side effect. |
|   auto truncate = [&Out, &Exact](StringRef Text) { |
|     return APFloat(APFloat::IEEEdouble(), Text) |
|         .convertToInteger(Out, APFloat::rmTowardZero, &Exact); |
|   }; |
| |
|   // Unsigned 5-bit destination. |
|   EXPECT_EQ(APFloat::opOK, truncate("10")); |
|   EXPECT_TRUE(Exact); |
|   EXPECT_EQ(APSInt(APInt(5, 10), true), Out); |
| |
|   // Below the range: invalid, result pinned to the minimum. |
|   EXPECT_EQ(APFloat::opInvalidOp, truncate("-10")); |
|   EXPECT_FALSE(Exact); |
|   EXPECT_EQ(APSInt::getMinValue(5, true), Out); |
| |
|   // Above the range: invalid, result pinned to the maximum. |
|   EXPECT_EQ(APFloat::opInvalidOp, truncate("32")); |
|   EXPECT_FALSE(Exact); |
|   EXPECT_EQ(APSInt::getMaxValue(5, true), Out); |
| |
|   // In range but fractional: inexact, truncated. |
|   EXPECT_EQ(APFloat::opInexact, truncate("7.9")); |
|   EXPECT_FALSE(Exact); |
|   EXPECT_EQ(APSInt(APInt(5, 7), true), Out); |
| |
|   // Same destination reinterpreted as signed. |
|   Out.setIsUnsigned(false); |
|   EXPECT_EQ(APFloat::opOK, truncate("-10")); |
|   EXPECT_TRUE(Exact); |
|   EXPECT_EQ(APSInt(APInt(5, -10, true), false), Out); |
| |
|   EXPECT_EQ(APFloat::opInvalidOp, truncate("-17")); |
|   EXPECT_FALSE(Exact); |
|   EXPECT_EQ(APSInt::getMinValue(5, false), Out); |
| |
|   EXPECT_EQ(APFloat::opInvalidOp, truncate("16")); |
|   EXPECT_FALSE(Exact); |
|   EXPECT_EQ(APSInt::getMaxValue(5, false), Out); |
| } |
| |
| // Fixture parameterized over a float semantics pointer. It offers one helper |
| // that converts an integer to that semantics and back again. |
| class APFloatConvertFromAPIntParamTest |
|     : public ::testing::TestWithParam<const fltSemantics *> { |
| protected: |
|   // Converts InputValue (interpreted as signed when IsSigned is set) to the |
|   // parameter's semantics using rounding mode RM, truncates the float back |
|   // toward zero into an integer of the same width and signedness, and expects |
|   // that conversion back to be exact and equal to ExpectedIntValue. |
|   static void testConversionAndCompareInt(const APInt &InputValue, |
|                                           const bool IsSigned, |
|                                           APFloat::roundingMode RM, |
|                                           const APInt &ExpectedIntValue) { |
|     APFloat Converted(*GetParam()); |
|     Converted.convertFromAPInt(InputValue, /*IsSigned=*/IsSigned, RM); |
| |
|     bool Exact; |
|     APSInt RoundTripped(InputValue.getBitWidth(), /*isUnsigned=*/!IsSigned); |
|     Converted.convertToInteger(RoundTripped, APFloat::rmTowardZero, &Exact); |
| |
|     EXPECT_TRUE(Exact); |
|     EXPECT_TRUE(RoundTripped.eq(ExpectedIntValue)) |
|         << "InputValue: " << InputValue << "\n" |
|         << RoundTripped << " vs " << ExpectedIntValue << "\n"; |
|   } |
| }; |
| |
| // Feeds an integer lying exactly between two neighbouring test values into |
| // convertFromAPInt and checks which neighbour each rounding mode selects. |
| TEST_P(APFloatConvertFromAPIntParamTest, HalfwayRounding) { |
|   const unsigned Precision = APFloat::semanticsPrecision(*GetParam()); |
| |
|   if (Precision == 0) |
|     GTEST_SKIP() << "Skipping test for semantics with no significand."; |
| |
|   for (const bool Signed : {false, true}) { |
|     // One bit beyond the precision, plus a sign bit for the signed variant. |
|     const unsigned Width = Precision + 1 + (Signed ? 1 : 0); |
| |
|     // 2^Precision and 2^Precision + 2 are the expected results; the value |
|     // between them is the tie being rounded. |
|     const APInt Below = APInt::getOneBitSet(Width, Precision); |
|     const APInt Tie = Below + 1; |
|     const APInt Above = Below + 2; |
| |
|     const struct { |
|       APFloat::roundingMode Mode; |
|       const APInt *Expected; |
|     } Outcomes[] = { |
|         {APFloat::rmNearestTiesToEven, &Below}, |
|         {APFloat::rmNearestTiesToAway, &Above}, |
|         {APFloat::rmTowardPositive, &Above}, |
|         {APFloat::rmTowardNegative, &Below}, |
|         {APFloat::rmTowardZero, &Below}, |
|     }; |
|     for (const auto &O : Outcomes) |
|       testConversionAndCompareInt(Tie, Signed, O.Mode, *O.Expected); |
|   } |
| } |
| |
| // Takes the largest finite value of the semantics as an integer and checks |
| // that converting that integer back is lossless in every rounding mode. |
| TEST_P(APFloatConvertFromAPIntParamTest, MaxMagnitude) { |
|   const fltSemantics &Sem = *GetParam(); |
| |
|   if (APFloat::semanticsPrecision(Sem) == 0) |
|     GTEST_SKIP() << "Skipping test for semantics with no significand."; |
| |
|   const APFloat Biggest = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const int TopExponent = ilogb(Biggest); |
|   const APFloat::roundingMode AllModes[] = { |
|       APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
|       APFloat::rmTowardPositive, APFloat::rmTowardZero, |
|       APFloat::rmNearestTiesToEven}; |
| |
|   for (const bool Signed : {false, true}) { |
|     // Exponent + 1 bits for the magnitude, one more when signed. |
|     const unsigned Width = TopExponent + 1 + (Signed ? 1 : 0); |
| |
|     APSInt AsInteger(Width, /*isUnsigned=*/!Signed); |
|     bool Exact; |
|     const APFloat::opStatus Status = |
|         Biggest.convertToInteger(AsInteger, APFloat::rmTowardZero, &Exact); |
|     EXPECT_EQ(Status, APFloat::opOK); |
| |
|     for (const APFloat::roundingMode Mode : AllModes) |
|       testConversionAndCompareInt(AsInteger, Signed, Mode, AsInteger); |
|   } |
| } |
| |
| // Runs every APFloatConvertFromAPIntParamTest case once per listed semantics. |
| INSTANTIATE_TEST_SUITE_P( |
|     IEEESemantics, APFloatConvertFromAPIntParamTest, |
|     ::testing::Values(&APFloat::IEEEhalf(), &APFloat::BFloat(), |
|                       &APFloat::IEEEsingle(), &APFloat::IEEEdouble(), |
|                       &APFloat::IEEEquad())); |
| |
| // Returns the bit pattern of a NaN in semantics Sem, built from a 64-bit |
| // payload. SNaN chooses between getSNaN and getQNaN; Negative is passed |
| // through to whichever factory is used. |
| static APInt nanbitsFromAPInt(const fltSemantics &Sem, bool SNaN, bool Negative, |
|                               uint64_t payload) { |
|   const APInt Fill(64, payload); |
|   const APFloat NaN = SNaN ? APFloat::getSNaN(Sem, Negative, &Fill) |
|                            : APFloat::getQNaN(Sem, Negative, &Fill); |
|   return NaN.bitcastToAPInt(); |
| } |
| |
| // Table-driven check of the bit patterns produced by nanbitsFromAPInt for |
| // quiet and signaling NaNs with assorted payloads. |
| TEST(APFloatTest, makeNaN) { |
|   const struct { |
|     uint64_t Bits;           // expected encoding |
|     const fltSemantics &Sem; // format under test |
|     bool Signaling;          // request an SNaN instead of a QNaN |
|     bool Neg;                // request a negative NaN |
|     uint64_t Payload;        // payload handed to the factory |
|   } Cases[] = { |
|       // clang-format off |
|       // IEEEsingle, quiet then signaling. |
|       {0x7fc00000ULL, APFloat::IEEEsingle(), false, false, 0x00000000ULL}, |
|       {0xffc00000ULL, APFloat::IEEEsingle(), false, true,  0x00000000ULL}, |
|       {0x7fc0ae72ULL, APFloat::IEEEsingle(), false, false, 0x0000ae72ULL}, |
|       {0x7fffae72ULL, APFloat::IEEEsingle(), false, false, 0xffffae72ULL}, |
|       {0x7fdaae72ULL, APFloat::IEEEsingle(), false, false, 0x00daae72ULL}, |
|       {0x7fa00000ULL, APFloat::IEEEsingle(), true,  false, 0x00000000ULL}, |
|       {0xffa00000ULL, APFloat::IEEEsingle(), true,  true,  0x00000000ULL}, |
|       {0x7f80ae72ULL, APFloat::IEEEsingle(), true,  false, 0x0000ae72ULL}, |
|       {0x7fbfae72ULL, APFloat::IEEEsingle(), true,  false, 0xffffae72ULL}, |
|       {0x7f9aae72ULL, APFloat::IEEEsingle(), true,  false, 0x001aae72ULL}, |
|       // IEEEdouble, quiet then signaling. |
|       {0x7ff8000000000000ULL, APFloat::IEEEdouble(), false, false, 0x0000000000000000ULL}, |
|       {0xfff8000000000000ULL, APFloat::IEEEdouble(), false, true,  0x0000000000000000ULL}, |
|       {0x7ff800000000ae72ULL, APFloat::IEEEdouble(), false, false, 0x000000000000ae72ULL}, |
|       {0x7fffffffffffae72ULL, APFloat::IEEEdouble(), false, false, 0xffffffffffffae72ULL}, |
|       {0x7ffdaaaaaaaaae72ULL, APFloat::IEEEdouble(), false, false, 0x000daaaaaaaaae72ULL}, |
|       {0x7ff4000000000000ULL, APFloat::IEEEdouble(), true,  false, 0x0000000000000000ULL}, |
|       {0xfff4000000000000ULL, APFloat::IEEEdouble(), true,  true,  0x0000000000000000ULL}, |
|       {0x7ff000000000ae72ULL, APFloat::IEEEdouble(), true,  false, 0x000000000000ae72ULL}, |
|       {0x7ff7ffffffffae72ULL, APFloat::IEEEdouble(), true,  false, 0xffffffffffffae72ULL}, |
|       {0x7ff1aaaaaaaaae72ULL, APFloat::IEEEdouble(), true,  false, 0x0001aaaaaaaaae72ULL}, |
|       // The FNUZ 8-bit formats: every request is expected to give 0x80. |
|       {0x80ULL, APFloat::Float8E5M2FNUZ(), false, false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E5M2FNUZ(), false, true,  0xaaULL}, |
|       {0x80ULL, APFloat::Float8E5M2FNUZ(), true,  false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E5M2FNUZ(), true,  true,  0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3FNUZ(), false, false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3FNUZ(), false, true,  0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3FNUZ(), true,  false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3FNUZ(), true,  true,  0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3B11FNUZ(), false, false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3B11FNUZ(), false, true,  0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3B11FNUZ(), true,  false, 0xaaULL}, |
|       {0x80ULL, APFloat::Float8E4M3B11FNUZ(), true,  true,  0xaaULL}, |
|       // FloatTF32, quiet then signaling. |
|       {0x3fe00ULL, APFloat::FloatTF32(), false, false, 0x00000000ULL}, |
|       {0x7fe00ULL, APFloat::FloatTF32(), false, true,  0x00000000ULL}, |
|       {0x3feaaULL, APFloat::FloatTF32(), false, false, 0xaaULL}, |
|       {0x3ffaaULL, APFloat::FloatTF32(), false, false, 0xdaaULL}, |
|       {0x3ffaaULL, APFloat::FloatTF32(), false, false, 0xfdaaULL}, |
|       {0x3fd00ULL, APFloat::FloatTF32(), true,  false, 0x00000000ULL}, |
|       {0x7fd00ULL, APFloat::FloatTF32(), true,  true,  0x00000000ULL}, |
|       {0x3fcaaULL, APFloat::FloatTF32(), true,  false, 0xaaULL}, |
|       {0x3fdaaULL, APFloat::FloatTF32(), true,  false, 0xfaaULL}, |
|       {0x3fdaaULL, APFloat::FloatTF32(), true,  false, 0x1aaULL}, |
|       // clang-format on |
|   }; |
| |
|   // ASSERT (not EXPECT): the first mismatching row ends the test. |
|   for (const auto &C : Cases) |
|     ASSERT_EQ(C.Bits, nanbitsFromAPInt(C.Sem, C.Signaling, C.Neg, C.Payload)); |
| } |
| |
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) |
| // Only compiled when death tests are available and NDEBUG is not defined. |
| // Converting to a native type from semantics it cannot represent is expected |
| // to die with the quoted message. |
| TEST(APFloatTest, SemanticsDeath) { |
|   EXPECT_DEATH(APFloat(APFloat::IEEEquad(), 0).convertToDouble(), |
|                "Float semantics is not representable by IEEEdouble"); |
|   EXPECT_DEATH(APFloat(APFloat::IEEEdouble(), 0).convertToFloat(), |
|                "Float semantics is not representable by IEEEsingle"); |
| } |
| #endif |
| |
| TEST(APFloatTest, StringDecimalError) { |
| EXPECT_EQ("Invalid string length", convertToErrorFromString("")); |
| EXPECT_EQ("String has no digits", convertToErrorFromString("+")); |
| EXPECT_EQ("String has no digits", convertToErrorFromString("-")); |
| |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("\0", 1))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1\0", 2))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1" "\0" "2", 3))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1" "\0" "2e1", 5))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e\0", 3))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e1\0", 4))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e1" "\0" "2", 5))); |
| |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString("1.0f")); |
| |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("..")); |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("..0")); |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("1.0.0")); |
| } |
| |
| // Decimal strings with no digit before the exponent (or none at all) must |
| // be rejected, whether unsigned or prefixed with '+' or '-'. |
| TEST(APFloatTest, StringDecimalSignificandError) { |
|   const char *const Bodies[] = {".", "e", "e1", ".e1", ".e"}; |
|   const char *const Signs[] = {"", "+", "-"}; |
| |
|   for (const char *Body : Bodies) { |
|     for (const char *Sign : Signs) { |
|       EXPECT_EQ("Significand has no digits", |
|                 convertToErrorFromString(std::string(Sign) + Body)); |
|     } |
|   } |
| } |
| |
| TEST(APFloatTest, StringHexadecimalError) { |
| EXPECT_EQ("Invalid string", convertToErrorFromString( "0x")); |
| EXPECT_EQ("Invalid string", convertToErrorFromString("+0x")); |
| EXPECT_EQ("Invalid string", convertToErrorFromString("-0x")); |
| |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0")); |
| |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0.")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0.")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0.")); |
| |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x.0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x.0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x.0")); |
| |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0.0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0.0")); |
| EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0.0")); |
| |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x\0", 3))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1\0", 4))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1" "\0" "2", 5))); |
| EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1" "\0" "2p1", 7))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p\0", 5))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p1\0", 6))); |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p1" "\0" "2", 7))); |
| |
| EXPECT_EQ("Invalid character in exponent", convertToErrorFromString("0x1p0f")); |
| |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x..p1")); |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x..0p1")); |
| EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x1.0.0p1")); |
| } |
| |
| // Hexadecimal strings whose significand contains no hex digit must be |
| // rejected, whether unsigned or prefixed with '+' or '-'. |
| TEST(APFloatTest, StringHexadecimalSignificandError) { |
|   const char *const Bodies[] = {"0x.",  "0xp",   "0xp+",  "0xp-", |
|                                 "0x.p", "0x.p+", "0x.p-"}; |
|   const char *const Signs[] = {"", "+", "-"}; |
| |
|   for (const char *Body : Bodies) { |
|     for (const char *Sign : Signs) { |
|       EXPECT_EQ("Significand has no digits", |
|                 convertToErrorFromString(std::string(Sign) + Body)); |
|     } |
|   } |
| } |
| |
| // Hexadecimal strings with a valid significand but nothing (or only a sign) |
| // after the 'p' must be rejected for every combination below. |
| TEST(APFloatTest, StringHexadecimalExponentError) { |
|   const char *const Significands[] = {"0x1", "0x1.", "0x.1", "0x1.1"}; |
|   const char *const ExponentMarkers[] = {"p", "p+", "p-"}; |
|   const char *const Signs[] = {"", "+", "-"}; |
| |
|   for (const char *Significand : Significands) { |
|     for (const char *Marker : ExponentMarkers) { |
|       for (const char *Sign : Signs) { |
|         const std::string Input = std::string(Sign) + Significand + Marker; |
|         EXPECT_EQ("Exponent has no digits", convertToErrorFromString(Input)); |
|       } |
|     } |
|   } |
| } |
| |
| // getExactInverse must succeed and give the bit-exact reciprocal for the |
| // first group of values, and must fail for the second group. |
| TEST(APFloatTest, exactInverse) { |
|   APFloat inv(0.0f); |
| |
|   // Expects Value to have an exact inverse that is bitwise equal to |
|   // Reciprocal. The result is written through the shared 'inv'. |
|   auto expectInverse = [&inv](const APFloat &Value, |
|                               const APFloat &Reciprocal) { |
|     EXPECT_TRUE(Value.getExactInverse(&inv)); |
|     EXPECT_TRUE(inv.bitwiseIsEqual(Reciprocal)); |
|   }; |
|   // Expects Value to have no exact inverse. |
|   auto expectNoInverse = [](const APFloat &Value) { |
|     EXPECT_FALSE(Value.getExactInverse(nullptr)); |
|   }; |
| |
|   // 2 and 1/2 in several semantics. |
|   expectInverse(APFloat(2.0), APFloat(0.5)); |
|   expectInverse(APFloat(2.0f), APFloat(0.5f)); |
|   for (const fltSemantics *Sem : |
|        {&APFloat::IEEEquad(), &APFloat::PPCDoubleDouble(), |
|         &APFloat::x87DoubleExtended()}) |
|     expectInverse(APFloat(*Sem, "2.0"), APFloat(*Sem, "0.5")); |
| |
|   // 0x1p1022 has a normal inverse for IEEE 754 binary64: 0x1p-1022. |
|   expectInverse(APFloat(0x1p1022), APFloat(0x1p-1022)); |
|   // With regard to getExactInverse, IEEEdouble and PPCDoubleDouble should |
|   // behave the same. |
|   expectInverse(APFloat(APFloat::PPCDoubleDouble(), "0x1p1022"), |
|                 APFloat(APFloat::PPCDoubleDouble(), "0x1p-1022")); |
| |
|   // FLT_MIN |
|   expectInverse(APFloat(1.17549435e-38f), APFloat(8.5070592e+37f)); |
| |
|   // Large float whose inverse would be a denormal. |
|   expectNoInverse(APFloat(1.7014118e38f)); |
|   // Zero. |
|   expectNoInverse(APFloat(0.0)); |
|   // Denormalized float. |
|   expectNoInverse(APFloat(1.40129846e-45f)); |
|   // A power-of-two subnormal: half of the FLT_MIN value used above. |
|   expectNoInverse(APFloat(0x1p-127f)); |
| } |
| |
| // Exercises APFloat::roundToIntegral on IEEE doubles: ordinary values under |
| // several rounding modes, then zeros/NaNs/infinities, then the returned |
| // status together with the sign and class of the result. |
| TEST(APFloatTest, roundToIntegral) { |
|   const APFloat T(-0.5), S(3.14), R(APFloat::getLargest(APFloat::IEEEdouble())); |
|   APFloat P(0.0); |
|   const fltSemantics &Dbl = APFloat::IEEEdouble(); |
| |
|   // Copies Start into P, rounds P in place and returns the status. |
|   auto roundFrom = [&P](const APFloat &Start, APFloat::roundingMode Mode) { |
|     P = Start; |
|     return P.roundToIntegral(Mode); |
|   }; |
| |
|   // --- Values only -------------------------------------------------------- |
|   // NOTE: comparisons against -0.0 below use ==, which also holds for +0.0, |
|   // so they do not by themselves check the sign of a zero result. |
|   const APFloat::roundingMode Modes[] = { |
|       APFloat::rmTowardZero, APFloat::rmTowardNegative, |
|       APFloat::rmTowardPositive, APFloat::rmNearestTiesToEven}; |
|   const double ExpectedFromT[] = {-0.0, -1.0, -0.0, -0.0}; |
|   const double ExpectedFromS[] = {3.0, 3.0, 4.0, 3.0}; |
| |
|   for (unsigned I = 0; I != 4; ++I) { |
|     roundFrom(T, Modes[I]); |
|     EXPECT_EQ(ExpectedFromT[I], P.convertToDouble()); |
|   } |
|   for (unsigned I = 0; I != 4; ++I) { |
|     roundFrom(S, Modes[I]); |
|     EXPECT_EQ(ExpectedFromS[I], P.convertToDouble()); |
|   } |
|   // The largest double is expected to come back unchanged in every mode. |
|   for (const APFloat::roundingMode Mode : Modes) { |
|     roundFrom(R, Mode); |
|     EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); |
|   } |
| |
|   roundFrom(APFloat::getZero(Dbl), APFloat::rmTowardZero); |
|   EXPECT_EQ(0.0, P.convertToDouble()); |
|   roundFrom(APFloat::getZero(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_EQ(-0.0, P.convertToDouble()); |
|   roundFrom(APFloat::getNaN(Dbl), APFloat::rmTowardZero); |
|   EXPECT_TRUE(std::isnan(P.convertToDouble())); |
|   roundFrom(APFloat::getInf(Dbl), APFloat::rmTowardZero); |
|   EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() > 0.0); |
|   roundFrom(APFloat::getInf(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0); |
| |
|   // --- Status, sign and class --------------------------------------------- |
|   APFloat::opStatus St; |
| |
|   // Quiet NaNs: sign kept, status OK. |
|   St = roundFrom(APFloat::getNaN(Dbl), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isNaN()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   St = roundFrom(APFloat::getNaN(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isNaN()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   // Signaling NaNs: no longer signaling afterwards, status InvalidOp. |
|   St = roundFrom(APFloat::getSNaN(Dbl), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isNaN()); |
|   EXPECT_FALSE(P.isSignaling()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opInvalidOp, St); |
| |
|   St = roundFrom(APFloat::getSNaN(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isNaN()); |
|   EXPECT_FALSE(P.isSignaling()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opInvalidOp, St); |
| |
|   // Infinities. |
|   St = roundFrom(APFloat::getInf(Dbl), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isInfinity()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   St = roundFrom(APFloat::getInf(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isInfinity()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   // Zeros of both signs, toward zero and toward negative. |
|   St = roundFrom(APFloat::getZero(Dbl, false), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   St = roundFrom(APFloat::getZero(Dbl, false), APFloat::rmTowardNegative); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   St = roundFrom(APFloat::getZero(Dbl, true), APFloat::rmTowardZero); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   St = roundFrom(APFloat::getZero(Dbl, true), APFloat::rmTowardNegative); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opOK, St); |
| |
|   // Tiny magnitudes: result is a zero of the input's sign or +/-1, inexact. |
|   St = roundFrom(APFloat(1E-100), APFloat::rmTowardNegative); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opInexact, St); |
| |
|   St = roundFrom(APFloat(1E-100), APFloat::rmTowardPositive); |
|   EXPECT_EQ(1.0, P.convertToDouble()); |
|   EXPECT_FALSE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opInexact, St); |
| |
|   St = roundFrom(APFloat(-1E-100), APFloat::rmTowardNegative); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(-1.0, P.convertToDouble()); |
|   EXPECT_EQ(APFloat::opInexact, St); |
| |
|   St = roundFrom(APFloat(-1E-100), APFloat::rmTowardPositive); |
|   EXPECT_TRUE(P.isZero()); |
|   EXPECT_TRUE(P.isNegative()); |
|   EXPECT_EQ(APFloat::opInexact, St); |
| |
|   // Ordinary finite values: expected result and status per rounding mode. |
|   const struct { |
|     double Start; |
|     APFloat::roundingMode Mode; |
|     double Rounded; |
|     APFloat::opStatus Status; |
|   } FiniteCases[] = { |
|       {10.0, APFloat::rmTowardZero, 10.0, APFloat::opOK}, |
|       {10.5, APFloat::rmTowardZero, 10.0, APFloat::opInexact}, |
|       {10.5, APFloat::rmTowardPositive, 11.0, APFloat::opInexact}, |
|       {10.5, APFloat::rmTowardNegative, 10.0, APFloat::opInexact}, |
|       {10.5, APFloat::rmNearestTiesToAway, 11.0, APFloat::opInexact}, |
|       {10.5, APFloat::rmNearestTiesToEven, 10.0, APFloat::opInexact}, |
|   }; |
|   for (const auto &C : FiniteCases) { |
|     St = roundFrom(APFloat(C.Start), C.Mode); |
|     EXPECT_EQ(C.Rounded, P.convertToDouble()); |
|     EXPECT_EQ(C.Status, St); |
|   } |
| } |
| |
| TEST(APFloatTest, isInteger) { |
| APFloat T(-0.0); |
| EXPECT_TRUE(T.isInteger()); |
| T = APFloat(3.14159); |
| EXPECT_FALSE(T.isInteger()); |
| T = APFloat::getNaN(APFloat::IEEEdouble()); |
| EXPECT_FALSE(T.isInteger()); |
| T = APFloat::getInf(APFloat::IEEEdouble()); |
| EXPECT_FALSE(T.isInteger()); |
| T = APFloat::getInf(APFloat::IEEEdouble(), true); |
| EXPECT_FALSE(T.isInteger()); |
| T = APFloat::getLargest(APFloat::IEEEdouble()); |
| EXPECT_TRUE(T.isInteger()); |
| } |
| |
| TEST(DoubleAPFloatTest, isInteger) { |
| APFloat F1(-0.0); |
| APFloat F2(-0.0); |
| llvm::detail::DoubleAPFloat T(APFloat::PPCDoubleDouble(), std::move(F1), |
| std::move(F2)); |
| EXPECT_TRUE(T.isInteger()); |
| APFloat F3(3.14159); |
| APFloat F4(-0.0); |
| llvm::detail::DoubleAPFloat T2(APFloat::PPCDoubleDouble(), std::move(F3), |
| std::move(F4)); |
| EXPECT_FALSE(T2.isInteger()); |
| APFloat F5(-0.0); |
| APFloat F6(3.14159); |
| llvm::detail::DoubleAPFloat T3(APFloat::PPCDoubleDouble(), std::move(F5), |
| std::move(F6)); |
| EXPECT_FALSE(T3.isInteger()); |
| } |
| |
| // Test to check if the full range of Float8E8M0FNU |
| // values are being represented correctly. |
| TEST(APFloatTest, Float8E8M0FNUValues) { |
| // High end of the range |
| auto test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p127"); |
| EXPECT_EQ(0x1.0p127, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p126"); |
| EXPECT_EQ(0x1.0p126, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p125"); |
| EXPECT_EQ(0x1.0p125, test.convertToDouble()); |
| |
| // tests the fix in makeLargest() |
| test = APFloat::getLargest(APFloat::Float8E8M0FNU()); |
| EXPECT_EQ(0x1.0p127, test.convertToDouble()); |
| |
| // tests overflow to nan |
| APFloat nan = APFloat(APFloat::Float8E8M0FNU(), "nan"); |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p128"); |
| EXPECT_TRUE(test.bitwiseIsEqual(nan)); |
| |
| // Mid of the range |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p0"); |
| EXPECT_EQ(1.0, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p1"); |
| EXPECT_EQ(2.0, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p2"); |
| EXPECT_EQ(4.0, test.convertToDouble()); |
| |
| // Low end of the range |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-125"); |
| EXPECT_EQ(0x1.0p-125, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-126"); |
| EXPECT_EQ(0x1.0p-126, test.convertToDouble()); |
| |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127"); |
| EXPECT_EQ(0x1.0p-127, test.convertToDouble()); |
| |
| // Smallest value |
| test = APFloat::getSmallest(APFloat::Float8E8M0FNU()); |
| EXPECT_EQ(0x1.0p-127, test.convertToDouble()); |
| |
| // Value below the smallest, but clamped to the smallest |
| test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-128"); |
| EXPECT_EQ(0x1.0p-127, test.convertToDouble()); |
| } |
| |
| TEST(APFloatTest, getLargest) { |
| EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat()); |
| EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble()); |
| EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble()); |
| EXPECT_EQ(240, |
| APFloat::getLargest(APFloat::Float8E4M3FNUZ()).convertToDouble()); |
| EXPECT_EQ(57344, |
| APFloat::getLargest(APFloat::Float8E5M2FNUZ()).convertToDouble()); |
| EXPECT_EQ( |
| 30, APFloat::getLargest(APFloat::Float8E4M3B11FNUZ()).convertToDouble()); |
| EXPECT_EQ(3.40116213421e+38f, |
| APFloat::getLargest(APFloat::FloatTF32()).convertToFloat()); |
| EXPECT_EQ(1.701411834e+38f, |
| APFloat::getLargest(APFloat::Float8E8M0FNU()).convertToDouble()); |
| EXPECT_EQ(28, APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble()); |
| EXPECT_EQ(7.5, |
| APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble()); |
| EXPECT_EQ(6, APFloat::getLargest(APFloat::Float4E2M1FN()).convertToDouble()); |
| } |
| |
| TEST(APFloatTest, getSmallest) { |
| APFloat test = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat expected = APFloat(APFloat::IEEEsingle(), "0x0.000002p-126"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| expected = APFloat(APFloat::IEEEsingle(), "-0x0.000002p-126"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::IEEEquad(), false); |
| expected = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::IEEEquad(), true); |
| expected = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), false); |
| expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x0.4p-15"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), false); |
| expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x0.2p-7"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float8E4M3B11FNUZ(), false); |
| expected = APFloat(APFloat::Float8E4M3B11FNUZ(), "0x0.2p-10"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::FloatTF32(), true); |
| expected = APFloat(APFloat::FloatTF32(), "-0x0.004p-126"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); |
| expected = APFloat(APFloat::Float6E3M2FN(), "0x0.1p0"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); |
| expected = APFloat(APFloat::Float6E2M3FN(), "0x0.2p0"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float4E2M1FN(), false); |
| expected = APFloat(APFloat::Float4E2M1FN(), "0x0.8p0"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getSmallest(APFloat::Float8E8M0FNU()); |
| expected = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| TEST(APFloatTest, getSmallestNormalized) { |
| APFloat test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat expected = APFloat(APFloat::IEEEsingle(), "0x1p-126"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| expected = APFloat(APFloat::IEEEsingle(), "-0x1p-126"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false); |
| expected = APFloat(APFloat::IEEEdouble(), "0x1p-1022"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true); |
| expected = APFloat(APFloat::IEEEdouble(), "-0x1p-1022"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), false); |
| expected = APFloat(APFloat::IEEEquad(), "0x1p-16382"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), true); |
| expected = APFloat(APFloat::IEEEquad(), "-0x1p-16382"); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), false); |
| expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-15"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), false); |
| expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.0p-7"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float8E4M3B11FNUZ(), false); |
| expected = APFloat(APFloat::Float8E4M3B11FNUZ(), "0x1.0p-10"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::FloatTF32(), false); |
| expected = APFloat(APFloat::FloatTF32(), "0x1p-126"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); |
| expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false); |
| expected = APFloat(APFloat::Float4E2M1FN(), "0x1p0"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); |
| expected = APFloat(APFloat::Float6E2M3FN(), "0x1p0"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| |
| test = APFloat::getSmallestNormalized(APFloat::Float8E8M0FNU(), false); |
| expected = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127"); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(test.isFiniteNonZero()); |
| EXPECT_FALSE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| EXPECT_TRUE(test.isSmallestNormalized()); |
| } |
| |
| TEST(APFloatTest, getZero) { |
| struct { |
| const fltSemantics *semantics; |
| const bool sign; |
| const bool signedZero; |
| const unsigned long long bitPattern[2]; |
| const unsigned bitPatternLength; |
| } const GetZeroTest[] = { |
| {&APFloat::IEEEhalf(), false, true, {0, 0}, 1}, |
| {&APFloat::IEEEhalf(), true, true, {0x8000ULL, 0}, 1}, |
| {&APFloat::IEEEsingle(), false, true, {0, 0}, 1}, |
| {&APFloat::IEEEsingle(), true, true, {0x80000000ULL, 0}, 1}, |
| {&APFloat::IEEEdouble(), false, true, {0, 0}, 1}, |
| {&APFloat::IEEEdouble(), true, true, {0x8000000000000000ULL, 0}, 1}, |
| {&APFloat::IEEEquad(), false, true, {0, 0}, 2}, |
| {&APFloat::IEEEquad(), true, true, {0, 0x8000000000000000ULL}, 2}, |
| {&APFloat::PPCDoubleDouble(), false, true, {0, 0}, 2}, |
| {&APFloat::PPCDoubleDouble(), true, true, {0x8000000000000000ULL, 0}, 2}, |
| {&APFloat::x87DoubleExtended(), false, true, {0, 0}, 2}, |
| {&APFloat::x87DoubleExtended(), true, true, {0, 0x8000ULL}, 2}, |
| {&APFloat::Float8E5M2(), false, true, {0, 0}, 1}, |
| {&APFloat::Float8E5M2(), true, true, {0x80ULL, 0}, 1}, |
| {&APFloat::Float8E5M2FNUZ(), false, false, {0, 0}, 1}, |
| {&APFloat::Float8E5M2FNUZ(), true, false, {0, 0}, 1}, |
| {&APFloat::Float8E4M3(), false, true, {0, 0}, 1}, |
| {&APFloat::Float8E4M3(), true, true, {0x80ULL, 0}, 1}, |
| {&APFloat::Float8E4M3FN(), false, true, {0, 0}, 1}, |
| {&APFloat::Float8E4M3FN(), true, true, {0x80ULL, 0}, 1}, |
| {&APFloat::Float8E4M3FNUZ(), false, false, {0, 0}, 1}, |
| {&APFloat::Float8E4M3FNUZ(), true, false, {0, 0}, 1}, |
| {&APFloat::Float8E4M3B11FNUZ(), false, false, {0, 0}, 1}, |
| {&APFloat::Float8E4M3B11FNUZ(), true, false, {0, 0}, 1}, |
| {&APFloat::Float8E3M4(), false, true, {0, 0}, 1}, |
| {&APFloat::Float8E3M4(), true, true, {0x80ULL, 0}, 1}, |
| {&APFloat::FloatTF32(), false, true, {0, 0}, 1}, |
| {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}, |
| {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1}, |
| {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1}, |
| {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1}, |
| {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}, |
| {&APFloat::Float4E2M1FN(), false, true, {0, 0}, 1}, |
| {&APFloat::Float4E2M1FN(), true, true, {0x8ULL, 0}, 1}}; |
| const unsigned NumGetZeroTests = std::size(GetZeroTest); |
| for (unsigned i = 0; i < NumGetZeroTests; ++i) { |
| APFloat test = APFloat::getZero(*GetZeroTest[i].semantics, |
| GetZeroTest[i].sign); |
| const char *pattern = GetZeroTest[i].sign? "-0x0p+0" : "0x0p+0"; |
| APFloat expected = APFloat(*GetZeroTest[i].semantics, |
| pattern); |
| EXPECT_TRUE(test.isZero()); |
| if (GetZeroTest[i].signedZero) |
| EXPECT_TRUE(GetZeroTest[i].sign ? test.isNegative() : !test.isNegative()); |
| else |
| EXPECT_TRUE(!test.isNegative()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| for (unsigned j = 0, je = GetZeroTest[i].bitPatternLength; j < je; ++j) { |
| EXPECT_EQ(GetZeroTest[i].bitPattern[j], |
| test.bitcastToAPInt().getRawData()[j]); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, copySign) { |
| EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual( |
| APFloat::copySign(APFloat(42.0), APFloat(-1.0)))); |
| EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual( |
| APFloat::copySign(APFloat(-42.0), APFloat(1.0)))); |
| EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual( |
| APFloat::copySign(APFloat(-42.0), APFloat(-1.0)))); |
| EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual( |
| APFloat::copySign(APFloat(42.0), APFloat(1.0)))); |
| // For floating-point formats with unsigned 0, copySign() to a zero is a noop |
| for (APFloat::Semantics S : |
| {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S); |
| EXPECT_TRUE(APFloat::getZero(Sem).bitwiseIsEqual( |
| APFloat::copySign(APFloat::getZero(Sem), APFloat(-1.0)))); |
| EXPECT_TRUE(APFloat::getNaN(Sem, true).bitwiseIsEqual( |
| APFloat::copySign(APFloat::getNaN(Sem, true), APFloat(1.0)))); |
| } |
| } |
| |
| TEST(APFloatTest, convert) { |
| bool losesInfo; |
| APFloat test(APFloat::IEEEdouble(), "1.0"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| |
| test = APFloat(APFloat::x87DoubleExtended(), "0x1p-53"); |
| test.add(APFloat(APFloat::x87DoubleExtended(), "1.0"), APFloat::rmNearestTiesToEven); |
| test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0, test.convertToDouble()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEquad(), "0x1p-53"); |
| test.add(APFloat(APFloat::IEEEquad(), "1.0"), APFloat::rmNearestTiesToEven); |
| test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0, test.convertToDouble()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::x87DoubleExtended(), "0xf.fffffffp+28"); |
| test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(4294967295.0, test.convertToDouble()); |
| EXPECT_FALSE(losesInfo); |
| |
| test = APFloat::getSNaN(APFloat::IEEEsingle()); |
| APFloat::opStatus status = test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &losesInfo); |
| // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set. |
| APInt topTwoBits(64, 0x6000000000000000); |
| EXPECT_TRUE(test.bitwiseIsEqual(APFloat::getQNaN(APFloat::x87DoubleExtended(), false, &topTwoBits))); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInvalidOp); |
| |
| test = APFloat::getQNaN(APFloat::IEEEsingle()); |
| APFloat X87QNaN = APFloat::getQNaN(APFloat::x87DoubleExtended()); |
| test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN)); |
| EXPECT_FALSE(losesInfo); |
| |
| test = APFloat::getSNaN(APFloat::x87DoubleExtended()); |
| test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| APFloat X87SNaN = APFloat::getSNaN(APFloat::x87DoubleExtended()); |
| EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN)); |
| EXPECT_FALSE(losesInfo); |
| |
| test = APFloat::getQNaN(APFloat::x87DoubleExtended()); |
| test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN)); |
| EXPECT_FALSE(losesInfo); |
| |
| // The payload is lost in truncation, but we retain NaN by setting the quiet bit. |
| APInt payload(52, 1); |
| test = APFloat::getSNaN(APFloat::IEEEdouble(), false, &payload); |
| status = test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0x7fc00000, test.bitcastToAPInt()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInvalidOp); |
| |
| // The payload is lost in truncation. QNaN remains QNaN. |
| test = APFloat::getQNaN(APFloat::IEEEdouble(), false, &payload); |
| status = test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0x7fc00000, test.bitcastToAPInt()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| // Test that subnormals are handled correctly in double to float conversion |
| test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022"); |
| test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| // Test subnormal conversion to bfloat |
| test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126"); |
| test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126"); |
| test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(0x01, test.bitcastToAPInt()); |
| EXPECT_FALSE(losesInfo); |
| |
| test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126"); |
| test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo); |
| EXPECT_EQ(0x01, test.bitcastToAPInt()); |
| EXPECT_TRUE(losesInfo); |
| } |
| |
| TEST(APFloatTest, Float8UZConvert) { |
| bool losesInfo = false; |
| std::pair<APFloat, APFloat::opStatus> toNaNTests[] = { |
| {APFloat::getQNaN(APFloat::IEEEsingle(), false), APFloat::opOK}, |
| {APFloat::getQNaN(APFloat::IEEEsingle(), true), APFloat::opOK}, |
| {APFloat::getSNaN(APFloat::IEEEsingle(), false), APFloat::opInvalidOp}, |
| {APFloat::getSNaN(APFloat::IEEEsingle(), true), APFloat::opInvalidOp}, |
| {APFloat::getInf(APFloat::IEEEsingle(), false), APFloat::opInexact}, |
| {APFloat::getInf(APFloat::IEEEsingle(), true), APFloat::opInexact}}; |
| for (APFloat::Semantics S : |
| {APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ, |
| APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S); |
| SCOPED_TRACE("Semantics = " + std::to_string(S)); |
| for (auto [toTest, expectedRes] : toNaNTests) { |
| llvm::SmallString<16> value; |
| toTest.toString(value); |
| SCOPED_TRACE("toTest = " + value); |
| losesInfo = false; |
| APFloat test = toTest; |
| EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo), |
| expectedRes); |
| EXPECT_TRUE(test.isNaN()); |
| EXPECT_TRUE(test.isNegative()); |
| EXPECT_FALSE(test.isSignaling()); |
| EXPECT_FALSE(test.isInfinity()); |
| EXPECT_EQ(0x80, test.bitcastToAPInt()); |
| EXPECT_TRUE(losesInfo); |
| } |
| |
| // Negative zero conversions are information losing. |
| losesInfo = false; |
| APFloat test = APFloat::getZero(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo), |
| APFloat::opInexact); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(0x0, test.bitcastToAPInt()); |
| |
| losesInfo = true; |
| test = APFloat::getZero(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo), |
| APFloat::opOK); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(0x0, test.bitcastToAPInt()); |
| |
| // Except in casts between ourselves. |
| losesInfo = true; |
| test = APFloat::getZero(Sem); |
| EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo), |
| APFloat::opOK); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(0x0, test.bitcastToAPInt()); |
| } |
| } |
| |
// Pair of doubles that makeDoubleAPFloat(DD) passes on as the Hi and Lo
// components of a PPCDoubleDouble value.
struct DD {
  double Hi; // Forwarded as the Hi argument.
  double Lo; // Forwarded as the Lo argument.
};
| |
| template <typename T, typename U> |
| static APFloat makeDoubleAPFloat(T Hi, U Lo) { |
| APFloat HiFloat{APFloat::IEEEdouble(), APFloat::uninitialized}; |
| if constexpr (std::is_same_v<decltype(Hi), APFloat>) { |
| HiFloat = Hi; |
| } else if constexpr (std::is_same_v<decltype(Hi), double>) { |
| HiFloat = APFloat{Hi}; |
| } else { |
| HiFloat = {APFloat::IEEEdouble(), Hi}; |
| } |
| |
| APFloat LoFloat{APFloat::IEEEdouble(), APFloat::uninitialized}; |
| if constexpr (std::is_same_v<decltype(Lo), APFloat>) { |
| LoFloat = Lo; |
| } else if constexpr (std::is_same_v<decltype(Lo), double>) { |
| LoFloat = APFloat{Lo}; |
| } else { |
| LoFloat = {APFloat::IEEEdouble(), Lo}; |
| } |
| |
| APInt Bits = LoFloat.bitcastToAPInt().concat(HiFloat.bitcastToAPInt()); |
| return APFloat(APFloat::PPCDoubleDouble(), Bits); |
| } |
| |
| static APFloat makeDoubleAPFloat(DD X) { return makeDoubleAPFloat(X.Hi, X.Lo); } |
| |
| TEST(APFloatTest, PPCDoubleDouble) { |
| APFloat test(APFloat::PPCDoubleDouble(), "1.0"); |
| EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); |
| |
| // LDBL_MAX |
| test = APFloat(APFloat::PPCDoubleDouble(), "1.79769313486231580793728971405301e+308"); |
| EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0x7c8ffffffffffffeull, test.bitcastToAPInt().getRawData()[1]); |
| |
| // LDBL_MIN |
| test = APFloat(APFloat::PPCDoubleDouble(), "2.00416836000897277799610805135016e-292"); |
| EXPECT_EQ(0x0360000000000000ull, test.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); |
| |
| // PR30869 |
| { |
| auto Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") + |
| APFloat(APFloat::PPCDoubleDouble(), "1.0"); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| |
| Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") - |
| APFloat(APFloat::PPCDoubleDouble(), "1.0"); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| |
| Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") * |
| APFloat(APFloat::PPCDoubleDouble(), "1.0"); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| |
| Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") / |
| APFloat(APFloat::PPCDoubleDouble(), "1.0"); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| |
| int Exp; |
| Result = frexp(APFloat(APFloat::PPCDoubleDouble(), "1.0"), Exp, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| |
| Result = scalbn(APFloat(APFloat::PPCDoubleDouble(), "1.0"), 1, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics()); |
| } |
| } |
| |
| TEST(APFloatTest, isNegative) { |
| APFloat t(APFloat::IEEEsingle(), "0x1p+0"); |
| EXPECT_FALSE(t.isNegative()); |
| t = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| EXPECT_TRUE(t.isNegative()); |
| |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNegative()); |
| EXPECT_TRUE(APFloat::getInf(APFloat::IEEEsingle(), true).isNegative()); |
| |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNegative()); |
| EXPECT_TRUE(APFloat::getZero(APFloat::IEEEsingle(), true).isNegative()); |
| |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNegative()); |
| EXPECT_TRUE(APFloat::getNaN(APFloat::IEEEsingle(), true).isNegative()); |
| |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNegative()); |
| EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isNegative()); |
| } |
| |
| TEST(APFloatTest, isNormal) { |
| APFloat t(APFloat::IEEEsingle(), "0x1p+0"); |
| EXPECT_TRUE(t.isNormal()); |
| |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNormal()); |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNormal()); |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNormal()); |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNormal()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isNormal()); |
| } |
| |
| TEST(APFloatTest, isFinite) { |
| APFloat t(APFloat::IEEEsingle(), "0x1p+0"); |
| EXPECT_TRUE(t.isFinite()); |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isFinite()); |
| EXPECT_TRUE(APFloat::getZero(APFloat::IEEEsingle(), false).isFinite()); |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isFinite()); |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isFinite()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isFinite()); |
| } |
| |
| TEST(APFloatTest, isInfinity) { |
| APFloat t(APFloat::IEEEsingle(), "0x1p+0"); |
| EXPECT_FALSE(t.isInfinity()); |
| |
| APFloat PosInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat NegInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| |
| EXPECT_TRUE(PosInf.isInfinity()); |
| EXPECT_TRUE(PosInf.isPosInfinity()); |
| EXPECT_FALSE(PosInf.isNegInfinity()); |
| EXPECT_EQ(fcPosInf, PosInf.classify()); |
| |
| EXPECT_TRUE(NegInf.isInfinity()); |
| EXPECT_FALSE(NegInf.isPosInfinity()); |
| EXPECT_TRUE(NegInf.isNegInfinity()); |
| EXPECT_EQ(fcNegInf, NegInf.classify()); |
| |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isInfinity()); |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isInfinity()); |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isInfinity()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isInfinity()); |
| |
| for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) { |
| const fltSemantics &Semantics = |
| APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I)); |
| if (APFloat::semanticsHasInf(Semantics)) { |
| EXPECT_TRUE(APFloat::getInf(Semantics).isInfinity()); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, isNaN) { |
| APFloat t(APFloat::IEEEsingle(), "0x1p+0"); |
| EXPECT_FALSE(t.isNaN()); |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNaN()); |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNaN()); |
| EXPECT_TRUE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNaN()); |
| EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNaN()); |
| EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isNaN()); |
| |
| for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) { |
| const fltSemantics &Semantics = |
| APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I)); |
| if (APFloat::semanticsHasNaN(Semantics)) { |
| EXPECT_TRUE(APFloat::getNaN(Semantics).isNaN()); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, isFiniteNonZero) { |
| // Test positive/negative normal value. |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p+0").isFiniteNonZero()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p+0").isFiniteNonZero()); |
| |
| // Test positive/negative denormal value. |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isFiniteNonZero()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p-149").isFiniteNonZero()); |
| |
| // Test +/- Infinity. |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isFiniteNonZero()); |
| EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), true).isFiniteNonZero()); |
| |
| // Test +/- Zero. |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isFiniteNonZero()); |
| EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), true).isFiniteNonZero()); |
| |
| // Test +/- qNaN. +/- dont mean anything with qNaN but paranoia can't hurt in |
| // this instance. |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isFiniteNonZero()); |
| EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), true).isFiniteNonZero()); |
| |
| // Test +/- sNaN. +/- dont mean anything with sNaN but paranoia can't hurt in |
| // this instance. |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isFiniteNonZero()); |
| EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isFiniteNonZero()); |
| } |
| |
| TEST(APFloatTest, add) { |
| // Test Special Cases against each other and normal values. |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123"); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| } SpecialCaseTests[] = { |
| { PInf, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MZero, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, PZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PNormalValue, "0x1p+1", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, PSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, PSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, PZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MNormalValue, "-0x1p+1", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, PZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PLargestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, PZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, PSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, PZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestValue, "0x1p-148", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, PSmallestNormalized, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestNormalized, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, PZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, MSmallestValue, "-0x1p-148", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestNormalized, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestNormalized, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, PZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestValue, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MSmallestValue, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestNormalized, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, PZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestValue, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MSmallestValue, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, MSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal } |
| }; |
| |
| for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) { |
| APFloat x(SpecialCaseTests[i].x); |
| APFloat y(SpecialCaseTests[i].y); |
| APFloat::opStatus status = x.add(y, APFloat::rmNearestTiesToEven); |
| |
| APFloat result(APFloat::IEEEsingle(), SpecialCaseTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(SpecialCaseTests[i].status, (int)status); |
| EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, subtract) { |
| // Test Special Cases against each other and normal values. |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123"); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| } SpecialCaseTests[] = { |
| { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PZero, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MZero, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MZero, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MZero, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, PZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MNormalValue, "0x1p+1", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, PSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, PSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PNormalValue, MSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, PZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, PNormalValue, "-0x1p+1", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, PSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MNormalValue, MSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, PZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MLargestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, PSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, PSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PLargestValue, MSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, PZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, PSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MLargestValue, MSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, PZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, MSmallestValue, "0x1p-148", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestNormalized, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestNormalized, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, PZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestValue, "-0x1p-148", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, PSmallestNormalized, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestNormalized, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, PZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestValue, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MSmallestValue, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, MSmallestNormalized, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, PZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestValue, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MSmallestValue, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero } |
| }; |
| |
| for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) { |
| APFloat x(SpecialCaseTests[i].x); |
| APFloat y(SpecialCaseTests[i].y); |
| APFloat::opStatus status = x.subtract(y, APFloat::rmNearestTiesToEven); |
| |
| APFloat result(APFloat::IEEEsingle(), SpecialCaseTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(SpecialCaseTests[i].status, (int)status); |
| EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, multiply) { |
| // Test Special Cases against each other and normal values. |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123"); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| APFloat MaxQuad(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffffffffp+16383"); |
| APFloat MinQuad(APFloat::IEEEquad(), |
| "0x0.0000000000000000000000000001p-16382"); |
| APFloat NMinQuad(APFloat::IEEEquad(), |
| "-0x0.0000000000000000000000000001p-16382"); |
| |
| const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact; |
| const int UnderflowStatus = APFloat::opUnderflow | APFloat::opInexact; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven; |
| } SpecialCaseTests[] = { |
| { PInf, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PNormalValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MNormalValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PLargestValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PLargestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, MLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, PSmallestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MSmallestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PSmallestNormalized, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MSmallestNormalized, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MLargestValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, MLargestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, PSmallestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MSmallestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PSmallestNormalized, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MSmallestNormalized, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PLargestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MLargestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestValue, MSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestValue, PSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestValue, MSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PLargestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MLargestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, MSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, PSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, MSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PSmallestNormalized, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PLargestValue, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MLargestValue, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, MSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, PSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, MSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MSmallestNormalized, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PLargestValue, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MLargestValue, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, MSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, PSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, MSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| |
| {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmNearestTiesToEven}, |
| {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardPositive}, |
| {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardNegative}, |
| {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmNearestTiesToAway}, |
| |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmNearestTiesToEven}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardPositive}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardNegative}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK, |
| APFloat::fcNormal, APFloat::rmNearestTiesToAway}, |
| |
| {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmNearestTiesToEven}, |
| {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmTowardPositive}, |
| {MaxQuad, MaxQuad, "0x1.ffffffffffffffffffffffffffffp+16383", |
| APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardNegative}, |
| {MaxQuad, MaxQuad, "0x1.ffffffffffffffffffffffffffffp+16383", |
| APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardZero}, |
| {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmNearestTiesToAway}, |
| |
| {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven}, |
| {MinQuad, MinQuad, "0x0.0000000000000000000000000001p-16382", |
| UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardPositive}, |
| {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardNegative}, |
| {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardZero}, |
| {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToAway}, |
| |
| {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven}, |
| {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardPositive}, |
| {MinQuad, NMinQuad, "-0x0.0000000000000000000000000001p-16382", |
| UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardNegative}, |
| {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardZero}, |
| {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToAway}, |
| }; |
| |
| for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) { |
| APFloat x(SpecialCaseTests[i].x); |
| APFloat y(SpecialCaseTests[i].y); |
| APFloat::opStatus status = x.multiply(y, SpecialCaseTests[i].roundingMode); |
| |
| APFloat result(x.getSemantics(), SpecialCaseTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(SpecialCaseTests[i].status, (int)status); |
| EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, divide) { |
| // Test Special Cases against each other and normal values. |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123"); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| APFloat MaxQuad(APFloat::IEEEquad(), |
| "0x1.ffffffffffffffffffffffffffffp+16383"); |
| APFloat MinQuad(APFloat::IEEEquad(), |
| "0x0.0000000000000000000000000001p-16382"); |
| APFloat NMinQuad(APFloat::IEEEquad(), |
| "-0x0.0000000000000000000000000001p-16382"); |
| |
| const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact; |
| const int UnderflowStatus = APFloat::opUnderflow | APFloat::opInexact; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven; |
| } SpecialCaseTests[] = { |
| { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity }, |
| { MInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity }, |
| { PZero, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PNormalValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PLargestValue, "0x1p-128", UnderflowStatus, APFloat::fcNormal }, |
| { PNormalValue, MLargestValue, "-0x1p-128", UnderflowStatus, APFloat::fcNormal }, |
| { PNormalValue, PSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PNormalValue, MSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { PNormalValue, PSmallestNormalized, "0x1p+126", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MSmallestNormalized, "-0x1p+126", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MNormalValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PLargestValue, "-0x1p-128", UnderflowStatus, APFloat::fcNormal }, |
| { MNormalValue, MLargestValue, "0x1p-128", UnderflowStatus, APFloat::fcNormal }, |
| { MNormalValue, PSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MNormalValue, MSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { MNormalValue, PSmallestNormalized, "-0x1p+126", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MSmallestNormalized, "0x1p+126", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PLargestValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, MSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, PSmallestNormalized, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PLargestValue, MSmallestNormalized, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MLargestValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, MSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, PSmallestNormalized, "-inf", OverflowStatus, APFloat::fcInfinity }, |
| { MLargestValue, MSmallestNormalized, "inf", OverflowStatus, APFloat::fcInfinity }, |
| { PSmallestValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PSmallestValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestValue, MLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestValue, PSmallestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestNormalized, "0x1p-23", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestNormalized, "-0x1p-23", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MSmallestValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, MLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestValue, PSmallestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestNormalized, "-0x1p-23", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestNormalized, "0x1p-23", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PSmallestNormalized, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, MLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { PSmallestNormalized, PSmallestValue, "0x1p+23", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MSmallestValue, "-0x1p+23", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestNormalized, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MSmallestNormalized, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MSmallestNormalized, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity }, |
| { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, MLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero }, |
| { MSmallestNormalized, PSmallestValue, "-0x1p+23", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MSmallestValue, "0x1p+23", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestNormalized, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MSmallestNormalized, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| |
| {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmNearestTiesToEven}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp+16383", |
| APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardPositive}, |
| {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmTowardNegative}, |
| {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp+16383", |
| APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardZero}, |
| {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity, |
| APFloat::rmNearestTiesToAway}, |
| |
| {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven}, |
| {MinQuad, MaxQuad, "0x0.0000000000000000000000000001p-16382", |
| UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardPositive}, |
| {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardNegative}, |
| {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardZero}, |
| {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToAway}, |
| |
| {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven}, |
| {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardPositive}, |
| {NMinQuad, MaxQuad, "-0x0.0000000000000000000000000001p-16382", |
| UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardNegative}, |
| {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmTowardZero}, |
| {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero, |
| APFloat::rmNearestTiesToAway}, |
| }; |
| |
| for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) { |
| APFloat x(SpecialCaseTests[i].x); |
| APFloat y(SpecialCaseTests[i].y); |
| APFloat::opStatus status = x.divide(y, SpecialCaseTests[i].roundingMode); |
| |
| APFloat result(x.getSemantics(), SpecialCaseTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(SpecialCaseTests[i].status, (int)status); |
| EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, operatorOverloads) { |
| // This is mostly testing that these operator overloads compile. |
| APFloat One = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat Two = APFloat(APFloat::IEEEsingle(), "0x2p+0"); |
| EXPECT_TRUE(Two.bitwiseIsEqual(One + One)); |
| EXPECT_TRUE(One.bitwiseIsEqual(Two - One)); |
| EXPECT_TRUE(Two.bitwiseIsEqual(One * Two)); |
| EXPECT_TRUE(One.bitwiseIsEqual(Two / Two)); |
| } |
| |
| TEST(APFloatTest, Comparisons) { |
| enum {MNan, MInf, MBig, MOne, MZer, PZer, POne, PBig, PInf, PNan, NumVals}; |
| APFloat Vals[NumVals] = { |
| APFloat::getNaN(APFloat::IEEEsingle(), true), |
| APFloat::getInf(APFloat::IEEEsingle(), true), |
| APFloat::getLargest(APFloat::IEEEsingle(), true), |
| APFloat(APFloat::IEEEsingle(), "-0x1p+0"), |
| APFloat::getZero(APFloat::IEEEsingle(), true), |
| APFloat::getZero(APFloat::IEEEsingle(), false), |
| APFloat(APFloat::IEEEsingle(), "0x1p+0"), |
| APFloat::getLargest(APFloat::IEEEsingle(), false), |
| APFloat::getInf(APFloat::IEEEsingle(), false), |
| APFloat::getNaN(APFloat::IEEEsingle(), false), |
| }; |
| using Relation = void (*)(const APFloat &, const APFloat &); |
| Relation LT = [](const APFloat &LHS, const APFloat &RHS) { |
| EXPECT_FALSE(LHS == RHS); |
| EXPECT_TRUE(LHS != RHS); |
| EXPECT_TRUE(LHS < RHS); |
| EXPECT_FALSE(LHS > RHS); |
| EXPECT_TRUE(LHS <= RHS); |
| EXPECT_FALSE(LHS >= RHS); |
| }; |
| Relation EQ = [](const APFloat &LHS, const APFloat &RHS) { |
| EXPECT_TRUE(LHS == RHS); |
| EXPECT_FALSE(LHS != RHS); |
| EXPECT_FALSE(LHS < RHS); |
| EXPECT_FALSE(LHS > RHS); |
| EXPECT_TRUE(LHS <= RHS); |
| EXPECT_TRUE(LHS >= RHS); |
| }; |
| Relation GT = [](const APFloat &LHS, const APFloat &RHS) { |
| EXPECT_FALSE(LHS == RHS); |
| EXPECT_TRUE(LHS != RHS); |
| EXPECT_FALSE(LHS < RHS); |
| EXPECT_TRUE(LHS > RHS); |
| EXPECT_FALSE(LHS <= RHS); |
| EXPECT_TRUE(LHS >= RHS); |
| }; |
| Relation UN = [](const APFloat &LHS, const APFloat &RHS) { |
| EXPECT_FALSE(LHS == RHS); |
| EXPECT_TRUE(LHS != RHS); |
| EXPECT_FALSE(LHS < RHS); |
| EXPECT_FALSE(LHS > RHS); |
| EXPECT_FALSE(LHS <= RHS); |
| EXPECT_FALSE(LHS >= RHS); |
| }; |
| Relation Relations[NumVals][NumVals] = { |
| // -N -I -B -1 -0 +0 +1 +B +I +N |
| /* MNan */ {UN, UN, UN, UN, UN, UN, UN, UN, UN, UN}, |
| /* MInf */ {UN, EQ, LT, LT, LT, LT, LT, LT, LT, UN}, |
| /* MBig */ {UN, GT, EQ, LT, LT, LT, LT, LT, LT, UN}, |
| /* MOne */ {UN, GT, GT, EQ, LT, LT, LT, LT, LT, UN}, |
| /* MZer */ {UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN}, |
| /* PZer */ {UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN}, |
| /* POne */ {UN, GT, GT, GT, GT, GT, EQ, LT, LT, UN}, |
| /* PBig */ {UN, GT, GT, GT, GT, GT, GT, EQ, LT, UN}, |
| /* PInf */ {UN, GT, GT, GT, GT, GT, GT, GT, EQ, UN}, |
| /* PNan */ {UN, UN, UN, UN, UN, UN, UN, UN, UN, UN}, |
| }; |
| for (unsigned I = 0; I < NumVals; ++I) |
| for (unsigned J = 0; J < NumVals; ++J) |
| Relations[I][J](Vals[I], Vals[J]); |
| } |
| |
| TEST(APFloatTest, abs) { |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat PQNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat MQNaN = APFloat::getNaN(APFloat::IEEEsingle(), true); |
| APFloat PSNaN = APFloat::getSNaN(APFloat::IEEEsingle(), false); |
| APFloat MSNaN = APFloat::getSNaN(APFloat::IEEEsingle(), true); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| EXPECT_TRUE(PInf.bitwiseIsEqual(abs(PInf))); |
| EXPECT_TRUE(PInf.bitwiseIsEqual(abs(MInf))); |
| EXPECT_TRUE(PZero.bitwiseIsEqual(abs(PZero))); |
| EXPECT_TRUE(PZero.bitwiseIsEqual(abs(MZero))); |
| EXPECT_TRUE(PQNaN.bitwiseIsEqual(abs(PQNaN))); |
| EXPECT_TRUE(PQNaN.bitwiseIsEqual(abs(MQNaN))); |
| EXPECT_TRUE(PSNaN.bitwiseIsEqual(abs(PSNaN))); |
| EXPECT_TRUE(PSNaN.bitwiseIsEqual(abs(MSNaN))); |
| EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(PNormalValue))); |
| EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(MNormalValue))); |
| EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(PLargestValue))); |
| EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(MLargestValue))); |
| EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(PSmallestValue))); |
| EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(MSmallestValue))); |
| EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(PSmallestNormalized))); |
| EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(MSmallestNormalized))); |
| } |
| |
| TEST(APFloatTest, neg) { |
| APFloat One = APFloat(APFloat::IEEEsingle(), "1.0"); |
| APFloat NegOne = APFloat(APFloat::IEEEsingle(), "-1.0"); |
| APFloat Zero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat NegZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat Inf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat NegInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat NegQNaN = APFloat::getNaN(APFloat::IEEEsingle(), true); |
| |
| EXPECT_TRUE(NegOne.bitwiseIsEqual(neg(One))); |
| EXPECT_TRUE(One.bitwiseIsEqual(neg(NegOne))); |
| EXPECT_TRUE(NegZero.bitwiseIsEqual(neg(Zero))); |
| EXPECT_TRUE(Zero.bitwiseIsEqual(neg(NegZero))); |
| EXPECT_TRUE(NegInf.bitwiseIsEqual(neg(Inf))); |
| EXPECT_TRUE(Inf.bitwiseIsEqual(neg(NegInf))); |
| EXPECT_TRUE(NegInf.bitwiseIsEqual(neg(Inf))); |
| EXPECT_TRUE(Inf.bitwiseIsEqual(neg(NegInf))); |
| EXPECT_TRUE(NegQNaN.bitwiseIsEqual(neg(QNaN))); |
| EXPECT_TRUE(QNaN.bitwiseIsEqual(neg(NegQNaN))); |
| |
| EXPECT_TRUE(NegOne.bitwiseIsEqual(-One)); |
| EXPECT_TRUE(One.bitwiseIsEqual(-NegOne)); |
| EXPECT_TRUE(NegZero.bitwiseIsEqual(-Zero)); |
| EXPECT_TRUE(Zero.bitwiseIsEqual(-NegZero)); |
| EXPECT_TRUE(NegInf.bitwiseIsEqual(-Inf)); |
| EXPECT_TRUE(Inf.bitwiseIsEqual(-NegInf)); |
| EXPECT_TRUE(NegInf.bitwiseIsEqual(-Inf)); |
| EXPECT_TRUE(Inf.bitwiseIsEqual(-NegInf)); |
| EXPECT_TRUE(NegQNaN.bitwiseIsEqual(-QNaN)); |
| EXPECT_TRUE(QNaN.bitwiseIsEqual(-NegQNaN)); |
| } |
| |
| TEST(APFloatTest, ilogb) { |
| EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), false))); |
| EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), true))); |
| EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1024"))); |
| EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023"))); |
| EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023"))); |
| EXPECT_EQ(-51, ilogb(APFloat(APFloat::IEEEdouble(), "0x1p-51"))); |
| EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1023"))); |
| EXPECT_EQ(-2, ilogb(APFloat(APFloat::IEEEdouble(), "0x0.ffffp-1"))); |
| EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.fffep-1023"))); |
| EXPECT_EQ(1023, ilogb(APFloat::getLargest(APFloat::IEEEdouble(), false))); |
| EXPECT_EQ(1023, ilogb(APFloat::getLargest(APFloat::IEEEdouble(), true))); |
| |
| |
| EXPECT_EQ(0, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p+0"))); |
| EXPECT_EQ(0, ilogb(APFloat(APFloat::IEEEsingle(), "-0x1p+0"))); |
| EXPECT_EQ(42, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p+42"))); |
| EXPECT_EQ(-42, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p-42"))); |
| |
| EXPECT_EQ(APFloat::IEK_Inf, |
| ilogb(APFloat::getInf(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(APFloat::IEK_Inf, |
| ilogb(APFloat::getInf(APFloat::IEEEsingle(), true))); |
| EXPECT_EQ(APFloat::IEK_Zero, |
| ilogb(APFloat::getZero(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(APFloat::IEK_Zero, |
| ilogb(APFloat::getZero(APFloat::IEEEsingle(), true))); |
| EXPECT_EQ(APFloat::IEK_NaN, |
| ilogb(APFloat::getNaN(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(APFloat::IEK_NaN, |
| ilogb(APFloat::getSNaN(APFloat::IEEEsingle(), false))); |
| |
| EXPECT_EQ(127, ilogb(APFloat::getLargest(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(127, ilogb(APFloat::getLargest(APFloat::IEEEsingle(), true))); |
| |
| EXPECT_EQ(-149, ilogb(APFloat::getSmallest(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(-149, ilogb(APFloat::getSmallest(APFloat::IEEEsingle(), true))); |
| EXPECT_EQ(-126, |
| ilogb(APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false))); |
| EXPECT_EQ(-126, |
| ilogb(APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true))); |
| } |
| |
| TEST(APFloatTest, scalbn) { |
| |
| const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven; |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEsingle(), "0x1p+0") |
| .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 0, RM))); |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEsingle(), "0x1p+42") |
| .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 42, RM))); |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEsingle(), "0x1p-42") |
| .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), -42, RM))); |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QPNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat QMNaN = APFloat::getNaN(APFloat::IEEEsingle(), true); |
| APFloat SNaN = APFloat::getSNaN(APFloat::IEEEsingle(), false); |
| |
| EXPECT_TRUE(PInf.bitwiseIsEqual(scalbn(PInf, 0, RM))); |
| EXPECT_TRUE(MInf.bitwiseIsEqual(scalbn(MInf, 0, RM))); |
| EXPECT_TRUE(PZero.bitwiseIsEqual(scalbn(PZero, 0, RM))); |
| EXPECT_TRUE(MZero.bitwiseIsEqual(scalbn(MZero, 0, RM))); |
| EXPECT_TRUE(QPNaN.bitwiseIsEqual(scalbn(QPNaN, 0, RM))); |
| EXPECT_TRUE(QMNaN.bitwiseIsEqual(scalbn(QMNaN, 0, RM))); |
| EXPECT_FALSE(scalbn(SNaN, 0, RM).isSignaling()); |
| |
| APFloat ScalbnSNaN = scalbn(SNaN, 1, RM); |
| EXPECT_TRUE(ScalbnSNaN.isNaN() && !ScalbnSNaN.isSignaling()); |
| |
| // Make sure highest bit of payload is preserved. |
| const APInt Payload(64, (UINT64_C(1) << 50) | |
| (UINT64_C(1) << 49) | |
| (UINT64_C(1234) << 32) | |
| 1); |
| |
| APFloat SNaNWithPayload = APFloat::getSNaN(APFloat::IEEEdouble(), false, |
| &Payload); |
| APFloat QuietPayload = scalbn(SNaNWithPayload, 1, RM); |
| EXPECT_TRUE(QuietPayload.isNaN() && !QuietPayload.isSignaling()); |
| EXPECT_EQ(Payload, QuietPayload.bitcastToAPInt().getLoBits(51)); |
| |
| EXPECT_TRUE(PInf.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 128, RM))); |
| EXPECT_TRUE(MInf.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p+0"), 128, RM))); |
| EXPECT_TRUE(PInf.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+127"), 1, RM))); |
| EXPECT_TRUE(PZero.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "0x1p-127"), -127, RM))); |
| EXPECT_TRUE(MZero.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p-127"), -127, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p-149").bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p-127"), -22, RM))); |
| EXPECT_TRUE(PZero.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEsingle(), "0x1p-126"), -24, RM))); |
| |
| |
| APFloat SmallestF64 = APFloat::getSmallest(APFloat::IEEEdouble(), false); |
| APFloat NegSmallestF64 = APFloat::getSmallest(APFloat::IEEEdouble(), true); |
| |
| APFloat LargestF64 = APFloat::getLargest(APFloat::IEEEdouble(), false); |
| APFloat NegLargestF64 = APFloat::getLargest(APFloat::IEEEdouble(), true); |
| |
| APFloat SmallestNormalizedF64 |
| = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false); |
| APFloat NegSmallestNormalizedF64 |
| = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true); |
| |
| APFloat LargestDenormalF64(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023"); |
| APFloat NegLargestDenormalF64(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023"); |
| |
| |
| EXPECT_TRUE(SmallestF64.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEdouble(), "0x1p-1074"), 0, RM))); |
| EXPECT_TRUE(NegSmallestF64.bitwiseIsEqual( |
| scalbn(APFloat(APFloat::IEEEdouble(), "-0x1p-1074"), 0, RM))); |
| |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1023") |
| .bitwiseIsEqual(scalbn(SmallestF64, 2097, RM))); |
| |
| EXPECT_TRUE(scalbn(SmallestF64, -2097, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(SmallestF64, -2098, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(SmallestF64, -2099, RM).isPosZero()); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1022") |
| .bitwiseIsEqual(scalbn(SmallestF64, 2096, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1023") |
| .bitwiseIsEqual(scalbn(SmallestF64, 2097, RM))); |
| EXPECT_TRUE(scalbn(SmallestF64, 2098, RM).isInfinity()); |
| EXPECT_TRUE(scalbn(SmallestF64, 2099, RM).isInfinity()); |
| |
| // Test for integer overflows when adding to exponent. |
| EXPECT_TRUE(scalbn(SmallestF64, -INT_MAX, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(LargestF64, INT_MAX, RM).isInfinity()); |
| |
| EXPECT_TRUE(LargestDenormalF64 |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 0, RM))); |
| EXPECT_TRUE(NegLargestDenormalF64 |
| .bitwiseIsEqual(scalbn(NegLargestDenormalF64, 0, RM))); |
| |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1022") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 1, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1021") |
| .bitwiseIsEqual(scalbn(NegLargestDenormalF64, 2, RM))); |
| |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+1") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 1024, RM))); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, -1023, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, -1024, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, -2048, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, 2047, RM).isInfinity()); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, 2098, RM).isInfinity()); |
| EXPECT_TRUE(scalbn(LargestDenormalF64, 2099, RM).isInfinity()); |
| |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-2") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 1021, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 1022, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+0") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 1023, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+1023") |
| .bitwiseIsEqual(scalbn(LargestDenormalF64, 2046, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+974") |
| .bitwiseIsEqual(scalbn(SmallestF64, 2048, RM))); |
| |
| APFloat RandomDenormalF64(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+51"); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-972") |
| .bitwiseIsEqual(scalbn(RandomDenormalF64, -1023, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1") |
| .bitwiseIsEqual(scalbn(RandomDenormalF64, -52, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-2") |
| .bitwiseIsEqual(scalbn(RandomDenormalF64, -53, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+0") |
| .bitwiseIsEqual(scalbn(RandomDenormalF64, -51, RM))); |
| |
| EXPECT_TRUE(scalbn(RandomDenormalF64, -2097, RM).isPosZero()); |
| EXPECT_TRUE(scalbn(RandomDenormalF64, -2090, RM).isPosZero()); |
| |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "-0x1p-1073") |
| .bitwiseIsEqual(scalbn(NegLargestF64, -2097, RM))); |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "-0x1p-1024") |
| .bitwiseIsEqual(scalbn(NegLargestF64, -2048, RM))); |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "0x1p-1073") |
| .bitwiseIsEqual(scalbn(LargestF64, -2097, RM))); |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "0x1p-1074") |
| .bitwiseIsEqual(scalbn(LargestF64, -2098, RM))); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1074") |
| .bitwiseIsEqual(scalbn(NegLargestF64, -2098, RM))); |
| EXPECT_TRUE(scalbn(NegLargestF64, -2099, RM).isNegZero()); |
| EXPECT_TRUE(scalbn(LargestF64, 1, RM).isInfinity()); |
| |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "0x1p+0") |
| .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEdouble(), "0x1p+52"), -52, RM))); |
| |
| EXPECT_TRUE( |
| APFloat(APFloat::IEEEdouble(), "0x1p-103") |
| .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEdouble(), "0x1p-51"), -52, RM))); |
| } |
| |
| TEST(APFloatTest, frexp) { |
| const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven; |
| |
| APFloat PZero = APFloat::getZero(APFloat::IEEEdouble(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEdouble(), true); |
| APFloat One(1.0); |
| APFloat MOne(-1.0); |
| APFloat Two(2.0); |
| APFloat MTwo(-2.0); |
| |
| APFloat LargestDenormal(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023"); |
| APFloat NegLargestDenormal(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023"); |
| |
| APFloat Smallest = APFloat::getSmallest(APFloat::IEEEdouble(), false); |
| APFloat NegSmallest = APFloat::getSmallest(APFloat::IEEEdouble(), true); |
| |
| APFloat Largest = APFloat::getLargest(APFloat::IEEEdouble(), false); |
| APFloat NegLargest = APFloat::getLargest(APFloat::IEEEdouble(), true); |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEdouble(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEdouble(), true); |
| |
| APFloat QPNaN = APFloat::getNaN(APFloat::IEEEdouble(), false); |
| APFloat QMNaN = APFloat::getNaN(APFloat::IEEEdouble(), true); |
| APFloat SNaN = APFloat::getSNaN(APFloat::IEEEdouble(), false); |
| |
| // Make sure highest bit of payload is preserved. |
| const APInt Payload(64, (UINT64_C(1) << 50) | |
| (UINT64_C(1) << 49) | |
| (UINT64_C(1234) << 32) | |
| 1); |
| |
| APFloat SNaNWithPayload = APFloat::getSNaN(APFloat::IEEEdouble(), false, |
| &Payload); |
| |
| APFloat SmallestNormalized |
| = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false); |
| APFloat NegSmallestNormalized |
| = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true); |
| |
| int Exp; |
| APFloat Frac(APFloat::IEEEdouble()); |
| |
| |
| Frac = frexp(PZero, Exp, RM); |
| EXPECT_EQ(0, Exp); |
| EXPECT_TRUE(Frac.isPosZero()); |
| |
| Frac = frexp(MZero, Exp, RM); |
| EXPECT_EQ(0, Exp); |
| EXPECT_TRUE(Frac.isNegZero()); |
| |
| |
| Frac = frexp(One, Exp, RM); |
| EXPECT_EQ(1, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(MOne, Exp, RM); |
| EXPECT_EQ(1, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(LargestDenormal, Exp, RM); |
| EXPECT_EQ(-1022, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(NegLargestDenormal, Exp, RM); |
| EXPECT_EQ(-1022, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1").bitwiseIsEqual(Frac)); |
| |
| |
| Frac = frexp(Smallest, Exp, RM); |
| EXPECT_EQ(-1073, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(NegSmallest, Exp, RM); |
| EXPECT_EQ(-1073, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1").bitwiseIsEqual(Frac)); |
| |
| |
| Frac = frexp(Largest, Exp, RM); |
| EXPECT_EQ(1024, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.fffffffffffffp-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(NegLargest, Exp, RM); |
| EXPECT_EQ(1024, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.fffffffffffffp-1").bitwiseIsEqual(Frac)); |
| |
| |
| Frac = frexp(PInf, Exp, RM); |
| EXPECT_EQ(INT_MAX, Exp); |
| EXPECT_TRUE(Frac.isInfinity() && !Frac.isNegative()); |
| |
| Frac = frexp(MInf, Exp, RM); |
| EXPECT_EQ(INT_MAX, Exp); |
| EXPECT_TRUE(Frac.isInfinity() && Frac.isNegative()); |
| |
| Frac = frexp(QPNaN, Exp, RM); |
| EXPECT_EQ(INT_MIN, Exp); |
| EXPECT_TRUE(Frac.isNaN()); |
| |
| Frac = frexp(QMNaN, Exp, RM); |
| EXPECT_EQ(INT_MIN, Exp); |
| EXPECT_TRUE(Frac.isNaN()); |
| |
| Frac = frexp(SNaN, Exp, RM); |
| EXPECT_EQ(INT_MIN, Exp); |
| EXPECT_TRUE(Frac.isNaN() && !Frac.isSignaling()); |
| |
| Frac = frexp(SNaNWithPayload, Exp, RM); |
| EXPECT_EQ(INT_MIN, Exp); |
| EXPECT_TRUE(Frac.isNaN() && !Frac.isSignaling()); |
| EXPECT_EQ(Payload, Frac.bitcastToAPInt().getLoBits(51)); |
| |
| Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x0.ffffp-1"), Exp, RM); |
| EXPECT_EQ(-1, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.fffep-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x1p-51"), Exp, RM); |
| EXPECT_EQ(-50, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac)); |
| |
| Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+51"), Exp, RM); |
| EXPECT_EQ(52, Exp); |
| EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1").bitwiseIsEqual(Frac)); |
| } |
| |
| TEST(APFloatTest, mod) { |
| { |
| APFloat f1(APFloat::IEEEdouble(), "1.5"); |
| APFloat f2(APFloat::IEEEdouble(), "1.0"); |
| APFloat expected(APFloat::IEEEdouble(), "0.5"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0.5"); |
| APFloat f2(APFloat::IEEEdouble(), "1.0"); |
| APFloat expected(APFloat::IEEEdouble(), "0.5"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1.3333333333333p-2"); // 0.3 |
| APFloat f2(APFloat::IEEEdouble(), "0x1.47ae147ae147bp-7"); // 0.01 |
| APFloat expected(APFloat::IEEEdouble(), |
| "0x1.47ae147ae1471p-7"); // 0.009999999999999983 |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1p64"); // 1.8446744073709552e19 |
| APFloat f2(APFloat::IEEEdouble(), "1.5"); |
| APFloat expected(APFloat::IEEEdouble(), "1.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1p1000"); |
| APFloat f2(APFloat::IEEEdouble(), "0x1p-1000"); |
| APFloat expected(APFloat::IEEEdouble(), "0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0.0"); |
| APFloat f2(APFloat::IEEEdouble(), "1.0"); |
| APFloat expected(APFloat::IEEEdouble(), "0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "1.0"); |
| APFloat f2(APFloat::IEEEdouble(), "0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp); |
| EXPECT_TRUE(f1.isNaN()); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0.0"); |
| APFloat f2(APFloat::IEEEdouble(), "0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp); |
| EXPECT_TRUE(f1.isNaN()); |
| } |
| { |
| APFloat f1 = APFloat::getInf(APFloat::IEEEdouble(), false); |
| APFloat f2(APFloat::IEEEdouble(), "1.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp); |
| EXPECT_TRUE(f1.isNaN()); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "-4.0"); |
| APFloat f2(APFloat::IEEEdouble(), "-2.0"); |
| APFloat expected(APFloat::IEEEdouble(), "-0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "-4.0"); |
| APFloat f2(APFloat::IEEEdouble(), "2.0"); |
| APFloat expected(APFloat::IEEEdouble(), "-0.0"); |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| // Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is |
| // the max value whose exponent is minExponent (-6). This requires special |
| // logic in the mod implementation to prevent overflow to NaN. |
| APFloat f1(APFloat::Float8E4M3FN(), "0x1p8"); // 256 |
| APFloat f2(APFloat::Float8E4M3FN(), "0x1.ep-6"); // 0.029296875 |
| APFloat expected(APFloat::Float8E4M3FN(), "0x1p-8"); // 0.00390625 |
| EXPECT_EQ(f1.mod(f2), APFloat::opOK); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| } |
| |
| TEST(APFloatTest, remainder) { |
| // Test Special Cases against each other and normal values. |
| |
| APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false); |
| APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false); |
| APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true); |
| APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false); |
| APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123"); |
| APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0"); |
| APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0"); |
| APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true); |
| APFloat PSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| APFloat MSmallestNormalized = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| |
| APFloat PVal1(APFloat::IEEEsingle(), "0x1.fffffep+126"); |
| APFloat MVal1(APFloat::IEEEsingle(), "-0x1.fffffep+126"); |
| APFloat PVal2(APFloat::IEEEsingle(), "0x1.fffffep-126"); |
| APFloat MVal2(APFloat::IEEEsingle(), "-0x1.fffffep-126"); |
| APFloat PVal3(APFloat::IEEEsingle(), "0x1p-125"); |
| APFloat MVal3(APFloat::IEEEsingle(), "-0x1p-125"); |
| APFloat PVal4(APFloat::IEEEsingle(), "0x1p+127"); |
| APFloat MVal4(APFloat::IEEEsingle(), "-0x1p+127"); |
| APFloat PVal5(APFloat::IEEEsingle(), "1.5"); |
| APFloat MVal5(APFloat::IEEEsingle(), "-1.5"); |
| APFloat PVal6(APFloat::IEEEsingle(), "1"); |
| APFloat MVal6(APFloat::IEEEsingle(), "-1"); |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| } SpecialCaseTests[] = { |
| { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, PSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PInf, MSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, PSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MInf, MSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PInf, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MInf, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, PLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, MLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PNormalValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PNormalValue, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, PInf, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MInf, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MNormalValue, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, PLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, MLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MNormalValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MNormalValue, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PInf, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, MInf, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { PLargestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PLargestValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PLargestValue, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PInf, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, MInf, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal }, |
| { MLargestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MLargestValue, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MLargestValue, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, PInf, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MInf, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PLargestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MLargestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestValue, PSmallestNormalized, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestValue, MSmallestNormalized, "0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PInf, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MInf, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PLargestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MLargestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestValue, PSmallestNormalized, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestValue, MSmallestNormalized, "-0x1p-149", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PInf, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MInf, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PLargestValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, MLargestValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PSmallestNormalized, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, PInf, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MInf, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN }, |
| { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN }, |
| { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PLargestValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, MLargestValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MSmallestNormalized, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MSmallestNormalized, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| |
| { PVal1, PVal1, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, MVal1, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, PVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, MVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, PVal4, "-0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { PVal1, MVal4, "-0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { PVal1, PVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, MVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal1, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, PVal1, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, MVal1, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, PVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, MVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, PVal4, "0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { MVal1, MVal4, "0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { MVal1, PVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, MVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal1, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal2, PVal1, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, MVal1, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, PVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal2, MVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal2, PVal3, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, MVal3, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, PVal4, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, MVal4, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, PVal5, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, MVal5, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, PVal6, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal2, MVal6, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, PVal1, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, MVal1, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, PVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal2, MVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal2, PVal3, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, MVal3, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, PVal4, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, MVal4, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, PVal5, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, MVal5, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, PVal6, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal2, MVal6, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, PVal1, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, MVal1, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, PVal2, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, MVal2, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal3, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal3, PVal4, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, MVal4, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, PVal5, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, MVal5, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, PVal6, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal3, MVal6, "0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, PVal1, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, MVal1, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, PVal2, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, MVal2, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal3, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal3, PVal4, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, MVal4, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, PVal5, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, MVal5, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, PVal6, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { MVal3, MVal6, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, PVal1, "0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, MVal1, "0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, PVal2, "0x0.002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, MVal2, "0x0.002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal4, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal4, PVal4, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal4, MVal4, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal4, PVal5, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, MVal5, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal4, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal4, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, PVal1, "-0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, MVal1, "-0x1p+103", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, PVal2, "-0x0.002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, MVal2, "-0x0.002p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, PVal4, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, MVal4, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, PVal5, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, MVal5, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal4, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal4, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal5, PVal1, "1.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, MVal1, "1.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, PVal2, "0x0.00006p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, MVal2, "0x0.00006p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal5, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal5, PVal4, "1.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, MVal4, "1.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, PVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal5, MVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal5, PVal6, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal5, MVal6, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, PVal1, "-1.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, MVal1, "-1.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, PVal2, "-0x0.00006p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, MVal2, "-0x0.00006p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal5, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal5, PVal4, "-1.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, MVal4, "-1.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, PVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal5, MVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal5, PVal6, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal5, MVal6, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, PVal1, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, MVal1, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, PVal2, "0x0.00004p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, MVal2, "0x0.00004p-126", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal6, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal6, PVal4, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, MVal4, "0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, PVal5, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, MVal5, "-0.5", APFloat::opOK, APFloat::fcNormal }, |
| { PVal6, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { PVal6, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal6, PVal1, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, MVal1, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, PVal2, "-0x0.00004p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, MVal2, "-0x0.00004p-126", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal6, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal6, PVal4, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, MVal4, "-0x1p+0", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, PVal5, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, MVal5, "0.5", APFloat::opOK, APFloat::fcNormal }, |
| { MVal6, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| { MVal6, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero }, |
| }; |
| |
| for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) { |
| APFloat x(SpecialCaseTests[i].x); |
| APFloat y(SpecialCaseTests[i].y); |
| APFloat::opStatus status = x.remainder(y); |
| |
| APFloat result(x.getSemantics(), SpecialCaseTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(SpecialCaseTests[i].status, (int)status); |
| EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory()); |
| } |
| |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1.3333333333333p-2"); // 0.3 |
| APFloat f2(APFloat::IEEEdouble(), "0x1.47ae147ae147bp-7"); // 0.01 |
| APFloat expected(APFloat::IEEEdouble(), "-0x1.4p-56"); |
| EXPECT_EQ(APFloat::opOK, f1.remainder(f2)); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1p64"); // 1.8446744073709552e19 |
| APFloat f2(APFloat::IEEEdouble(), "1.5"); |
| APFloat expected(APFloat::IEEEdouble(), "-0.5"); |
| EXPECT_EQ(APFloat::opOK, f1.remainder(f2)); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "0x1p1000"); |
| APFloat f2(APFloat::IEEEdouble(), "0x1p-1000"); |
| APFloat expected(APFloat::IEEEdouble(), "0.0"); |
| EXPECT_EQ(APFloat::opOK, f1.remainder(f2)); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1 = APFloat::getInf(APFloat::IEEEdouble(), false); |
| APFloat f2(APFloat::IEEEdouble(), "1.0"); |
| EXPECT_EQ(f1.remainder(f2), APFloat::opInvalidOp); |
| EXPECT_TRUE(f1.isNaN()); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "-4.0"); |
| APFloat f2(APFloat::IEEEdouble(), "-2.0"); |
| APFloat expected(APFloat::IEEEdouble(), "-0.0"); |
| EXPECT_EQ(APFloat::opOK, f1.remainder(f2)); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| { |
| APFloat f1(APFloat::IEEEdouble(), "-4.0"); |
| APFloat f2(APFloat::IEEEdouble(), "2.0"); |
| APFloat expected(APFloat::IEEEdouble(), "-0.0"); |
| EXPECT_EQ(APFloat::opOK, f1.remainder(f2)); |
| EXPECT_TRUE(f1.bitwiseIsEqual(expected)); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleAddSpecial) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, |
| APFloat::fltCategory, APFloat::roundingMode>; |
| DataType Data[] = { |
| // (1 + 0) + (-1 + 0) = fcZero |
| std::make_tuple(0x3ff0000000000000ull, 0, 0xbff0000000000000ull, 0, |
| APFloat::fcZero, APFloat::rmNearestTiesToEven), |
| // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x7948000000000000ull, 0ull, APFloat::fcInfinity, |
| APFloat::rmNearestTiesToEven), |
| // TODO: change the 4th 0x75effffffffffffe to 0x75efffffffffffff when |
| // semPPCDoubleDoubleLegacy is gone. |
| // LDBL_MAX + (1.011111... >> (1023 - 106) + (1.1111111...0 >> (1023 - |
| // 160))) = fcNormal |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x7947ffffffffffffull, 0x75effffffffffffeull, |
| APFloat::fcNormal, APFloat::rmNearestTiesToEven), |
| // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| APFloat::fcInfinity, APFloat::rmNearestTiesToEven), |
| // NaN + (1 + 0) = fcNaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::fcNaN, APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2]; |
| APFloat::fltCategory Expected; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp; |
| |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.add(A2, RM); |
| |
| EXPECT_EQ(Expected, A1.getCategory()) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A2.add(A1, RM); |
| |
| EXPECT_EQ(Expected, A2.getCategory()) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleAdd) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, |
| uint64_t, APFloat::roundingMode>; |
| DataType Data[] = { |
| // (1 + 0) + (1e-105 + 0) = (1 + 1e-105) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3960000000000000ull, 0, |
| 0x3ff0000000000000ull, 0x3960000000000000ull, |
| APFloat::rmNearestTiesToEven), |
| // (1 + 0) + (1e-106 + 0) = (1 + 1e-106) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3950000000000000ull, 0, |
| 0x3ff0000000000000ull, 0x3950000000000000ull, |
| APFloat::rmNearestTiesToEven), |
| // (1 + 1e-106) + (1e-106 + 0) = (1 + 1e-105) |
| std::make_tuple(0x3ff0000000000000ull, 0x3950000000000000ull, |
| 0x3950000000000000ull, 0, 0x3ff0000000000000ull, |
| 0x3960000000000000ull, APFloat::rmNearestTiesToEven), |
| // (1 + 0) + (epsilon + 0) = (1 + epsilon) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x0000000000000001ull, 0, |
| 0x3ff0000000000000ull, 0x0000000000000001ull, |
| APFloat::rmNearestTiesToEven), |
| // TODO: change 0xf950000000000000 to 0xf940000000000000, when |
| // semPPCDoubleDoubleLegacy is gone. |
| // (DBL_MAX - 1 << (1023 - 105)) + (1 << (1023 - 53) + 0) = DBL_MAX + |
| // 1.11111... << (1023 - 52) |
| std::make_tuple(0x7fefffffffffffffull, 0xf950000000000000ull, |
| 0x7c90000000000000ull, 0, 0x7fefffffffffffffull, |
| 0x7c8ffffffffffffeull, APFloat::rmNearestTiesToEven), |
| // TODO: change 0xf950000000000000 to 0xf940000000000000, when |
| // semPPCDoubleDoubleLegacy is gone. |
| // (1 << (1023 - 53) + 0) + (DBL_MAX - 1 << (1023 - 105)) = DBL_MAX + |
| // 1.11111... << (1023 - 52) |
| std::make_tuple(0x7c90000000000000ull, 0, 0x7fefffffffffffffull, |
| 0xf950000000000000ull, 0x7fefffffffffffffull, |
| 0x7c8ffffffffffffeull, APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; |
| |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.add(A2, RM); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A2.add(A1, RM); |
| |
| EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A2.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleSubtract) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, |
| uint64_t, APFloat::roundingMode>; |
| DataType Data[] = { |
| // (1 + 0) - (-1e-105 + 0) = (1 + 1e-105) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0xb960000000000000ull, 0, |
| 0x3ff0000000000000ull, 0x3960000000000000ull, |
| APFloat::rmNearestTiesToEven), |
| // (1 + 0) - (-1e-106 + 0) = (1 + 1e-106) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0xb950000000000000ull, 0, |
| 0x3ff0000000000000ull, 0x3950000000000000ull, |
| APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.subtract(A2, RM); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) - ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0], |
| Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) - ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0], |
| Op2[1]) |
| .str(); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleMultiplySpecial) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, |
| APFloat::fltCategory, APFloat::roundingMode>; |
| DataType Data[] = { |
| // fcNaN * fcNaN = fcNaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0, |
| APFloat::fcNaN, APFloat::rmNearestTiesToEven), |
| // fcNaN * fcZero = fcNaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0, 0, APFloat::fcNaN, |
| APFloat::rmNearestTiesToEven), |
| // fcNaN * fcInfinity = fcNaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff0000000000000ull, 0, |
| APFloat::fcNaN, APFloat::rmNearestTiesToEven), |
| // fcNaN * fcNormal = fcNaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::fcNaN, APFloat::rmNearestTiesToEven), |
| // fcInfinity * fcInfinity = fcInfinity |
| std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0, |
| APFloat::fcInfinity, APFloat::rmNearestTiesToEven), |
| // fcInfinity * fcZero = fcNaN |
| std::make_tuple(0x7ff0000000000000ull, 0, 0, 0, APFloat::fcNaN, |
| APFloat::rmNearestTiesToEven), |
| // fcInfinity * fcNormal = fcInfinity |
| std::make_tuple(0x7ff0000000000000ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::fcInfinity, APFloat::rmNearestTiesToEven), |
| // fcZero * fcZero = fcZero |
| std::make_tuple(0, 0, 0, 0, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven), |
| // fcZero * fcNormal = fcZero |
| std::make_tuple(0, 0, 0x3ff0000000000000ull, 0, APFloat::fcZero, |
| APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2]; |
| APFloat::fltCategory Expected; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp; |
| |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.multiply(A2, RM); |
| |
| EXPECT_EQ(Expected, A1.getCategory()) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A2.multiply(A1, RM); |
| |
| EXPECT_EQ(Expected, A2.getCategory()) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleMultiply) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, |
| uint64_t, APFloat::roundingMode>; |
| DataType Data[] = { |
| // 1/3 * 3 = 1.0 |
| std::make_tuple(0x3fd5555555555555ull, 0x3c75555555555556ull, |
| 0x4008000000000000ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::rmNearestTiesToEven), |
| // (1 + epsilon) * (1 + 0) = fcZero |
| std::make_tuple(0x3ff0000000000000ull, 0x0000000000000001ull, |
| 0x3ff0000000000000ull, 0, 0x3ff0000000000000ull, |
| 0x0000000000000001ull, APFloat::rmNearestTiesToEven), |
| // (1 + epsilon) * (1 + epsilon) = 1 + 2 * epsilon |
| std::make_tuple(0x3ff0000000000000ull, 0x0000000000000001ull, |
| 0x3ff0000000000000ull, 0x0000000000000001ull, |
| 0x3ff0000000000000ull, 0x0000000000000002ull, |
| APFloat::rmNearestTiesToEven), |
| // -(1 + epsilon) * (1 + epsilon) = -1 |
| std::make_tuple(0xbff0000000000000ull, 0x0000000000000001ull, |
| 0x3ff0000000000000ull, 0x0000000000000001ull, |
| 0xbff0000000000000ull, 0, APFloat::rmNearestTiesToEven), |
| // (0.5 + 0) * (1 + 2 * epsilon) = 0.5 + epsilon |
| std::make_tuple(0x3fe0000000000000ull, 0, 0x3ff0000000000000ull, |
| 0x0000000000000002ull, 0x3fe0000000000000ull, |
| 0x0000000000000001ull, APFloat::rmNearestTiesToEven), |
| // (0.5 + 0) * (1 + epsilon) = 0.5 |
| std::make_tuple(0x3fe0000000000000ull, 0, 0x3ff0000000000000ull, |
| 0x0000000000000001ull, 0x3fe0000000000000ull, 0, |
| APFloat::rmNearestTiesToEven), |
| // __LDBL_MAX__ * (1 + 1 << 106) = inf |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x3ff0000000000000ull, 0x3950000000000000ull, |
| 0x7ff0000000000000ull, 0, APFloat::rmNearestTiesToEven), |
| // __LDBL_MAX__ * (1 + 1 << 107) > __LDBL_MAX__, but not inf, yes =_=||| |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x3ff0000000000000ull, 0x3940000000000000ull, |
| 0x7fefffffffffffffull, 0x7c8fffffffffffffull, |
| APFloat::rmNearestTiesToEven), |
| // __LDBL_MAX__ * (1 + 1 << 108) = __LDBL_MAX__ |
| std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| 0x3ff0000000000000ull, 0x3930000000000000ull, |
| 0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; |
| |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.multiply(A2, RM); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| { |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A2.multiply(A1, RM); |
| |
| EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A2.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1], |
| Op1[0], Op1[1]) |
| .str(); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleDivide) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, |
| uint64_t, APFloat::roundingMode>; |
| // TODO: Only a sanity check for now. Add more edge cases when the |
| // double-double algorithm is implemented. |
| DataType Data[] = { |
| // 1 / 3 = 1/3 |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x4008000000000000ull, 0, |
| 0x3fd5555555555555ull, 0x3c75555555555556ull, |
| APFloat::rmNearestTiesToEven), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| APFloat::roundingMode RM; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.divide(A2, RM); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("({0:x} + {1:x}) / ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0], |
| Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("({0:x} + {1:x}) / ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0], |
| Op2[1]) |
| .str(); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleRemainder) { |
| using DataType = |
| std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>; |
| DataType Data[] = { |
| // remainder(3.0 + 3.0 << 53, 1.25 + 1.25 << 53) = (0.5 + 0.5 << 53) |
| std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull, |
| 0x3ff4000000000000ull, 0x3ca4000000000000ull, |
| 0x3fe0000000000000ull, 0x3c90000000000000ull), |
| // remainder(3.0 + 3.0 << 53, 1.75 + 1.75 << 53) = (-0.5 - 0.5 << 53) |
| std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull, |
| 0x3ffc000000000000ull, 0x3cac000000000000ull, |
| 0xbfe0000000000000ull, 0xbc90000000000000ull), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.remainder(A2); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("remainder({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("remainder(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], |
| Op1[1], Op2[0], Op2[1]) |
| .str(); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleMod) { |
| using DataType = |
| std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>; |
| DataType Data[] = { |
| // mod(3.0 + 3.0 << 53, 1.25 + 1.25 << 53) = (0.5 + 0.5 << 53) |
| std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull, |
| 0x3ff4000000000000ull, 0x3ca4000000000000ull, |
| 0x3fe0000000000000ull, 0x3c90000000000000ull), |
| // mod(3.0 + 3.0 << 53, 1.75 + 1.75 << 53) = (1.25 + 1.25 << 53) |
| // 0xbc98000000000000 doesn't seem right, but it's what we currently have. |
| // TODO: investigate |
| std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull, |
| 0x3ffc000000000000ull, 0x3cac000000000000ull, |
| 0x3ff4000000000001ull, 0xbc98000000000000ull), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2], Expected[2]; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| A1.mod(A2); |
| |
| EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0]) |
| << formatv("fmod(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1]) |
| << formatv("fmod(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleFMA) { |
| // Sanity check for now. |
| APFloat A(APFloat::PPCDoubleDouble(), "2"); |
| A.fusedMultiplyAdd(APFloat(APFloat::PPCDoubleDouble(), "3"), |
| APFloat(APFloat::PPCDoubleDouble(), "4"), |
| APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(APFloat::cmpEqual, |
| APFloat(APFloat::PPCDoubleDouble(), "10").compare(A)); |
| } |
| |
| struct PPCDoubleDoubleRoundToIntegralTestCase { |
| DD Input; |
| DD Rounded[5] = {}; |
| constexpr PPCDoubleDoubleRoundToIntegralTestCase & |
| withRounded(DD R, APFloat::roundingMode RM) { |
| Rounded[static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM)] = R; |
| return *this; |
| } |
| }; |
| |
| auto ppcDoubleDoubleRoundToIntegralTests() { |
| constexpr double Eps = std::numeric_limits<double>::epsilon(); |
| constexpr double HalfEps = Eps / 2.0; |
| constexpr double QuarterEps = Eps / 4.0; |
| constexpr double SmallestNormal = std::numeric_limits<double>::min(); |
| constexpr double EvenIntegerThreshold{uint64_t{1} |
| << std::numeric_limits<double>::digits}; |
| constexpr double Inf = std::numeric_limits<double>::infinity(); |
| constexpr double QNaN = std::numeric_limits<double>::quiet_NaN(); |
| using TestCase = PPCDoubleDoubleRoundToIntegralTestCase; |
| static constexpr auto TestCases = std::array{ |
| // 1. Zeros and Basic Integers |
| // Input: Positive Zero (0.0, 0.0) |
| TestCase({{0.0, 0.0}}) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({0.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Negative Zero (-0.0, 0.0) |
| TestCase({{-0.0, 0.0}}) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Positive Even (2.0, 0.0) |
| TestCase({{2.0, 0.0}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Positive Odd (3.0, 0.0) |
| TestCase({{3.0, 0.0}}) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Negative Even (-2.0, 0.0) |
| TestCase({{-2.0, 0.0}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 2. General Fractions (Non-Ties) |
| // Input: 2.3 |
| TestCase({{2.3, 0.0}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: 2.7 |
| TestCase({{2.7, 0.0}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -2.3 |
| TestCase({{-2.3, 0.0}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -2.7 |
| TestCase({{-2.7, 0.0}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: 2.3 + Tiny |
| TestCase({{2.3, SmallestNormal}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 3. Exact Midpoints (Ties at N.5) |
| // Input: 0.5 |
| TestCase({{0.5, 0.0}}) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: 1.5 (Odd base) |
| TestCase({{1.5, 0.0}}) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: 2.5 (Even base) |
| TestCase({{2.5, 0.0}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -0.5 |
| TestCase({{-0.5, 0.0}}) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-1.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -1.5 (Odd base) |
| TestCase({{-1.5, 0.0}}) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -2.5 (Even base) |
| TestCase({{-2.5, 0.0}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 4. Near Midpoints (lo breaks the tie) |
| // Input: Slightly > 2.5 |
| TestCase({{2.5, SmallestNormal}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < 2.5 |
| TestCase({{2.5, -SmallestNormal}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly > 1.5 |
| TestCase({{1.5, SmallestNormal}}) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < 1.5 |
| TestCase({{1.5, -SmallestNormal}}) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly > -2.5 (closer to 0) |
| TestCase({{-2.5, SmallestNormal}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < -2.5 (further from 0) |
| TestCase({{-2.5, -SmallestNormal}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 5. Near Integers (lo crosses the integer boundary) |
| // Input: Slightly > 2.0 |
| TestCase({{2.0, SmallestNormal}}) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({3.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < 2.0 (1.99...) |
| TestCase({{2.0, -SmallestNormal}}) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly > -2.0 (-1.99...) |
| TestCase({{-2.0, SmallestNormal}}) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < -2.0 |
| TestCase({{-2.0, -SmallestNormal}}) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-3.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly > 0.0 |
| TestCase({{SmallestNormal, 0.0}}) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({0.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: Slightly < 0.0 |
| TestCase({{-SmallestNormal, 0.0}}) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-0.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 6. Boundary of Canonicalization (Maximum lo) |
| // Input: 1.0 + Max lo (1 + 2^-53) |
| TestCase({{1.0, HalfEps}}) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({2.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: 1.0 - Max lo (1 - 2^-54) |
| TestCase({{1.0, -QuarterEps}}) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({0.0, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({1.0, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // 7. Large Magnitudes (Beyond 2^53). N = EvenIntegerThreshold (Even) |
| // Input: EvenIntegerThreshold (Exact) |
| TestCase({{EvenIntegerThreshold, 0.0}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+1 (Exact) |
| TestCase({{EvenIntegerThreshold, 1.0}}) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Fractions |
| // Input: EvenIntegerThreshold+0.25 |
| TestCase({{EvenIntegerThreshold, 0.25}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+0.75 |
| TestCase({{EvenIntegerThreshold, 0.75}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Ties (Midpoints) |
| // Input: EvenIntegerThreshold-0.5 |
| TestCase({{EvenIntegerThreshold, -0.5}}) |
| .withRounded({EvenIntegerThreshold - 1.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold - 1.0, 0.0}, |
| APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+0.5 |
| TestCase({{EvenIntegerThreshold, 0.5}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+1.5 |
| TestCase({{EvenIntegerThreshold + 2.0, -0.5}}) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, |
| APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+2.5 |
| TestCase({{EvenIntegerThreshold + 2.0, 0.5}}) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, |
| APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold + 4.0, -1.0}, |
| APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold + 4.0, -1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold + 2.0, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Near Ties |
| // Input: EvenIntegerThreshold+0.5+HalfEps |
| TestCase({{EvenIntegerThreshold, 0.5 + HalfEps}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Input: EvenIntegerThreshold+0.5-QuarterEps |
| TestCase({{EvenIntegerThreshold, 0.5 - QuarterEps}}) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 0.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // Canonical Boundary (Max lo for EvenIntegerThreshold is 1.0) |
| // Input: EvenIntegerThreshold+1.0 |
| TestCase({{EvenIntegerThreshold, 1.0}}) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative) |
| .withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToAway) |
| .withRounded({EvenIntegerThreshold, 1.0}, |
| APFloat::rmNearestTiesToEven), |
| |
| // 8. Special Values |
| // Input: +Inf |
| TestCase({{Inf, 0.0}}) |
| .withRounded({Inf, 0.0}, APFloat::rmTowardZero) |
| .withRounded({Inf, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({Inf, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({Inf, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({Inf, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: -Inf |
| TestCase({{-Inf, 0.0}}) |
| .withRounded({-Inf, 0.0}, APFloat::rmTowardZero) |
| .withRounded({-Inf, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({-Inf, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({-Inf, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({-Inf, 0.0}, APFloat::rmNearestTiesToEven), |
| |
| // Input: NaN input hi. Expected output canonical (NaN, 0.0). |
| TestCase({{QNaN, 0.0}}) |
| .withRounded({QNaN, 0.0}, APFloat::rmTowardZero) |
| .withRounded({QNaN, 0.0}, APFloat::rmTowardNegative) |
| .withRounded({QNaN, 0.0}, APFloat::rmTowardPositive) |
| .withRounded({QNaN, 0.0}, APFloat::rmNearestTiesToAway) |
| .withRounded({QNaN, 0.0}, APFloat::rmNearestTiesToEven), |
| }; |
| return TestCases; |
| } |
| |
| class PPCDoubleDoubleRoundToIntegralValueTest |
| : public testing::Test, |
| public ::testing::WithParamInterface< |
| PPCDoubleDoubleRoundToIntegralTestCase> {}; |
| |
| INSTANTIATE_TEST_SUITE_P( |
| PPCDoubleDoubleRoundToIntegralValueParamTests, |
| PPCDoubleDoubleRoundToIntegralValueTest, |
| ::testing::ValuesIn(ppcDoubleDoubleRoundToIntegralTests())); |
| |
| TEST_P(PPCDoubleDoubleRoundToIntegralValueTest, |
| PPCDoubleDoubleRoundToIntegral) { |
| const PPCDoubleDoubleRoundToIntegralTestCase TestCase = GetParam(); |
| const APFloat Input = makeDoubleAPFloat(TestCase.Input); |
| EXPECT_FALSE(Input.isDenormal()) |
| << TestCase.Input.Hi << " + " << TestCase.Input.Lo; |
| for (size_t I = 0, E = std::size(TestCase.Rounded); I != E; ++I) { |
| const auto RM = static_cast<APFloat::roundingMode>(I); |
| const APFloat Expected = makeDoubleAPFloat(TestCase.Rounded[I]); |
| EXPECT_FALSE(Expected.isDenormal()) |
| << TestCase.Rounded[I].Hi << " + " << TestCase.Input.Lo; |
| APFloat Actual = Input; |
| Actual.roundToIntegral(RM); |
| if (Actual.isNaN()) |
| EXPECT_TRUE(Actual.isNaN()); |
| else |
| EXPECT_EQ(Actual.compare(Expected), APFloat::cmpEqual) |
| << "RM: " << RM << " Input.Hi: " << TestCase.Input.Hi |
| << " Input.Lo: " << TestCase.Input.Lo << " Actual: " << Actual |
| << " Expected.Hi: " << TestCase.Rounded[I].Hi |
| << " Expected.Lo: " << TestCase.Rounded[I].Lo |
| << " Expected: " << Expected; |
| } |
| } |
| |
| namespace PPCDoubleDoubleConvertToIntegerTestDetails { |
| // Define the rounding modes for easier readability. |
| static constexpr auto RNE = APFloat::rmNearestTiesToEven; |
| static constexpr auto RNA = APFloat::rmNearestTiesToAway; |
| static constexpr auto RTZ = APFloat::rmTowardZero; |
| static constexpr auto RUP = APFloat::rmTowardPositive; |
| static constexpr auto RDN = APFloat::rmTowardNegative; |
| |
| struct TestCase { |
| // Structure to hold the expected result of a conversion |
| struct ExpectedConversion { |
| // The expected integer value represented as a string (decimal). |
| // We use a string to easily represent arbitrary precision values in |
| // constexpr. The test runner should parse this into an APSInt matching the |
| // test configuration. |
| const char *ExpectedIntStr; |
| APFloat::opStatus Status; |
| }; |
| |
| DD Input; |
| unsigned IntegerWidth; |
| bool IsSigned; |
| // Array indexed by the rounding mode enum value. |
| std::array<ExpectedConversion, 5> Rounded = {}; |
| |
| // Helper to define the expected results for a specific rounding mode. |
| constexpr TestCase &with(APFloat::roundingMode RM, const char *ExpectedStr, |
| APFloat::opStatus Status) { |
| Rounded[static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM)] = { |
| ExpectedStr, |
| Status, |
| }; |
| return *this; |
| } |
| |
| // Helper to define the same result for all rounding modes. |
| constexpr TestCase &withAll(const char *ExpectedStr, |
| APFloat::opStatus Status) { |
| return with(RNE, ExpectedStr, Status) |
| .with(RNA, ExpectedStr, Status) |
| .with(RTZ, ExpectedStr, Status) |
| .with(RUP, ExpectedStr, Status) |
| .with(RDN, ExpectedStr, Status); |
| } |
| }; |
| |
| auto testCases() { |
| // Define the status codes. |
| constexpr auto OK = llvm::APFloat::opOK; |
| constexpr auto Inexact = llvm::APFloat::opInexact; |
| // The API specifies opInvalidOp for out-of-range (overflow/underflow) and |
| // NaN. |
| constexpr auto Invalid = llvm::APFloat::opInvalidOp; |
| |
| // Helper constants for constructing specific DD values. |
| constexpr double Infinity = std::numeric_limits<double>::infinity(); |
| constexpr double NaN = std::numeric_limits<double>::quiet_NaN(); |
| constexpr double DMAX = std::numeric_limits<double>::max(); |
| |
| // Powers of 2 |
| constexpr double P53 = 0x1p53; |
| constexpr double P63 = 0x1p63; |
| constexpr double P64 = 0x1p64; |
| // 2^-100 (A very small delta demonstrating extended precision) |
| constexpr double PM100 = 0x1p-100; |
| |
| static constexpr auto ConvertToIntegerTestCases = std::array{ |
| // 1. Zeros, NaNs, and Infinities (Target: 64-bit Signed) |
| // INT64_MAX = 9223372036854775807 |
| // INT64_MIN = -9223372036854775808 |
| |
| // Input: Positive Zero (0.0, 0.0) |
| TestCase{{0.0, 0.0}, 64, true}.withAll("0", OK), |
| |
| // Input: Negative Zero (-0.0, 0.0) |
| TestCase{{-0.0, 0.0}, 64, true}.withAll("0", OK), |
| |
| // Input: NaN. Expected behavior: Invalid, deterministic result of 0. |
| TestCase{{NaN, 0.0}, 64, true}.withAll("0", Invalid), |
| |
| // Input: +Infinity. Expected behavior: Invalid, deterministic result of |
| // INT64_MAX. |
| TestCase{{Infinity, 0.0}, 64, true}.withAll("9223372036854775807", |
| Invalid), |
| |
| // Input: -Infinity. Expected behavior: Invalid, deterministic result of |
| // INT64_MIN. |
| TestCase{{-Infinity, 0.0}, 64, true}.withAll("-9223372036854775808", |
| Invalid), |
| |
| // 2. Basic Rounding and Tie-Breaking (Target: 32-bit Signed) |
| |
| // Input: 2.5 (Tie, preceding integer is Even) |
| TestCase{{2.5, 0.0}, 32, true} |
| .with(RTZ, "2", Inexact) |
| .with(RDN, "2", Inexact) |
| .with(RUP, "3", Inexact) |
| .with(RNA, "3", Inexact) |
| .with(RNE, "2", Inexact), |
| |
| // Input: 3.5 (Tie, preceding integer is Odd) |
| TestCase{{3.5, 0.0}, 32, true} |
| .with(RTZ, "3", Inexact) |
| .with(RDN, "3", Inexact) |
| .with(RUP, "4", Inexact) |
| .with(RNA, "4", Inexact) |
| .with(RNE, "4", Inexact), |
| |
| // Input: -2.5 (Tie, preceding integer is Even) |
| TestCase{{-2.5, 0.0}, 32, true} |
| .with(RTZ, "-2", Inexact) |
| .with(RDN, "-3", Inexact) |
| .with(RUP, "-2", Inexact) |
| .with(RNA, "-3", Inexact) |
| .with(RNE, "-2", Inexact), |
| |
| // 3. Double-Double Precision (The role of 'lo') |
| // Testing how extended precision affects rounding decisions. |
| |
| // Input: 2.5 + Epsilon (Slightly above tie) |
| TestCase{{2.5, PM100}, 32, true} |
| .with(RTZ, "2", Inexact) |
| .with(RDN, "2", Inexact) |
| .with(RUP, "3", Inexact) |
| .with(RNA, "3", Inexact) |
| .with(RNE, "3", Inexact), |
| |
| // Input: 2.5 - Epsilon (Slightly below tie) |
| TestCase{{2.5, -PM100}, 32, true} |
| .with(RTZ, "2", Inexact) |
| .with(RDN, "2", Inexact) |
| .with(RUP, "3", Inexact) |
| .with(RNA, "2", Inexact) |
| .with(RNE, "2", Inexact), |
| |
| // Input: 1.0 + Epsilon (Just above 1.0, e.g., 1.00...1) |
| TestCase{{1.0, PM100}, 32, true} |
| .with(RTZ, "1", Inexact) |
| .with(RDN, "1", Inexact) |
| .with(RUP, "2", Inexact) |
| .with(RNA, "1", Inexact) |
| .with(RNE, "1", Inexact), |
| |
| // Input: 1.0 - Epsilon (Just below 1.0, e.g. 0.999...) |
| TestCase{{1.0, -PM100}, 32, true} |
| .with(RTZ, "0", Inexact) |
| .with(RDN, "0", Inexact) |
| .with(RUP, "1", Inexact) |
| .with(RNA, "1", Inexact) |
| .with(RNE, "1", Inexact), |
| |
| // Input: Large number tie-breaking (Crucial test for DD implementation) |
| // Input: 2^53 + 1.5. |
| // A standard double(2^53 + 1.5) rounds to 2^53 + 2.0. |
| // The DD representation must precisely hold 2^53 + 1.5. |
| // The canonical DD representation is {2^53 + 2.0, -0.5}. |
| // Value is 9007199254740993.5 |
| TestCase{{P53 + 2.0, -0.5}, 64, true} |
| .with(RTZ, "9007199254740993", Inexact) |
| .with(RDN, "9007199254740993", Inexact) |
| .with(RUP, "9007199254740994", Inexact) |
| .with(RNA, "9007199254740994", Inexact) |
| .with(RNE, "9007199254740994", Inexact), |
| |
| // 4. Overflow Boundaries (Signed) |
| |
| // Input: Exactly INT64_MAX. (2^63 - 1) |
| // Represented precisely as (2^63, -1.0) |
| TestCase{{P63, -1.0}, 64, true}.withAll("9223372036854775807", OK), |
| |
| // Input: INT64_MAX + 0.3. |
| // Represented as (2^63, -0.7) |
| TestCase{{P63, -0.7}, 64, true} |
| .with(RTZ, "9223372036854775807", Inexact) |
| .with(RDN, "9223372036854775807", Inexact) |
| .with(RNA, "9223372036854775807", Inexact) |
| .with(RNE, "9223372036854775807", Inexact) |
| .with(RUP, "9223372036854775807", Invalid), |
| |
| // Input: INT64_MAX + 0.5 (Tie at the boundary) |
| // Represented as (2^63, -0.5). Target integers are MAX (odd) and 2^63 |
| // (even). |
| TestCase{{P63, -0.5}, 64, true} |
| .with(RTZ, "9223372036854775807", Inexact) |
| .with(RDN, "9223372036854775807", Inexact) |
| .with(RUP, "9223372036854775807", Invalid) |
| .with(RNA, "9223372036854775807", Invalid) |
| .with(RNE, "9223372036854775807", Invalid), |
| |
| // Input: 2^55 - 2^1 - 2^-52 to signed integer. |
| // Represented as (2^55 - 2^2, 2^1 - 2^-1). |
| TestCase{{0x1.fffffffffffffp+54, 0x1.8p0}, 56, true} |
| .with(RTZ, "36028797018963965", Inexact) |
| .with(RDN, "36028797018963965", Inexact) |
| .with(RUP, "36028797018963966", Inexact) |
| .with(RNA, "36028797018963966", Inexact) |
| .with(RNE, "36028797018963966", Inexact), |
| |
| // Input: 2^55 - 2^1 - 2^-52 to signed integer. |
| // Represented as (2^55 - 2^2, 2^1 - 2^-52). |
| TestCase{{0x1.fffffffffffffp+54, 0x1.fffffffffffffp0}, 56, true} |
| .with(RTZ, "36028797018963965", Inexact) |
| .with(RDN, "36028797018963965", Inexact) |
| .with(RUP, "36028797018963966", Inexact) |
| .with(RNA, "36028797018963966", Inexact) |
| .with(RNE, "36028797018963966", Inexact), |
| |
| // Input: Exactly 2^63 (One past INT64_MAX) |
| TestCase{{P63, 0.0}, 64, true}.withAll("9223372036854775807", Invalid), |
| |
| // Input: Exactly INT64_MIN (-2^63) |
| TestCase{{-P63, 0.0}, 64, true}.withAll("-9223372036854775808", OK), |
| |
| // Input: INT64_MIN - 0.5 (Tie at the lower boundary) |
| // Target integers are -2^63-1 (odd) and MIN (even). |
| TestCase{{-P63, -0.5}, 64, true} |
| .with(RTZ, "-9223372036854775808", Inexact) |
| .with(RUP, "-9223372036854775808", Inexact) |
| // RDN rounds down, causing overflow. |
| .with(RDN, "-9223372036854775808", Invalid) |
| // RNA rounds away (down), causing overflow. |
| .with(RNA, "-9223372036854775808", Invalid) |
| // RNE rounds to even (up to -2^63), which is OK. |
| .with(RNE, "-9223372036854775808", Inexact), |
| |
| // 5. Overflow Boundaries (Unsigned) |
| // UINT64_MAX = 18446744073709551615 (2^64 - 1) |
| |
| // Input: Exactly UINT64_MAX. (2^64 - 1) |
| // Represented precisely as (2^64, -1.0) |
| TestCase{{P64, -1.0}, 64, false}.withAll("18446744073709551615", OK), |
| |
| // Input: UINT64_MAX + 0.5 (Tie at the boundary) |
| // Represented as (2^64, -0.5) |
| TestCase{{P64, -0.5}, 64, false} |
| .with(RTZ, "18446744073709551615", Inexact) |
| .with(RDN, "18446744073709551615", Inexact) |
| // RUP rounds up (2^64), causing overflow. |
| .with(RUP, "18446744073709551615", Invalid) |
| // RNA rounds away (up), causing overflow. |
| .with(RNA, "18446744073709551615", Invalid) |
| // RNE rounds to even (up to 2^64), causing overflow. |
| .with(RNE, "18446744073709551615", Invalid), |
| |
| // Input: 2^55 - 2^1 - 2^-52 to unsigned integer. |
| // Represented as (2^55 - 2^2, 2^1 - 2^-1). |
| TestCase{{0x1.fffffffffffffp+54, 0x1.8p0}, 55, false} |
| .with(RTZ, "36028797018963965", Inexact) |
| .with(RDN, "36028797018963965", Inexact) |
| .with(RUP, "36028797018963966", Inexact) |
| .with(RNA, "36028797018963966", Inexact) |
| .with(RNE, "36028797018963966", Inexact), |
| |
| // Input: 2^55 - 2^1 - 2^-52 to unsigned integer. |
| // Represented as (2^55 - 2^2, 2^1 - 2^-52). |
| TestCase{{0x1.fffffffffffffp+54, 0x1.fffffffffffffp0}, 55, false} |
| .with(RTZ, "36028797018963965", Inexact) |
| .with(RDN, "36028797018963965", Inexact) |
| .with(RUP, "36028797018963966", Inexact) |
| .with(RNA, "36028797018963966", Inexact) |
| .with(RNE, "36028797018963966", Inexact), |
| |
| // Input: -0.3 (Slightly below zero) |
| TestCase{{-0.3, 0.0}, 64, false} |
| .with(RTZ, "0", Inexact) |
| .with(RUP, "0", Inexact) |
| .with(RNA, "0", Inexact) |
| .with(RNE, "0", Inexact) |
| .with(RDN, "0", Invalid), |
| |
| // Input: -0.5 (Tie at zero) |
| TestCase{{-0.5, 0.0}, 64, false} |
| .with(RTZ, "0", Inexact) |
| .with(RUP, "0", Inexact) |
| // RNE rounds to even (0). |
| .with(RNE, "0", Inexact) |
| .with(RDN, "0", Invalid) |
| // RNA rounds away (-1), causing overflow. |
| .with(RNA, "0", Invalid), |
| |
| // Input: -1.0 (Negative integer) |
| TestCase{{-1.0, 0.0}, 64, false}.withAll("0", Invalid), |
| |
| // 6. High Precision Integers (Target: 128-bit Signed) |
| // INT128_MAX = 170141183460469231731687303715884105727 |
| |
| // Input: 2^100 (Exactly representable in DD) |
| // 2^100 = 1267650600228229401496703205376.0 |
| TestCase{{1267650600228229401496703205376.0, 0.0}, 128, true}.withAll( |
| "1267650600228229401496703205376", OK), |
| |
| // Input: DMAX. (Approx 1.8e308). |
| // This is vastly larger than INT128_MAX (Approx 1.7e38). |
| TestCase{{DMAX, 0.0}, 128, true}.withAll( |
| "170141183460469231731687303715884105727", Invalid), |
| |
| // Input: Largest semPPCDoubleDoubleLegacy |
| TestCase{{DMAX, 0x1.ffffffffffffep+969}, 128, true}.withAll( |
| "170141183460469231731687303715884105727", Invalid), |
| |
| // 7. Round to negative -0 |
| TestCase{{-PM100, 0.0}, 32, true} |
| .with(RTZ, "0", Inexact) |
| .with(RUP, "0", Inexact) |
| .with(RNA, "0", Inexact) |
| .with(RNE, "0", Inexact) |
| .with(RDN, "-1", Inexact), |
| }; |
| return ConvertToIntegerTestCases; |
| } |
| } // namespace PPCDoubleDoubleConvertToIntegerTestDetails |
| |
| class PPCDoubleDoubleConvertToIntegerValueTest |
| : public testing::Test, |
| public ::testing::WithParamInterface< |
| PPCDoubleDoubleConvertToIntegerTestDetails::TestCase> {}; |
| |
| INSTANTIATE_TEST_SUITE_P( |
| PPCDoubleDoubleConvertToIntegerValueParamTests, |
| PPCDoubleDoubleConvertToIntegerValueTest, |
| ::testing::ValuesIn( |
| PPCDoubleDoubleConvertToIntegerTestDetails::testCases())); |
| |
| TEST_P(PPCDoubleDoubleConvertToIntegerValueTest, |
| PPCDoubleDoubleConvertToInteger) { |
| const PPCDoubleDoubleConvertToIntegerTestDetails::TestCase Params = |
| GetParam(); |
| const APFloat Input = makeDoubleAPFloat(Params.Input); |
| EXPECT_FALSE(Input.isDenormal()) |
| << Params.Input.Hi << " + " << Params.Input.Lo; |
| |
| for (size_t I = 0, E = std::size(Params.Rounded); I != E; ++I) { |
| const auto RM = static_cast<APFloat::roundingMode>(I); |
| const auto &Expected = Params.Rounded[I]; |
| APSInt ActualInteger(Params.IntegerWidth, /*isUnsigned=*/!Params.IsSigned); |
| |
| APSInt ExpectedInteger{Expected.ExpectedIntStr}; |
| EXPECT_LE(ExpectedInteger.getBitWidth(), Params.IntegerWidth); |
| ExpectedInteger = ExpectedInteger.extend(Params.IntegerWidth); |
| if (ExpectedInteger.isUnsigned() && Params.IsSigned) { |
| ExpectedInteger.setIsSigned(Params.IsSigned); |
| EXPECT_FALSE(ExpectedInteger.isNegative()); |
| } |
| |
| const bool NegativeUnderflow = |
| ExpectedInteger.isZero() && Input.isNegative(); |
| const bool ExpectedIsExact = |
| Expected.Status == APFloat::opOK && !NegativeUnderflow; |
| bool ActualIsExact; |
| const auto ActualStatus = |
| Input.convertToInteger(ActualInteger, RM, &ActualIsExact); |
| EXPECT_EQ(ActualStatus, Expected.Status); |
| EXPECT_EQ(ActualIsExact, ExpectedIsExact); |
| EXPECT_EQ(ActualInteger, ExpectedInteger); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleCompare) { |
| using DataType = |
| std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, APFloat::cmpResult>; |
| |
| DataType Data[] = { |
| // (1 + 0) = (1 + 0) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::cmpEqual), |
| // (1 + 0) < (1.00...1 + 0) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull, 0, |
| APFloat::cmpLessThan), |
| // (1.00...1 + 0) > (1 + 0) |
| std::make_tuple(0x3ff0000000000001ull, 0, 0x3ff0000000000000ull, 0, |
| APFloat::cmpGreaterThan), |
| // (1 + 0) < (1 + epsilon) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull, |
| 0x0000000000000001ull, APFloat::cmpLessThan), |
| // NaN != NaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0, |
| APFloat::cmpUnordered), |
| // (1 + 0) != NaN |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x7ff8000000000000ull, 0, |
| APFloat::cmpUnordered), |
| // Inf = Inf |
| std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0, |
| APFloat::cmpEqual), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2]; |
| APFloat::cmpResult Expected; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| EXPECT_EQ(Expected, A1.compare(A2)) |
| << formatv("compare(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1], |
| Op2[0], Op2[1]) |
| .str(); |
| } |
| } |
| |
| namespace PPCDoubleDoubleCompareAbsoluteValueTestDetails { |
| struct TestCase { |
| DD LHS; |
| DD RHS; |
| APFloat::cmpResult Result; |
| }; |
| |
| auto testCases() { |
| static constexpr auto CompareAbsoluteValueTestCases = std::array{ |
| TestCase{ |
| {1.0, 0.0}, |
| {1.0, 0.0}, |
| APFloat::cmpEqual, |
| }, |
| TestCase{ |
| {1.0, -0.0}, |
| {1.0, +0.0}, |
| APFloat::cmpEqual, |
| }, |
| TestCase{ |
| {1.0, 0.0}, |
| {0x1.0000000000001p+0, 0.0}, |
| APFloat::cmpLessThan, |
| }, |
| TestCase{ |
| {0x1.0000000000001p+0, 0.0}, |
| {1.0, 0.0}, |
| APFloat::cmpGreaterThan, |
| }, |
| TestCase{ |
| {0x1.0000000000001p+0, +0x1p-1074}, |
| {1.0, -0x1p-1074}, |
| APFloat::cmpGreaterThan, |
| }, |
| TestCase{ |
| {0x1.0000000000001p+0, -0x1p-1074}, |
| {1.0, +0x1p-1074}, |
| APFloat::cmpGreaterThan, |
| }, |
| TestCase{ |
| {1.0, 0.0}, |
| {1.0, -0x1p-1074}, |
| APFloat::cmpGreaterThan, |
| }, |
| TestCase{ |
| {1.0, 0.0}, |
| {1.0, +0x1p-1074}, |
| APFloat::cmpLessThan, |
| }, |
| TestCase{ |
| {1.0, +0x1p-1073}, |
| {1.0, -0x1p-1074}, |
| APFloat::cmpGreaterThan, |
| }, |
| TestCase{ |
| {1.0, +0x1p-1074}, |
| {1.0, -0x1p-1074}, |
| APFloat::cmpGreaterThan, |
| }, |
| }; |
| return CompareAbsoluteValueTestCases; |
| } |
| } // namespace PPCDoubleDoubleCompareAbsoluteValueTestDetails |
| |
| class PPCDoubleDoubleCompareAbsoluteValueValueTest |
| : public testing::Test, |
| public ::testing::WithParamInterface< |
| PPCDoubleDoubleCompareAbsoluteValueTestDetails::TestCase> {}; |
| |
| INSTANTIATE_TEST_SUITE_P( |
| PPCDoubleDoubleCompareAbsoluteValueValueParamTests, |
| PPCDoubleDoubleCompareAbsoluteValueValueTest, |
| ::testing::ValuesIn( |
| PPCDoubleDoubleCompareAbsoluteValueTestDetails::testCases())); |
| |
| TEST_P(PPCDoubleDoubleCompareAbsoluteValueValueTest, |
| PPCDoubleDoubleCompareAbsoluteValue) { |
| auto Param = GetParam(); |
| for (bool LHSNegate : {false, true}) { |
| auto LHS = llvm::detail::DoubleAPFloat{APFloat::PPCDoubleDouble(), |
| APFloat{Param.LHS.Hi}, |
| APFloat{Param.LHS.Lo}}; |
| if (LHSNegate) |
| LHS.changeSign(); |
| for (bool RHSNegate : {false, true}) { |
| auto RHS = llvm::detail::DoubleAPFloat{APFloat::PPCDoubleDouble(), |
| APFloat{Param.RHS.Hi}, |
| APFloat{Param.RHS.Lo}}; |
| if (RHSNegate) |
| RHS.changeSign(); |
| |
| EXPECT_EQ(LHS.compareAbsoluteValue(RHS), Param.Result); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleBitwiseIsEqual) { |
| using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, bool>; |
| |
| DataType Data[] = { |
| // (1 + 0) = (1 + 0) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000000ull, 0, true), |
| // (1 + 0) != (1.00...1 + 0) |
| std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull, 0, |
| false), |
| // NaN = NaN |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0, true), |
| // NaN != NaN with a different bit pattern |
| std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, |
| 0x3ff0000000000000ull, false), |
| // Inf = Inf |
| std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0, true), |
| }; |
| |
| for (auto Tp : Data) { |
| uint64_t Op1[2], Op2[2]; |
| bool Expected; |
| std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp; |
| |
| APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1)); |
| APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2)); |
| EXPECT_EQ(Expected, A1.bitwiseIsEqual(A2)) |
| << formatv("({0:x} + {1:x}) = ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0], |
| Op2[1]) |
| .str(); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleHashValue) { |
| uint64_t Data1[] = {0x3ff0000000000001ull, 0x0000000000000001ull}; |
| uint64_t Data2[] = {0x3ff0000000000001ull, 0}; |
| // The hash values are *hopefully* different. |
| EXPECT_NE( |
| hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data1))), |
| hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data2)))); |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleChangeSign) { |
| uint64_t Data[] = { |
| 0x400f000000000000ull, 0xbcb0000000000000ull, |
| }; |
| APFloat Float(APFloat::PPCDoubleDouble(), APInt(128, 2, Data)); |
| { |
| APFloat Actual = |
| APFloat::copySign(Float, APFloat(APFloat::IEEEdouble(), "1")); |
| EXPECT_EQ(0x400f000000000000ull, Actual.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0xbcb0000000000000ull, Actual.bitcastToAPInt().getRawData()[1]); |
| } |
| { |
| APFloat Actual = |
| APFloat::copySign(Float, APFloat(APFloat::IEEEdouble(), "-1")); |
| EXPECT_EQ(0xc00f000000000000ull, Actual.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0x3cb0000000000000ull, Actual.bitcastToAPInt().getRawData()[1]); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleFactories) { |
| { |
| uint64_t Data[] = { |
| 0, 0, |
| }; |
| EXPECT_EQ(APInt(128, 2, Data), |
| APFloat::getZero(APFloat::PPCDoubleDouble()).bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0x7fefffffffffffffull, 0x7c8ffffffffffffeull, |
| }; |
| EXPECT_EQ(APInt(128, 2, Data), |
| APFloat::getLargest(APFloat::PPCDoubleDouble()).bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0x0000000000000001ull, 0, |
| }; |
| EXPECT_EQ( |
| APInt(128, 2, Data), |
| APFloat::getSmallest(APFloat::PPCDoubleDouble()).bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = {0x0360000000000000ull, 0}; |
| EXPECT_EQ(APInt(128, 2, Data), |
| APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble()) |
| .bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0x8000000000000000ull, 0x0000000000000000ull, |
| }; |
| EXPECT_EQ( |
| APInt(128, 2, Data), |
| APFloat::getZero(APFloat::PPCDoubleDouble(), true).bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0xffefffffffffffffull, 0xfc8ffffffffffffeull, |
| }; |
| EXPECT_EQ( |
| APInt(128, 2, Data), |
| APFloat::getLargest(APFloat::PPCDoubleDouble(), true).bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0x8000000000000001ull, 0x0000000000000000ull, |
| }; |
| EXPECT_EQ(APInt(128, 2, Data), |
| APFloat::getSmallest(APFloat::PPCDoubleDouble(), true) |
| .bitcastToAPInt()); |
| } |
| { |
| uint64_t Data[] = { |
| 0x8360000000000000ull, 0x0000000000000000ull, |
| }; |
| EXPECT_EQ(APInt(128, 2, Data), |
| APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble(), true) |
| .bitcastToAPInt()); |
| } |
| EXPECT_TRUE(APFloat::getSmallest(APFloat::PPCDoubleDouble()).isSmallest()); |
| EXPECT_TRUE(APFloat::getLargest(APFloat::PPCDoubleDouble()).isLargest()); |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleIsDenormal) { |
| EXPECT_TRUE(APFloat::getSmallest(APFloat::PPCDoubleDouble()).isDenormal()); |
| EXPECT_FALSE(APFloat::getLargest(APFloat::PPCDoubleDouble()).isDenormal()); |
| EXPECT_FALSE( |
| APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble()).isDenormal()); |
| { |
| // (4 + 3) is not normalized |
| uint64_t Data[] = { |
| 0x4010000000000000ull, 0x4008000000000000ull, |
| }; |
| EXPECT_TRUE( |
| APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data)).isDenormal()); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleScalbn) { |
| // 3.0 + 3.0 << 53 |
| uint64_t Input[] = { |
| 0x4008000000000000ull, 0x3cb8000000000000ull, |
| }; |
| APFloat Result = |
| scalbn(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Input)), 1, |
| APFloat::rmNearestTiesToEven); |
| // 6.0 + 6.0 << 53 |
| EXPECT_EQ(0x4018000000000000ull, Result.bitcastToAPInt().getRawData()[0]); |
| EXPECT_EQ(0x3cc8000000000000ull, Result.bitcastToAPInt().getRawData()[1]); |
| } |
| |
| namespace PPCDoubleDoubleFrexpTestDetails { |
| // Define the rounding modes for easier readability. |
| static constexpr auto RNE = APFloat::rmNearestTiesToEven; |
| static constexpr auto RNA = APFloat::rmNearestTiesToAway; |
| static constexpr auto RTZ = APFloat::rmTowardZero; |
| static constexpr auto RUP = APFloat::rmTowardPositive; |
| static constexpr auto RDN = APFloat::rmTowardNegative; |
| |
| struct TestCase { |
| // Structure to hold the expected result of a conversion |
| struct ExpectedFractionExponent { |
| DD Fraction; |
| int Exponent; |
| friend APFloat::cmpResult compare(const ExpectedFractionExponent &Lhs, |
| const ExpectedFractionExponent &Rhs) { |
| const APFloat LhsFraction = makeDoubleAPFloat(Lhs.Fraction); |
| const APFloat RhsFraction = makeDoubleAPFloat(Rhs.Fraction); |
| const APFloat::cmpResult FractionRelation = |
| LhsFraction.compare(RhsFraction); |
| if (FractionRelation == APFloat::cmpUnordered) |
| return APFloat::cmpUnordered; |
| if (LhsFraction.isZero() && RhsFraction.isZero()) |
| return APFloat::cmpEqual; |
| if (!LhsFraction.isNegative() && |
| (RhsFraction.isNegative() || RhsFraction.isZero())) |
| return APFloat::cmpGreaterThan; |
| if (!RhsFraction.isNegative() && |
| (LhsFraction.isNegative() || LhsFraction.isZero())) |
| return APFloat::cmpLessThan; |
| if (Lhs.Exponent > Rhs.Exponent) |
| return LhsFraction.isNegative() ? APFloat::cmpLessThan |
| : APFloat::cmpGreaterThan; |
| if (Lhs.Exponent < Rhs.Exponent) |
| return RhsFraction.isNegative() ? APFloat::cmpGreaterThan |
| : APFloat::cmpLessThan; |
| return FractionRelation; |
| } |
| }; |
| |
| DD Input; |
| // Array indexed by the rounding mode enum value. |
| std::array<ExpectedFractionExponent, 5> Rounded = {}; |
| |
| // Helper to define the expected results for a specific rounding mode. |
| constexpr TestCase &with(APFloat::roundingMode RM, DD ExpectedDD, |
| int ExpectedExponent) { |
| Rounded[static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM)] = { |
| ExpectedDD, |
| ExpectedExponent, |
| }; |
| return *this; |
| } |
| |
| // Helper to define the same result for all rounding modes. |
| constexpr TestCase &withAll(DD ExpectedDD, int ExpectedExponent) { |
| return with(RNE, ExpectedDD, ExpectedExponent) |
| .with(RNA, ExpectedDD, ExpectedExponent) |
| .with(RTZ, ExpectedDD, ExpectedExponent) |
| .with(RUP, ExpectedDD, ExpectedExponent) |
| .with(RDN, ExpectedDD, ExpectedExponent); |
| } |
| }; |
| |
| auto testCases() { |
| static constexpr auto FrexpTestCases = std::array{ |
| // Input: +infinity |
| TestCase{{std::numeric_limits<double>::infinity(), 0.0}}.withAll( |
| {std::numeric_limits<double>::infinity(), 0.0}, INT_MAX), |
| |
| // Input: -infinity |
| TestCase{{-std::numeric_limits<double>::infinity(), 0.0}}.withAll( |
| {-std::numeric_limits<double>::infinity(), 0.0}, INT_MAX), |
| |
| // Input: NaN |
| TestCase{{std::numeric_limits<double>::quiet_NaN(), 0.0}}.withAll( |
| {std::numeric_limits<double>::quiet_NaN(), 0.0}, INT_MIN), |
| |
| // Input: 2^-1074 |
| TestCase{{0x1p-1074, 0.0}}.withAll({0x1p-1, 0.0}, -1073), |
| TestCase{{-0x1p-1074, 0.0}}.withAll({-0x1p-1, 0.0}, -1073), |
| |
| // Input: (2^1, -2^-1073 + -2^-1074) |
| TestCase{{0x1p1, -0x1.8p-1073}} |
| .withAll({0x1p0, -0x1p-1073}, 1) |
| .with(RNA, {0x1p0, -0x1p-1074}, 1) |
| .with(RUP, {0x1p0, -0x1p-1074}, 1), |
| TestCase{{-0x1p1, 0x1.8p-1073}} |
| .withAll({-0x1p0, 0x1p-1073}, 1) |
| .with(RNA, {-0x1p0, 0x1p-1074}, 1) |
| .with(RDN, {-0x1p0, 0x1p-1074}, 1), |
| |
| // Input: (2^1, -2^-1073) |
| TestCase{{0x1p1, -0x1p-1073}}.withAll({0x1p0, -0x1p-1074}, 1), |
| |
| // Input: (2^1, -2^-1074) |
| TestCase{{0x1p1, -0x1p-1074}} |
| .withAll({0x1p-1, -0.0}, 2) |
| .with(RDN, {0x1p0, -0x1p-1074}, 1) |
| .with(RTZ, {0x1p0, -0x1p-1074}, 1), |
| |
| // Input: (2^2, -2^-1072 + -2^-1073 + -2^-1074) |
| TestCase{{0x1p2, -0x1.cp-1072}} |
| .withAll({0x1p0, -0x1p-1073}, 2) |
| .with(RUP, {0x1p0, -0x1p-1074}, 2), |
| |
| // Input: (2^2, -2^-1072 + -2^-1073) |
| TestCase{{0x1p2, -0x1.8p-1072}} |
| .withAll({0x1p0, -0x1p-1073}, 2) |
| .with(RNA, {0x1p0, -0x1p-1074}, 2) |
| .with(RUP, {0x1p0, -0x1p-1074}, 2), |
| TestCase{{-0x1p2, 0x1.8p-1072}} |
| .withAll({-0x1p0, 0x1p-1073}, 2) |
| .with(RNA, {-0x1p0, 0x1p-1074}, 2) |
| .with(RDN, {-0x1p0, 0x1p-1074}, 2), |
| |
| // Input: (2^2, -2^-1072 + -2^-1074) |
| TestCase{{0x1p2, -0x1.4cp-1072}} |
| .withAll({0x1p0, -0x1p-1074}, 2) |
| .with(RDN, {0x1p0, -0x1p-1073}, 2) |
| .with(RTZ, {0x1p0, -0x1p-1073}, 2), |
| |
| // Input: (2^2, -2^-1072) |
| TestCase{{0x1p2, -0x1p-1072}}.withAll({0x1p0, -0x1p-1074}, 2), |
| |
| // Input: (2^2, -2^-1073 + -2^-1074) |
| TestCase{{0x1p2, -0x1.8p-1073}} |
| .withAll({0x1p0, -0x1p-1074}, 2) |
| .with(RUP, {0x1p-1, -0.0}, 3), |
| |
| // Input: (2^2, -2^-1073) |
| TestCase{{0x1p2, -0x1p-1073}} |
| .withAll({0x1p-1, -0.0}, 3) |
| .with(RDN, {0x1p0, -0x1p-1074}, 2) |
| .with(RTZ, {0x1p0, -0x1p-1074}, 2), |
| |
| // Input: (2^2, -2^-1074) |
| TestCase{{0x1p2, -0x1p-1074}} |
| .withAll({0x1p-1, -0.0}, 3) |
| .with(RDN, {0x1p0, -0x1p-1074}, 2) |
| .with(RTZ, {0x1p0, -0x1p-1074}, 2), |
| |
| // Input: 3+3*2^-53 canonicalized to (3+2^-51, -2^-53) |
| // Output: 0.75+0.75*2^-53 canonicalized to (.75+2^-53, -2^-55) |
| TestCase{{0x1.8000000000001p1, -0x1p-53}}.withAll( |
| {0x1.8000000000001p-1, -0x1p-55}, 2), |
| TestCase{{-0x1.8000000000001p1, 0x1p-53}}.withAll( |
| {-0x1.8000000000001p-1, 0x1p-55}, 2), |
| |
| // Input: (2^1021+2^969, 2^968-2^915) |
| TestCase{{0x1.0000000000001p1021, 0x1.fffffffffffffp967}}.withAll( |
| {0x1.0000000000001p-1, 0x1.fffffffffffffp-55}, 1022), |
| TestCase{{-0x1.0000000000001p1021, -0x1.fffffffffffffp967}}.withAll( |
| {-0x1.0000000000001p-1, -0x1.fffffffffffffp-55}, 1022), |
| |
| // Input: (2^1023, -2^-1) |
| TestCase{{0x1p+1023, -0x1p-1}}.withAll({0x1p0, -0x1p-1024}, 1023), |
| TestCase{{-0x1p+1023, 0x1p-1}}.withAll({-0x1p0, 0x1p-1024}, 1023), |
| |
| // Input: (2^1023, -2^-51) |
| TestCase{{0x1p+1023, -0x1p-51}}.withAll({0x1p0, -0x1p-1074}, 1023), |
| TestCase{{-0x1p+1023, 0x1p-51}}.withAll({-0x1p0, 0x1p-1074}, 1023), |
| |
| // Input: (2^1023, -2^-52) |
| TestCase{{0x1p+1023, -0x1p-52}} |
| .withAll({0x1p-1, -0x0p0}, 1024) |
| .with(RDN, {0x1p0, -0x1p-1074}, 1023) |
| .with(RTZ, {0x1p0, -0x1p-1074}, 1023), |
| TestCase{{-0x1p+1023, 0x1p-52}} |
| .withAll({-0x1p-1, 0x0p0}, 1024) |
| .with(RUP, {-0x1p0, 0x1p-1074}, 1023) |
| .with(RTZ, {-0x1p0, 0x1p-1074}, 1023), |
| |
| // Input: (2^1023, 2^-1074) |
| TestCase{{0x1p+1023, 0x1p-1074}} |
| .withAll({0x1p-1, 0x0p+0}, 1024) |
| .with(RUP, {0x1p-1, 0x1p-1074}, 1024), |
| TestCase{{-0x1p+1023, -0x1p-1074}} |
| .withAll({-0x1p-1, -0x0p+0}, 1024) |
| .with(RDN, {-0x1p-1, -0x1p-1074}, 1024), |
| |
| // Input: (2^1024-2^971, 2^970-2^918) |
| TestCase{{0x1.fffffffffffffp+1023, 0x1.ffffffffffffep+969}}.withAll( |
| {0x1.fffffffffffffp-1, 0x1.ffffffffffffep-55}, 1024), |
| TestCase{{-0x1.fffffffffffffp+1023, -0x1.ffffffffffffep+969}}.withAll( |
| {-0x1.fffffffffffffp-1, -0x1.ffffffffffffep-55}, 1024), |
| }; |
| return FrexpTestCases; |
| } |
| } // namespace PPCDoubleDoubleFrexpTestDetails |
| |
| class PPCDoubleDoubleFrexpValueTest |
| : public testing::Test, |
| public ::testing::WithParamInterface< |
| PPCDoubleDoubleFrexpTestDetails::TestCase> {}; |
| |
| INSTANTIATE_TEST_SUITE_P( |
| PPCDoubleDoubleFrexpValueParamTests, PPCDoubleDoubleFrexpValueTest, |
| ::testing::ValuesIn(PPCDoubleDoubleFrexpTestDetails::testCases())); |
| |
| TEST_P(PPCDoubleDoubleFrexpValueTest, PPCDoubleDoubleFrexp) { |
| const PPCDoubleDoubleFrexpTestDetails::TestCase Params = GetParam(); |
| const APFloat Input = makeDoubleAPFloat(Params.Input); |
| auto RmToIdx = [](APFloat::roundingMode RM) { |
| return static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM); |
| }; |
| // First, make sure our expected results are consistent with each other before |
| // bothering to test the implementation. |
| if (Input.isFinite()) { |
| // Make sure the input is canonical. |
| EXPECT_EQ(APFloat{Params.Input.Hi}, |
| APFloat{Params.Input.Hi} + APFloat{Params.Input.Lo}) |
| << Params.Input.Hi << " + " << Params.Input.Lo; |
| |
| const auto Dn = Params.Rounded[RmToIdx(APFloat::rmTowardNegative)]; |
| const auto Up = Params.Rounded[RmToIdx(APFloat::rmTowardPositive)]; |
| const auto Tz = Params.Rounded[RmToIdx(APFloat::rmTowardZero)]; |
| const auto Ne = Params.Rounded[RmToIdx(APFloat::rmNearestTiesToEven)]; |
| const auto Na = Params.Rounded[RmToIdx(APFloat::rmNearestTiesToAway)]; |
| |
| // The rdn result must be no larger than the rup result. |
| const APFloat::cmpResult DnVsUp = compare(Dn, Up); |
| EXPECT_TRUE(DnVsUp == APFloat::cmpLessThan || DnVsUp == APFloat::cmpEqual); |
| |
| for (size_t I = 0, E = std::size(Params.Rounded); I != E; ++I) { |
| const APFloat RoundedFraction = |
| makeDoubleAPFloat(Params.Rounded[I].Fraction); |
| // All possible results should be bracketed by [Dn, Up]. |
| const APFloat::cmpResult VsDn = compare(Params.Rounded[I], Dn); |
| EXPECT_TRUE(VsDn == APFloat::cmpGreaterThan || VsDn == APFloat::cmpEqual); |
| const APFloat::cmpResult VsUp = compare(Params.Rounded[I], Up); |
| EXPECT_TRUE(VsUp == APFloat::cmpLessThan || VsUp == APFloat::cmpEqual); |
| // A rounding result is either equal to the rup or rdn result. |
| EXPECT_TRUE(VsUp == APFloat::cmpEqual || VsDn == APFloat::cmpEqual); |
| // frexp returns a result whose magnitude is in in [.5, 1) so its exponent |
| // should be -1. |
| if (!RoundedFraction.isZero()) |
| EXPECT_EQ(ilogb(RoundedFraction), -1) |
| << static_cast<APFloat::roundingMode>(I); |
| // Decomposition preserves sign. |
| EXPECT_EQ(RoundedFraction.isNegative(), Input.isNegative()); |
| // A rounding result must be canonical. |
| EXPECT_EQ(APFloat{Params.Rounded[I].Fraction.Hi}, |
| APFloat{Params.Rounded[I].Fraction.Hi} + |
| APFloat{Params.Rounded[I].Fraction.Lo}) |
| << Params.Rounded[I].Fraction.Hi << " + " |
| << Params.Rounded[I].Fraction.Lo; |
| } |
| |
| // The rtz result must be either rup or rdn depending on the sign. |
| if (Input.isNegative()) { |
| const APFloat::cmpResult TzVsUp = compare(Tz, Up); |
| EXPECT_EQ(TzVsUp, APFloat::cmpEqual); |
| } else { |
| const APFloat::cmpResult TzVsDn = compare(Tz, Dn); |
| EXPECT_EQ(TzVsDn, APFloat::cmpEqual); |
| } |
| |
| // The recomposed up should be at least as big as the input. |
| const APFloat RecomposedUp = |
| scalbn(makeDoubleAPFloat(Up.Fraction), Up.Exponent, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(RecomposedUp >= Input); |
| // The recomposed down can't be larger than the input. |
| const APFloat RecomposedDn = |
| scalbn(makeDoubleAPFloat(Dn.Fraction), Dn.Exponent, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(RecomposedDn <= Input); |
| // The recomposed tz must have a smaller magnitude. |
| const APFloat RecomposedTz = |
| scalbn(makeDoubleAPFloat(Tz.Fraction), Tz.Exponent, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_TRUE(abs(RecomposedTz) <= abs(Input)); |
| // Either both or neither of the recomposed round-to-nearest results are |
| // equal to the input. |
| const APFloat RecomposedNe = |
| scalbn(makeDoubleAPFloat(Ne.Fraction), Ne.Exponent, |
| APFloat::rmNearestTiesToEven); |
| const APFloat RecomposedNa = |
| scalbn(makeDoubleAPFloat(Na.Fraction), Na.Exponent, |
| APFloat::rmNearestTiesToEven); |
| EXPECT_EQ(RecomposedNe == Input, RecomposedNa == Input); |
| // Either the ne result equals the na result or the na result has a bigger |
| // magnitude. |
| const APFloat::cmpResult NeVsNa = |
| abs(RecomposedNe).compare(abs(RecomposedNa)); |
| EXPECT_TRUE(NeVsNa == APFloat::cmpLessThan || NeVsNa == APFloat::cmpEqual); |
| // ne and na may only disagree if they broke a tie differently. |
| if (NeVsNa == APFloat::cmpLessThan) { |
| // ne's magnitude should be lower than input. |
| const APFloat::cmpResult NeVsInput = |
| abs(RecomposedNe).compare(abs(Input)); |
| EXPECT_EQ(NeVsInput, APFloat::cmpLessThan); |
| // na's magnitude should be greater than input. |
| const APFloat::cmpResult NaVsInput = |
| abs(RecomposedNa).compare(abs(Input)); |
| EXPECT_EQ(NaVsInput, APFloat::cmpGreaterThan); |
| } |
| // If up or down perfectly reconstructs the input, the round-to-nearest |
| // results should too. |
| if (RecomposedUp == Input || RecomposedDn == Input) { |
| EXPECT_EQ(RecomposedNe, Input); |
| EXPECT_EQ(RecomposedNa, Input); |
| } |
| } |
| |
| for (size_t I = 0, E = std::size(Params.Rounded); I != E; ++I) { |
| const auto RM = static_cast<APFloat::roundingMode>(I); |
| const auto &Expected = Params.Rounded[I]; |
| const APFloat ExpectedFraction = makeDoubleAPFloat(Expected.Fraction); |
| |
| int ActualExponent; |
| const APFloat ActualFraction = frexp(Input, ActualExponent, RM); |
| if (ExpectedFraction.isNaN()) |
| EXPECT_TRUE(ActualFraction.isNaN()); |
| else |
| EXPECT_EQ(ActualFraction.compare(ExpectedFraction), APFloat::cmpEqual) |
| << ActualFraction << " vs " << ExpectedFraction << " for input " |
| << Params.Input.Hi << " + " << Params.Input.Lo << " RM " << RM; |
| EXPECT_EQ(ActualExponent, Expected.Exponent) |
| << "for input " << Params.Input.Hi << " + " << Params.Input.Lo |
| << " RM " << RM; |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleNext) { |
| auto NextUp = [](APFloat X) { |
| X.next(/*nextDown=*/false); |
| return X; |
| }; |
| |
| auto NextDown = [](APFloat X) { |
| X.next(/*nextDown=*/true); |
| return X; |
| }; |
| |
| auto Zero = [] { return APFloat::getZero(APFloat::IEEEdouble()); }; |
| |
| auto One = [] { return APFloat::getOne(APFloat::IEEEdouble()); }; |
| |
| // 0x1p-1074 |
| auto MinSubnormal = [] { |
| return APFloat::getSmallest(APFloat::IEEEdouble()); |
| }; |
| |
| // 2^-52 |
| auto Eps = [&] { |
| const fltSemantics &Sem = APFloat::IEEEdouble(); |
| return scalbn(One(), 1 - APFloat::semanticsPrecision(Sem), |
| APFloat::rmNearestTiesToEven); |
| }; |
| |
| // 2^-53 |
| auto EpsNeg = [&] { return scalbn(Eps(), -1, APFloat::rmNearestTiesToEven); }; |
| |
| APFloat Test(APFloat::PPCDoubleDouble(), APFloat::uninitialized); |
| APFloat Expected(APFloat::PPCDoubleDouble(), APFloat::uninitialized); |
| |
| // 1. Test Special Cases Values. |
| // |
| // Test all special values for nextUp and nextDown prescribed by IEEE-754R |
| // 2008. These are: |
| // 1. +inf |
| // 2. -inf |
| // 3. getLargest() |
| // 4. -getLargest() |
| // 5. getSmallest() |
| // 6. -getSmallest() |
| // 7. qNaN |
| // 8. sNaN |
| // 9. +0 |
| // 10. -0 |
| |
| // nextUp(+inf) = +inf. |
| Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.isPosInfinity()); |
| EXPECT_TRUE(!Test.isNegative()); |
| |
| // nextDown(+inf) = -nextUp(-inf) = -(-getLargest()) = getLargest() |
| Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_FALSE(Test.isNegative()); |
| EXPECT_TRUE(Test.isLargest()); |
| |
| // nextUp(-inf) = -getLargest() |
| Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true); |
| Expected = APFloat::getLargest(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegative()); |
| EXPECT_TRUE(Test.isLargest()); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf. |
| Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true); |
| Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegInfinity()); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // nextUp(getLargest()) = +inf |
| Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), false); |
| Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.isPosInfinity()); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // nextUp(-getSmallest()) = -0. |
| Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/true); |
| Expected = APFloat::getZero(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegZero()); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // nextDown(getSmallest()) = -nextUp(-getSmallest()) = -(-0) = +0. |
| Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/false); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isPosZero()); |
| |
| // nextDown(-getLargest()) = -nextUp(getLargest()) = -(inf) = -inf. |
| Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegInfinity()); |
| |
| // nextUp(qNaN) = qNaN |
| Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.isNaN()); |
| EXPECT_FALSE(Test.isSignaling()); |
| |
| // nextDown(qNaN) = qNaN |
| Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isNaN()); |
| EXPECT_FALSE(Test.isSignaling()); |
| |
| // nextUp(sNaN) = qNaN |
| Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(false), APFloat::opInvalidOp); |
| EXPECT_TRUE(Test.isNaN()); |
| EXPECT_FALSE(Test.isSignaling()); |
| |
| // nextDown(sNaN) = qNaN |
| Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(true), APFloat::opInvalidOp); |
| EXPECT_TRUE(Test.isNaN()); |
| EXPECT_FALSE(Test.isSignaling()); |
| |
| // nextUp(+0) = +getSmallest() |
| Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_FALSE(Test.isNegative()); |
| EXPECT_TRUE(Test.isSmallest()); |
| |
| // nextDown(+0) = -nextUp(-0) = -getSmallest() |
| Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegative()); |
| EXPECT_TRUE(Test.isSmallest()); |
| |
| // nextUp(-0) = +getSmallest() |
| Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_FALSE(Test.isNegative()); |
| EXPECT_TRUE(Test.isSmallest()); |
| |
| // nextDown(-0) = -nextUp(0) = -getSmallest() |
| Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true); |
| EXPECT_EQ(Test.next(true), APFloat::opOK); |
| EXPECT_TRUE(Test.isNegative()); |
| EXPECT_TRUE(Test.isSmallest()); |
| |
| // 2. Cases where the lo APFloat is zero. |
| |
| // 2a. |hi| < 2*DBL_MIN_NORMAL (DD precision == D precision) |
| Test = APFloat(APFloat::PPCDoubleDouble(), "0x1.fffffffffffffp-1022"); |
| Expected = APFloat(APFloat::PPCDoubleDouble(), "0x1p-1021"); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); |
| |
| // 2b. |hi| >= 2*DBL_MIN_NORMAL (DD precision > D precision) |
| // Test at hi = 1.0, lo = 0. |
| Test = makeDoubleAPFloat(One(), Zero()); |
| Expected = makeDoubleAPFloat(One(), MinSubnormal()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // Test at hi = -1.0. delta = 2^-1074 (positive, moving towards +Inf). |
| Test = makeDoubleAPFloat(-One(), Zero()); |
| Expected = makeDoubleAPFloat(-One(), MinSubnormal()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // Testing the boundary where calculated delta equals DBL_TRUE_MIN. |
| // Requires ilogb(hi) = E = -968. |
| // delta = 2^(-968 - 106) = 2^-1074 = DBL_TRUE_MIN. |
| Test = makeDoubleAPFloat("0x1p-968", Zero()); |
| Expected = makeDoubleAPFloat("0x1p-968", MinSubnormal()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // Testing below the boundary (E < -968). Delta clamps to DBL_TRUE_MIN. |
| Test = makeDoubleAPFloat("0x1p-969", Zero()); |
| Expected = makeDoubleAPFloat("0x1p-969", MinSubnormal()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // 3. Standard Increment (No rollover) |
| // hi=1.0, lo=2^-1074. |
| Test = makeDoubleAPFloat(One(), MinSubnormal()); |
| Expected = makeDoubleAPFloat(One(), NextUp(MinSubnormal())); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // Incrementing negative lo. |
| Test = makeDoubleAPFloat(One(), -MinSubnormal()); |
| Expected = makeDoubleAPFloat(One(), Zero()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); |
| |
| // Crossing lo=0. |
| Test = makeDoubleAPFloat(One(), -MinSubnormal()); |
| Expected = makeDoubleAPFloat(One(), Zero()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); |
| |
| // 4. Rollover Cases around 1.0 (Positive hi) |
| // hi=1.0, lo=nextDown(2^-53). |
| Test = makeDoubleAPFloat(One(), NextDown(EpsNeg())); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = makeDoubleAPFloat(One(), EpsNeg()); |
| EXPECT_FALSE(Test.isDenormal()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| |
| // Input: (1, ulp(1)/2). nextUp(lo)=next(H). V>Midpoint. Rollover occurs |
| // Can't naively increment lo: |
| // RTNE(0x1p+0 + 0x1.0000000000001p-53) == 0x1.0000000000001p+0. |
| // Can't naively TwoSum(0x1p+0, nextUp(0x1p-53)): |
| // It gives {nextUp(0x1p+0), nextUp(nextUp(-0x1p-53))} but the next |
| // number should be {nextUp(0x1p+0), nextUp(-0x1p-53)}. |
| Test = makeDoubleAPFloat(One(), EpsNeg()); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = makeDoubleAPFloat(NextUp(One()), NextUp(-EpsNeg())); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| EXPECT_FALSE(Test.isDenormal()); |
| |
| // hi = nextDown(1), lo = nextDown(0x1p-54) |
| Test = makeDoubleAPFloat(NextDown(One()), NextDown(APFloat(0x1p-54))); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = makeDoubleAPFloat(One(), APFloat(-0x1p-54)); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| EXPECT_FALSE(Test.isDenormal()); |
| |
| // 5. Negative Rollover (Moving towards Zero / +Inf) |
| |
| // hi = -1, lo = nextDown(0x1p-54) |
| Test = makeDoubleAPFloat(APFloat(-1.0), NextDown(APFloat(0x1p-54))); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = makeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| EXPECT_FALSE(Test.isDenormal()); |
| |
| // hi = -1, lo = 0x1p-54 |
| Test = makeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = |
| makeDoubleAPFloat(NextUp(APFloat(-1.0)), NextUp(APFloat(-0x1p-54))); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| EXPECT_FALSE(Test.isDenormal()); |
| |
| // 6. Rollover across Power of 2 boundary (Exponent change) |
| Test = makeDoubleAPFloat(NextDown(APFloat(2.0)), NextDown(EpsNeg())); |
| EXPECT_FALSE(Test.isDenormal()); |
| Expected = makeDoubleAPFloat(APFloat(2.0), -EpsNeg()); |
| EXPECT_EQ(Test.next(false), APFloat::opOK); |
| EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); |
| EXPECT_FALSE(Test.isDenormal()); |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntInexact) { |
| // Create an integer which would not be exactly representable in |
| // PPCDoubleDoubleLegacy. |
| for (bool IsSigned : {false, true}) { |
| const unsigned BitWidth = |
| APFloat::semanticsPrecision(APFloat::IEEEdouble()) * 3 + |
| (IsSigned ? 1 : 0); |
| |
| for (bool Negative : |
| IsSigned ? std::vector{false, true} : std::vector{false}) { |
| APInt Huge = APInt{BitWidth, 0}; |
| // Set the highest bit without making Huge negative.. |
| Huge.setBit(BitWidth - (IsSigned ? 2 : 1)); |
| // Set the low bit. |
| Huge.setBit(0); |
| if (Negative) |
| Huge.negate(); |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(Huge, /*IsSigned=*/IsSigned, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opOK); |
| |
| bool IsExact; |
| APSInt ResultInt{Huge.getBitWidth(), /*isUnsigned=*/!IsSigned}; |
| const APFloat::opStatus ConvertToStatus = |
| F.convertToInteger(ResultInt, APFloat::rmTowardZero, &IsExact); |
| |
| EXPECT_TRUE(IsExact) << "RM: " << RM; |
| EXPECT_TRUE(ResultInt.eq(Huge)) << ResultInt << " vs " << Huge << "\n"; |
| EXPECT_EQ(ConvertToStatus, APFloat::opOK); |
| } |
| } |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntBoundary) { |
| const unsigned Binary64Precision = |
| APFloat::semanticsPrecision(APFloat::IEEEdouble()); |
| APSInt Boundary = |
| APSInt::getMaxValue(Binary64Precision + 1, /*Unsigned=*/true); |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| const APFloat Exact = makeDoubleAPFloat(0x1p54, -0x1p0); |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(Boundary, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opOK); |
| EXPECT_EQ(F, Exact); |
| } |
| |
| Boundary = APSInt{APInt::getHighBitsSet(/*numBits=*/128, |
| /*hiBitsSet=*/Binary64Precision + 1), |
| /*isUnsigned=*/true}; |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| const APFloat Exact = makeDoubleAPFloat(0x1p128, -0x1p74); |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(Boundary, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opOK); |
| EXPECT_EQ(F, Exact); |
| } |
| } |
| |
| TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntEnormous) { |
| APFloat Largest = APFloat::getLargest(APFloat::PPCDoubleDouble()); |
| int Exponent = ilogb(Largest); |
| unsigned BitWidth = Exponent + 1; |
| APSInt HugeInt{BitWidth, /*isUnsigned=*/true}; |
| bool IsExact; |
| APFloat::opStatus Status = |
| Largest.convertToInteger(HugeInt, APFloat::rmTowardPositive, &IsExact); |
| ASSERT_EQ(Status, APFloat::opOK); |
| ASSERT_TRUE(IsExact); |
| |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opOK); |
| EXPECT_EQ(F, Largest); |
| } |
| |
| const unsigned MaxExponent = |
| APFloat::semanticsMaxExponent(APFloat::IEEEdouble()); |
| const unsigned Binary64Precision = |
| APFloat::semanticsPrecision(APFloat::IEEEdouble()); |
| const unsigned UlpOfLargest = MaxExponent - (2 * Binary64Precision); |
| const unsigned HalfUlpOfLargest = UlpOfLargest - 1; |
| |
| // Add just under a half-ulp. This should never overflow for |
| // round-ties-to-nearest modes. |
| HugeInt.setLowBits(HalfUlpOfLargest); |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| if (RM == APFloat::rmTowardPositive) { |
| EXPECT_TRUE(F.isPosInfinity()) << F; |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow); |
| } else { |
| EXPECT_EQ(F, Largest); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| } |
| } |
| |
| // Now test adding a half-ulp. This should cause overflow for ties-to-away. |
| // ties-to-even will not overflow if the max finite value has a clear low bit. |
| ++HugeInt; |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| const bool Overflow = |
| RM == APFloat::rmTowardPositive || RM == APFloat::rmNearestTiesToAway || |
| (RM == APFloat::rmNearestTiesToEven && HugeInt[UlpOfLargest]); |
| if (Overflow) { |
| EXPECT_TRUE(F.isPosInfinity()) << F; |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow); |
| } else { |
| EXPECT_EQ(F, Largest); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| } |
| } |
| |
| // Now test adding just over a half-ulp. This should break all ties. |
| ++HugeInt; |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| const bool Overflow = RM == APFloat::rmTowardPositive || |
| RM == APFloat::rmNearestTiesToAway || |
| RM == APFloat::rmNearestTiesToEven; |
| if (Overflow) { |
| EXPECT_TRUE(F.isPosInfinity()) << F; |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow); |
| } else { |
| EXPECT_EQ(F, Largest); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| } |
| } |
| |
| HugeInt.setAllBits(); |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| const bool Overflow = RM == APFloat::rmTowardPositive || |
| RM == APFloat::rmNearestTiesToAway || |
| RM == APFloat::rmNearestTiesToEven; |
| if (Overflow) { |
| EXPECT_TRUE(F.isPosInfinity()) << F; |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow); |
| } else { |
| EXPECT_EQ(F, Largest); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| } |
| } |
| |
| HugeInt.clearAllBits(); |
| HugeInt.setBit(2 * Binary64Precision + 1); |
| HugeInt.setLowBits(Binary64Precision + 1); |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| const APFloat RoundUp = makeDoubleAPFloat(0x1p107, 0x1p54); |
| const APFloat RoundDown = makeDoubleAPFloat(0x1p107, 0x1.fffffffffffffp53); |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| |
| if (RM == APFloat::rmNearestTiesToEven || |
| RM == APFloat::rmNearestTiesToAway || RM == APFloat::rmTowardPositive) |
| EXPECT_EQ(F, RoundUp); |
| else |
| EXPECT_EQ(F, RoundDown); |
| } |
| |
| ++HugeInt; |
| // 162259276829213381405976519770112 can be represented exactly. |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| const APFloat Exact = makeDoubleAPFloat(0x1p107, 0x1p54); |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opOK); |
| EXPECT_EQ(F, Exact); |
| } |
| |
| ++HugeInt; |
| // 162259276829213381405976519770113 rounds to either: |
| // 162259276829213381405976519770112 |
| // 162259276829213381405976519770114 |
| for (const APFloat::roundingMode RM : |
| {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative, |
| APFloat::rmTowardPositive, APFloat::rmTowardZero, |
| APFloat::rmNearestTiesToEven}) { |
| const APFloat RoundUp = |
| makeDoubleAPFloat(0x1.0000000000001p107, -0x1.fffffffffffffp53); |
| const APFloat RoundDown = makeDoubleAPFloat(0x1p107, 0x1p54); |
| EXPECT_LT(RoundDown, RoundUp); |
| |
| APFloat F{APFloat::PPCDoubleDouble()}; |
| const APFloat::opStatus ConvertFromStatus = |
| F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM); |
| EXPECT_EQ(ConvertFromStatus, APFloat::opInexact); |
| |
| if (RM == APFloat::rmNearestTiesToAway || RM == APFloat::rmTowardPositive) |
| EXPECT_EQ(F, RoundUp); |
| else |
| EXPECT_EQ(F, RoundDown); |
| } |
| } |
| |
| TEST(APFloatTest, x87Largest) { |
| APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended()); |
| EXPECT_TRUE(MaxX87Val.isLargest()); |
| } |
| |
| TEST(APFloatTest, x87Next) { |
| APFloat F(APFloat::x87DoubleExtended(), "-1.0"); |
| F.next(false); |
| EXPECT_TRUE(ilogb(F) == -1); |
| } |
| |
| TEST(APFloatTest, Float8ExhaustivePair) { |
| // Test each pair of 8-bit floats with non-standard semantics |
| for (APFloat::Semantics Sem : |
| {APFloat::S_Float8E4M3FN, APFloat::S_Float8E5M2FNUZ, |
| APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); |
| for (int i = 0; i < 256; i++) { |
| for (int j = 0; j < 256; j++) { |
| SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + |
| ",j=" + std::to_string(j)); |
| APFloat x(S, APInt(8, i)); |
| APFloat y(S, APInt(8, j)); |
| |
| bool losesInfo; |
| APFloat x16 = x; |
| x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| APFloat y16 = y; |
| y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| |
| // Add |
| APFloat z = x; |
| z.add(y, APFloat::rmNearestTiesToEven); |
| APFloat z16 = x16; |
| z16.add(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Subtract |
| z = x; |
| z.subtract(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.subtract(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Multiply |
| z = x; |
| z.multiply(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.multiply(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Divide |
| z = x; |
| z.divide(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.divide(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Mod |
| z = x; |
| z.mod(y); |
| z16 = x16; |
| z16.mod(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Remainder |
| z = x; |
| z.remainder(y); |
| z16 = x16; |
| z16.remainder(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| } |
| } |
| } |
| } |
| |
| TEST(APFloatTest, Float8E8M0FNUExhaustivePair) { |
| // Test each pair of 8-bit values for Float8E8M0FNU format |
| APFloat::Semantics Sem = APFloat::S_Float8E8M0FNU; |
| const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); |
| for (int i = 0; i < 256; i++) { |
| for (int j = 0; j < 256; j++) { |
| SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + |
| ",j=" + std::to_string(j)); |
| APFloat x(S, APInt(8, i)); |
| APFloat y(S, APInt(8, j)); |
| |
| bool losesInfo; |
| APFloat xd = x; |
| xd.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| APFloat yd = y; |
| yd.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| |
| // Add |
| APFloat z = x; |
| z.add(y, APFloat::rmNearestTiesToEven); |
| APFloat zd = xd; |
| zd.add(yd, APFloat::rmNearestTiesToEven); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Subtract |
| if (i >= j) { |
| z = x; |
| z.subtract(y, APFloat::rmNearestTiesToEven); |
| zd = xd; |
| zd.subtract(yd, APFloat::rmNearestTiesToEven); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| } |
| |
| // Multiply |
| z = x; |
| z.multiply(y, APFloat::rmNearestTiesToEven); |
| zd = xd; |
| zd.multiply(yd, APFloat::rmNearestTiesToEven); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Divide |
| z = x; |
| z.divide(y, APFloat::rmNearestTiesToEven); |
| zd = xd; |
| zd.divide(yd, APFloat::rmNearestTiesToEven); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Mod |
| z = x; |
| z.mod(y); |
| zd = xd; |
| zd.mod(yd); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| APFloat mod_cached = z; |
| // When one of them is a NaN, the result is a NaN. |
| // When i < j, the mod is 'i' since it is the smaller |
| // number. Otherwise the mod is always zero since |
| // both x and y are powers-of-two in this format. |
| // Since this format does not support zero and it is |
| // represented as the smallest normalized value, we |
| // test for isSmallestNormalized(). |
| if (i == 255 || j == 255) |
| EXPECT_TRUE(z.isNaN()); |
| else if (i >= j) |
| EXPECT_TRUE(z.isSmallestNormalized()); |
| else |
| EXPECT_TRUE(z.bitwiseIsEqual(x)); |
| |
| // Remainder |
| z = x; |
| z.remainder(y); |
| zd = xd; |
| zd.remainder(yd); |
| zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(zd)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| // Since this format has only exponents (i.e. no precision) |
| // we expect the remainder and mod to provide the same results. |
| EXPECT_TRUE(z.bitwiseIsEqual(mod_cached)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| } |
| } |
| } |
| |
| TEST(APFloatTest, Float6ExhaustivePair) { |
| // Test each pair of 6-bit floats with non-standard semantics |
| for (APFloat::Semantics Sem : |
| {APFloat::S_Float6E3M2FN, APFloat::S_Float6E2M3FN}) { |
| const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); |
| for (int i = 1; i < 64; i++) { |
| for (int j = 1; j < 64; j++) { |
| SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + |
| ",j=" + std::to_string(j)); |
| APFloat x(S, APInt(6, i)); |
| APFloat y(S, APInt(6, j)); |
| |
| bool losesInfo; |
| APFloat x16 = x; |
| x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| APFloat y16 = y; |
| y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| |
| // Add |
| APFloat z = x; |
| z.add(y, APFloat::rmNearestTiesToEven); |
| APFloat z16 = x16; |
| z16.add(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Subtract |
| z = x; |
| z.subtract(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.subtract(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Multiply |
| z = x; |
| z.multiply(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.multiply(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Skip divide by 0 |
| if (j == 0 || j == 32) |
| continue; |
| |
| // Divide |
| z = x; |
| z.divide(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.divide(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Mod |
| z = x; |
| z.mod(y); |
| z16 = x16; |
| z16.mod(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Remainder |
| z = x; |
| z.remainder(y); |
| z16 = x16; |
| z16.remainder(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| } |
| } |
| } |
| } |
| |
| TEST(APFloatTest, Float4ExhaustivePair) { |
| // Test each pair of 4-bit floats with non-standard semantics |
| for (APFloat::Semantics Sem : {APFloat::S_Float4E2M1FN}) { |
| const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); |
| for (int i = 0; i < 16; i++) { |
| for (int j = 0; j < 16; j++) { |
| SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + |
| ",j=" + std::to_string(j)); |
| APFloat x(S, APInt(4, i)); |
| APFloat y(S, APInt(4, j)); |
| |
| bool losesInfo; |
| APFloat x16 = x; |
| x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| APFloat y16 = y; |
| y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_FALSE(losesInfo); |
| |
| // Add |
| APFloat z = x; |
| z.add(y, APFloat::rmNearestTiesToEven); |
| APFloat z16 = x16; |
| z16.add(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Subtract |
| z = x; |
| z.subtract(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.subtract(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Multiply |
| z = x; |
| z.multiply(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.multiply(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Skip divide by 0 |
| if (j == 0 || j == 8) |
| continue; |
| |
| // Divide |
| z = x; |
| z.divide(y, APFloat::rmNearestTiesToEven); |
| z16 = x16; |
| z16.divide(y16, APFloat::rmNearestTiesToEven); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Mod |
| z = x; |
| z.mod(y); |
| z16 = x16; |
| z16.mod(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| |
| // Remainder |
| z = x; |
| z.remainder(y); |
| z16 = x16; |
| z16.remainder(y16); |
| z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_TRUE(z.bitwiseIsEqual(z16)) |
| << "sem=" << Sem << ", i=" << i << ", j=" << j; |
| } |
| } |
| } |
| } |
| |
| TEST(APFloatTest, ConvertE4M3FNToE5M2) { |
| bool losesInfo; |
| APFloat test(APFloat::Float8E4M3FN(), "1.0"); |
| APFloat::opStatus status = test.convert( |
| APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| test = APFloat(APFloat::Float8E4M3FN(), "0.0"); |
| status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| test = APFloat(APFloat::Float8E4M3FN(), "0x1.2p0"); // 1.125 |
| status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInexact); |
| |
| test = APFloat(APFloat::Float8E4M3FN(), "0x1.6p0"); // 1.375 |
| status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInexact); |
| |
| // Convert E4M3FN denormal to E5M2 normal. Should not be truncated, despite |
| // the destination format having one fewer significand bit |
| test = APFloat(APFloat::Float8E4M3FN(), "0x1.Cp-7"); |
| status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.Cp-7, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| // Test convert from NaN |
| test = APFloat(APFloat::Float8E4M3FN(), "nan"); |
| status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| } |
| |
| TEST(APFloatTest, ConvertE5M2ToE4M3FN) { |
| bool losesInfo; |
| APFloat test(APFloat::Float8E5M2(), "1.0"); |
| APFloat::opStatus status = test.convert( |
| APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| test = APFloat(APFloat::Float8E5M2(), "0.0"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| test = APFloat(APFloat::Float8E5M2(), "0x1.Cp8"); // 448 |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.Cp8 /* 448 */, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| // Test overflow |
| test = APFloat(APFloat::Float8E5M2(), "0x1.0p9"); // 512 |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact); |
| |
| // Test underflow |
| test = APFloat(APFloat::Float8E5M2(), "0x1.0p-10"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0., test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| |
| // Test rounding up to smallest denormal number |
| test = APFloat(APFloat::Float8E5M2(), "0x1.8p-10"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p-9, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| |
| // Testing inexact rounding to denormal number |
| test = APFloat(APFloat::Float8E5M2(), "0x1.8p-9"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p-8, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| |
| APFloat nan = APFloat(APFloat::Float8E4M3FN(), "nan"); |
| |
| // Testing convert from Inf |
| test = APFloat(APFloat::Float8E5M2(), "inf"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInexact); |
| EXPECT_TRUE(test.bitwiseIsEqual(nan)); |
| |
| // Testing convert from quiet NaN |
| test = APFloat(APFloat::Float8E5M2(), "nan"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(nan)); |
| |
| // Testing convert from signaling NaN |
| test = APFloat(APFloat::Float8E5M2(), "snan"); |
| status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInvalidOp); |
| EXPECT_TRUE(test.bitwiseIsEqual(nan)); |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNGetInf) { |
| APFloat t = APFloat::getInf(APFloat::Float8E4M3FN()); |
| EXPECT_TRUE(t.isNaN()); |
| EXPECT_FALSE(t.isInfinity()); |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNFromString) { |
| // Exactly representable |
| EXPECT_EQ(448, APFloat(APFloat::Float8E4M3FN(), "448").convertToDouble()); |
| // Round down to maximum value |
| EXPECT_EQ(448, APFloat(APFloat::Float8E4M3FN(), "464").convertToDouble()); |
| // Round up, causing overflow to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "465").isNaN()); |
| // Overflow without rounding |
| EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "480").isNaN()); |
| // Inf converted to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "inf").isNaN()); |
| // NaN converted to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "nan").isNaN()); |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNAdd) { |
| APFloat QNaN = APFloat::getNaN(APFloat::Float8E4M3FN(), false); |
| |
| auto FromStr = [](StringRef S) { |
| return APFloat(APFloat::Float8E4M3FN(), S); |
| }; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven; |
| } AdditionTests[] = { |
| // Test addition operations involving NaN, overflow, and the max E4M3FN |
| // value (448) because E4M3FN differs from IEEE-754 types in these regards |
| {FromStr("448"), FromStr("16"), "448", APFloat::opInexact, |
| APFloat::fcNormal}, |
| {FromStr("448"), FromStr("18"), "NaN", |
| APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN}, |
| {FromStr("448"), FromStr("32"), "NaN", |
| APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN}, |
| {FromStr("-448"), FromStr("-32"), "-NaN", |
| APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN}, |
| {QNaN, FromStr("-448"), "NaN", APFloat::opOK, APFloat::fcNaN}, |
| {FromStr("448"), FromStr("-32"), "416", APFloat::opOK, APFloat::fcNormal}, |
| {FromStr("448"), FromStr("0"), "448", APFloat::opOK, APFloat::fcNormal}, |
| {FromStr("448"), FromStr("32"), "448", APFloat::opInexact, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| {FromStr("448"), FromStr("448"), "448", APFloat::opInexact, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| }; |
| |
| for (size_t i = 0; i < std::size(AdditionTests); ++i) { |
| APFloat x(AdditionTests[i].x); |
| APFloat y(AdditionTests[i].y); |
| APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode); |
| |
| APFloat result(APFloat::Float8E4M3FN(), AdditionTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(AdditionTests[i].status, (int)status); |
| EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNDivideByZero) { |
| APFloat x(APFloat::Float8E4M3FN(), "1"); |
| APFloat zero(APFloat::Float8E4M3FN(), "0"); |
| EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero); |
| EXPECT_TRUE(x.isNaN()); |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNNext) { |
| APFloat test(APFloat::Float8E4M3FN(), APFloat::uninitialized); |
| APFloat expected(APFloat::Float8E4M3FN(), APFloat::uninitialized); |
| |
| // nextUp on positive numbers |
| for (int i = 0; i < 127; i++) { |
| test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i)); |
| expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i + 1)); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| // nextUp on negative zero |
| test = APFloat::getZero(APFloat::Float8E4M3FN(), true); |
| expected = APFloat::getSmallest(APFloat::Float8E4M3FN(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextUp on negative nonzero numbers |
| for (int i = 129; i < 255; i++) { |
| test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i)); |
| expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i - 1)); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| // nextUp on NaN |
| test = APFloat::getQNaN(APFloat::Float8E4M3FN(), false); |
| expected = APFloat::getQNaN(APFloat::Float8E4M3FN(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown on positive nonzero finite numbers |
| for (int i = 1; i < 127; i++) { |
| test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i)); |
| expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i - 1)); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| // nextDown on positive zero |
| test = APFloat::getZero(APFloat::Float8E4M3FN(), true); |
| expected = APFloat::getSmallest(APFloat::Float8E4M3FN(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // nextDown on negative finite numbers |
| for (int i = 128; i < 255; i++) { |
| test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i)); |
| expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i + 1)); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| // nextDown on NaN |
| test = APFloat::getQNaN(APFloat::Float8E4M3FN(), false); |
| expected = APFloat::getQNaN(APFloat::Float8E4M3FN(), false); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNExhaustive) { |
| // Test each of the 256 Float8E4M3FN values. |
| for (int i = 0; i < 256; i++) { |
| APFloat test(APFloat::Float8E4M3FN(), APInt(8, i)); |
| SCOPED_TRACE("i=" + std::to_string(i)); |
| |
| // isLargest |
| if (i == 126 || i == 254) { |
| EXPECT_TRUE(test.isLargest()); |
| EXPECT_EQ(abs(test).convertToDouble(), 448.); |
| } else { |
| EXPECT_FALSE(test.isLargest()); |
| } |
| |
| // isSmallest |
| if (i == 1 || i == 129) { |
| EXPECT_TRUE(test.isSmallest()); |
| EXPECT_EQ(abs(test).convertToDouble(), 0x1p-9); |
| } else { |
| EXPECT_FALSE(test.isSmallest()); |
| } |
| |
| // convert to BFloat |
| APFloat test2 = test; |
| bool losesInfo; |
| APFloat::opStatus status = test2.convert( |
| APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| EXPECT_FALSE(losesInfo); |
| if (i == 127 || i == 255) |
| EXPECT_TRUE(test2.isNaN()); |
| else |
| EXPECT_EQ(test.convertToFloat(), test2.convertToFloat()); |
| |
| // bitcastToAPInt |
| EXPECT_EQ(i, test.bitcastToAPInt()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E8M0FNUExhaustive) { |
| // Test each of the 256 Float8E8M0FNU values. |
| for (int i = 0; i < 256; i++) { |
| APFloat test(APFloat::Float8E8M0FNU(), APInt(8, i)); |
| SCOPED_TRACE("i=" + std::to_string(i)); |
| |
| // bitcastToAPInt |
| EXPECT_EQ(i, test.bitcastToAPInt()); |
| |
| // isLargest |
| if (i == 254) { |
| EXPECT_TRUE(test.isLargest()); |
| EXPECT_EQ(abs(test).convertToDouble(), 0x1.0p127); |
| } else { |
| EXPECT_FALSE(test.isLargest()); |
| } |
| |
| // isSmallest |
| if (i == 0) { |
| EXPECT_TRUE(test.isSmallest()); |
| EXPECT_EQ(abs(test).convertToDouble(), 0x1.0p-127); |
| } else { |
| EXPECT_FALSE(test.isSmallest()); |
| } |
| |
| // convert to Double |
| bool losesInfo; |
| std::string val = std::to_string(i - 127); // 127 is the bias |
| llvm::SmallString<16> str("0x1.0p"); |
| str += val; |
| APFloat test2(APFloat::IEEEdouble(), str); |
| |
| APFloat::opStatus status = test.convert( |
| APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| EXPECT_FALSE(losesInfo); |
| if (i == 255) |
| EXPECT_TRUE(test.isNaN()); |
| else |
| EXPECT_EQ(test.convertToDouble(), test2.convertToDouble()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E5M2FNUZNext) { |
| APFloat test(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized); |
| APFloat expected(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized); |
| |
| // 1. NextUp of largest bit pattern is nan |
| test = APFloat::getLargest(APFloat::Float8E5M2FNUZ()); |
| expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ()); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_FALSE(test.isInfinity()); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isNaN()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2. NextUp of smallest negative denormal is +0 |
| test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true); |
| expected = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_FALSE(test.isNegZero()); |
| EXPECT_TRUE(test.isPosZero()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 3. nextDown of negative of largest value is NaN |
| test = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), true); |
| expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ()); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_FALSE(test.isInfinity()); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isNaN()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 4. nextDown of +0 is smallest negative denormal |
| test = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false); |
| expected = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 5. nextUp of NaN is NaN |
| test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false); |
| expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isNaN()); |
| |
| // 6. nextDown of NaN is NaN |
| test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false); |
| expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isNaN()); |
| } |
| |
| TEST(APFloatTest, Float8E5M2FNUZChangeSign) { |
| APFloat test = APFloat(APFloat::Float8E5M2FNUZ(), "1.0"); |
| APFloat expected = APFloat(APFloat::Float8E5M2FNUZ(), "-1.0"); |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getZero(APFloat::Float8E5M2FNUZ()); |
| expected = test; |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getNaN(APFloat::Float8E5M2FNUZ()); |
| expected = test; |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| |
| TEST(APFloatTest, Float8E5M2FNUZFromString) { |
| // Exactly representable |
| EXPECT_EQ(57344, |
| APFloat(APFloat::Float8E5M2FNUZ(), "57344").convertToDouble()); |
| // Round down to maximum value |
| EXPECT_EQ(57344, |
| APFloat(APFloat::Float8E5M2FNUZ(), "59392").convertToDouble()); |
| // Round up, causing overflow to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "61440").isNaN()); |
| // Overflow without rounding |
| EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "131072").isNaN()); |
| // Inf converted to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "inf").isNaN()); |
| // NaN converted to NaN |
| EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "nan").isNaN()); |
| // Negative zero converted to positive zero |
| EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "-0").isPosZero()); |
| } |
| |
| TEST(APFloatTest, UnsignedZeroArithmeticSpecial) { |
| // Float semantics with only unsigned zero (ex. Float8E4M3FNUZ) violate the |
| // IEEE rules about signs in arithmetic operations when producing zeros, |
| // because they only have one zero. Most of the rest of the complexities of |
| // arithmetic on these values are covered by the other Float8 types' test |
| // cases and so are not repeated here. |
| |
| // The IEEE round towards negative rule doesn't apply |
| for (APFloat::Semantics S : |
| {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S); |
| APFloat test = APFloat::getSmallest(Sem); |
| APFloat rhs = test; |
| EXPECT_EQ(test.subtract(rhs, APFloat::rmTowardNegative), APFloat::opOK); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| |
| // Multiplication of (small) * (-small) is +0 |
| test = APFloat::getSmallestNormalized(Sem); |
| rhs = -test; |
| EXPECT_EQ(test.multiply(rhs, APFloat::rmNearestTiesToAway), |
| APFloat::opInexact | APFloat::opUnderflow); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| |
| // Dividing the negatize float_min by anything gives +0 |
| test = APFloat::getSmallest(Sem, true); |
| rhs = APFloat(Sem, "2.0"); |
| EXPECT_EQ(test.divide(rhs, APFloat::rmNearestTiesToEven), |
| APFloat::opInexact | APFloat::opUnderflow); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| |
| // Remainder can't copy sign because there's only one zero |
| test = APFloat(Sem, "-4.0"); |
| rhs = APFloat(Sem, "2.0"); |
| EXPECT_EQ(test.remainder(rhs), APFloat::opOK); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| |
| // And same for mod |
| test = APFloat(Sem, "-4.0"); |
| rhs = APFloat(Sem, "2.0"); |
| EXPECT_EQ(test.mod(rhs), APFloat::opOK); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| |
| // FMA correctly handles both the multiply and add parts of all this |
| test = APFloat(Sem, "2.0"); |
| rhs = test; |
| APFloat addend = APFloat(Sem, "-4.0"); |
| EXPECT_EQ(test.fusedMultiplyAdd(rhs, addend, APFloat::rmTowardNegative), |
| APFloat::opOK); |
| EXPECT_TRUE(test.isZero()); |
| EXPECT_FALSE(test.isNegative()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E5M2FNUZAdd) { |
| APFloat QNaN = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false); |
| |
| auto FromStr = [](StringRef S) { |
| return APFloat(APFloat::Float8E5M2FNUZ(), S); |
| }; |
| |
| struct { |
| APFloat x; |
| APFloat y; |
| const char *result; |
| int status; |
| int category; |
| APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven; |
| } AdditionTests[] = { |
| // Test addition operations involving NaN, overflow, and the max E5M2FNUZ |
| // value (57344) because E5M2FNUZ differs from IEEE-754 types in these |
| // regards |
| {FromStr("57344"), FromStr("2048"), "57344", APFloat::opInexact, |
| APFloat::fcNormal}, |
| {FromStr("57344"), FromStr("4096"), "NaN", |
| APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN}, |
| {FromStr("-57344"), FromStr("-4096"), "NaN", |
| APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN}, |
| {QNaN, FromStr("-57344"), "NaN", APFloat::opOK, APFloat::fcNaN}, |
| {FromStr("57344"), FromStr("-8192"), "49152", APFloat::opOK, |
| APFloat::fcNormal}, |
| {FromStr("57344"), FromStr("0"), "57344", APFloat::opOK, |
| APFloat::fcNormal}, |
| {FromStr("57344"), FromStr("4096"), "57344", APFloat::opInexact, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| {FromStr("57344"), FromStr("57344"), "57344", APFloat::opInexact, |
| APFloat::fcNormal, APFloat::rmTowardZero}, |
| }; |
| |
| for (size_t i = 0; i < std::size(AdditionTests); ++i) { |
| APFloat x(AdditionTests[i].x); |
| APFloat y(AdditionTests[i].y); |
| APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode); |
| |
| APFloat result(APFloat::Float8E5M2FNUZ(), AdditionTests[i].result); |
| |
| EXPECT_TRUE(result.bitwiseIsEqual(x)); |
| EXPECT_EQ(AdditionTests[i].status, (int)status); |
| EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E5M2FNUZDivideByZero) { |
| APFloat x(APFloat::Float8E5M2FNUZ(), "1"); |
| APFloat zero(APFloat::Float8E5M2FNUZ(), "0"); |
| EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero); |
| EXPECT_TRUE(x.isNaN()); |
| } |
| |
| TEST(APFloatTest, Float8UnsignedZeroExhaustive) { |
| struct { |
| const fltSemantics *semantics; |
| const double largest; |
| const double smallest; |
| } const exhaustiveTests[] = {{&APFloat::Float8E5M2FNUZ(), 57344., 0x1.0p-17}, |
| {&APFloat::Float8E4M3FNUZ(), 240., 0x1.0p-10}, |
| {&APFloat::Float8E4M3B11FNUZ(), 30., 0x1.0p-13}}; |
| for (const auto &testInfo : exhaustiveTests) { |
| const fltSemantics &sem = *testInfo.semantics; |
| SCOPED_TRACE("Semantics=" + std::to_string(APFloat::SemanticsToEnum(sem))); |
| // Test each of the 256 values. |
| for (int i = 0; i < 256; i++) { |
| SCOPED_TRACE("i=" + std::to_string(i)); |
| APFloat test(sem, APInt(8, i)); |
| |
| // isLargest |
| if (i == 127 || i == 255) { |
| EXPECT_TRUE(test.isLargest()); |
| EXPECT_EQ(abs(test).convertToDouble(), testInfo.largest); |
| } else { |
| EXPECT_FALSE(test.isLargest()); |
| } |
| |
| // isSmallest |
| if (i == 1 || i == 129) { |
| EXPECT_TRUE(test.isSmallest()); |
| EXPECT_EQ(abs(test).convertToDouble(), testInfo.smallest); |
| } else { |
| EXPECT_FALSE(test.isSmallest()); |
| } |
| |
| // convert to BFloat |
| APFloat test2 = test; |
| bool losesInfo; |
| APFloat::opStatus status = test2.convert( |
| APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| EXPECT_FALSE(losesInfo); |
| if (i == 128) |
| EXPECT_TRUE(test2.isNaN()); |
| else |
| EXPECT_EQ(test.convertToFloat(), test2.convertToFloat()); |
| |
| // bitcastToAPInt |
| EXPECT_EQ(i, test.bitcastToAPInt()); |
| } |
| } |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNUZNext) { |
| for (APFloat::Semantics S : |
| {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S); |
| APFloat test(Sem, APFloat::uninitialized); |
| APFloat expected(Sem, APFloat::uninitialized); |
| |
| // 1. NextUp of largest bit pattern is nan |
| test = APFloat::getLargest(Sem); |
| expected = APFloat::getNaN(Sem); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_FALSE(test.isInfinity()); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isNaN()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 2. NextUp of smallest negative denormal is +0 |
| test = APFloat::getSmallest(Sem, true); |
| expected = APFloat::getZero(Sem, false); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_FALSE(test.isNegZero()); |
| EXPECT_TRUE(test.isPosZero()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 3. nextDown of negative of largest value is NaN |
| test = APFloat::getLargest(Sem, true); |
| expected = APFloat::getNaN(Sem); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_FALSE(test.isInfinity()); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isNaN()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 4. nextDown of +0 is smallest negative denormal |
| test = APFloat::getZero(Sem, false); |
| expected = APFloat::getSmallest(Sem, true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_FALSE(test.isZero()); |
| EXPECT_TRUE(test.isDenormal()); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| // 5. nextUp of NaN is NaN |
| test = APFloat::getNaN(Sem, false); |
| expected = APFloat::getNaN(Sem, true); |
| EXPECT_EQ(test.next(false), APFloat::opOK); |
| EXPECT_TRUE(test.isNaN()); |
| |
| // 6. nextDown of NaN is NaN |
| test = APFloat::getNaN(Sem, false); |
| expected = APFloat::getNaN(Sem, true); |
| EXPECT_EQ(test.next(true), APFloat::opOK); |
| EXPECT_TRUE(test.isNaN()); |
| } |
| } |
| |
| TEST(APFloatTest, Float8E4M3FNUZChangeSign) { |
| for (APFloat::Semantics S : |
| {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) { |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S); |
| APFloat test = APFloat(Sem, "1.0"); |
| APFloat expected = APFloat(Sem, "-1.0"); |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getZero(Sem); |
| expected = test; |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| |
| test = APFloat::getNaN(Sem); |
| expected = test; |
| test.changeSign(); |
| EXPECT_TRUE(test.bitwiseIsEqual(expected)); |
| } |
| } |
| |
| // String parsing into Float8E4M3FNUZ: values at and around the maximum (240), |
| // plus the special inputs "inf", "nan" and "-0". |
| TEST(APFloatTest, Float8E4M3FNUZFromString) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FNUZ(); |
| auto Parse = [&Sem](StringRef Text) { return APFloat(Sem, Text); }; |
| |
| // Finite results. |
| // - exactly representable |
| EXPECT_EQ(240, Parse("240").convertToDouble()); |
| // - rounds down to the maximum value |
| EXPECT_EQ(240, Parse("247").convertToDouble()); |
| |
| // Inputs that must produce NaN. |
| // - rounds up, causing overflow to NaN |
| EXPECT_TRUE(Parse("248").isNaN()); |
| // - overflows without rounding |
| EXPECT_TRUE(Parse("480").isNaN()); |
| // - infinity is converted to NaN |
| EXPECT_TRUE(Parse("inf").isNaN()); |
| // - NaN stays NaN |
| EXPECT_TRUE(Parse("nan").isNaN()); |
| |
| // Negative zero is converted to positive zero. |
| EXPECT_TRUE(Parse("-0").isPosZero()); |
| } |
| |
| // Table-driven check of add() for Float8E4M3FNUZ. Each row gives both |
| // operands, the expected sum (as a string), the expected opStatus bits, the |
| // expected category and, optionally, a rounding mode. |
| TEST(APFloatTest, Float8E4M3FNUZAdd) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FNUZ(); |
| const APFloat NaNValue = APFloat::getNaN(Sem, false); |
| auto Val = [&Sem](StringRef Text) { return APFloat(Sem, Text); }; |
| |
| struct AddCase { |
| APFloat Lhs; |
| APFloat Rhs; |
| const char *Expected; |
| int Status; |
| int Category; |
| APFloat::roundingMode Rounding = APFloat::rmNearestTiesToEven; |
| }; |
| |
| const AddCase Cases[] = { |
| // Test addition operations involving NaN, overflow, and the max E4M3FNUZ |
| // value (240) because E4M3FNUZ differs from IEEE-754 types in these |
| // regards |
| {Val("240"), Val("4"), "240", APFloat::opInexact, APFloat::fcNormal}, |
| {Val("240"), Val("8"), "NaN", APFloat::opOverflow | APFloat::opInexact, |
| APFloat::fcNaN}, |
| {Val("240"), Val("16"), "NaN", APFloat::opOverflow | APFloat::opInexact, |
| APFloat::fcNaN}, |
| {Val("-240"), Val("-16"), "NaN", APFloat::opOverflow | APFloat::opInexact, |
| APFloat::fcNaN}, |
| {NaNValue, Val("-240"), "NaN", APFloat::opOK, APFloat::fcNaN}, |
| {Val("240"), Val("-16"), "224", APFloat::opOK, APFloat::fcNormal}, |
| {Val("240"), Val("0"), "240", APFloat::opOK, APFloat::fcNormal}, |
| {Val("240"), Val("32"), "240", APFloat::opInexact, APFloat::fcNormal, |
| APFloat::rmTowardZero}, |
| {Val("240"), Val("240"), "240", APFloat::opInexact, APFloat::fcNormal, |
| APFloat::rmTowardZero}, |
| }; |
| |
| for (const AddCase &Case : Cases) { |
| // add() works in place, so operate on a copy of the left operand. |
| APFloat Sum = Case.Lhs; |
| const APFloat::opStatus Status = Sum.add(Case.Rhs, Case.Rounding); |
| |
| const APFloat Want(Sem, Case.Expected); |
| EXPECT_TRUE(Want.bitwiseIsEqual(Sum)); |
| EXPECT_EQ(Case.Status, static_cast<int>(Status)); |
| EXPECT_EQ(Case.Category, static_cast<int>(Sum.getCategory())); |
| } |
| } |
| |
| // Dividing 1 by 0 in Float8E4M3FNUZ must report opDivByZero and leave NaN in |
| // the dividend. |
| TEST(APFloatTest, Float8E4M3FNUZDivideByZero) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FNUZ(); |
| APFloat Quotient(Sem, "1"); |
| const APFloat Divisor(Sem, "0"); |
| |
| const APFloat::opStatus Status = |
| Quotient.divide(Divisor, APFloat::rmNearestTiesToEven); |
| |
| EXPECT_EQ(Status, APFloat::opDivByZero); |
| EXPECT_TRUE(Quotient.isNaN()); |
| } |
| |
| // Converts Float8E5M2FNUZ values to Float8E4M3FNUZ and checks the resulting |
| // value, the losesInfo flag and the returned opStatus for exact, overflowing, |
| // underflowing and denormal-rounding inputs. |
| // |
| // Before every conversion losesInfo is seeded with the opposite of the value |
| // the case expects, so each assertion can only pass if convert() really wrote |
| // the flag. |
| TEST(APFloatTest, ConvertE5M2FNUZToE4M3FNUZ) { |
| bool losesInfo = true; |
| APFloat test(APFloat::Float8E5M2FNUZ(), "1.0"); |
| APFloat::opStatus status = test.convert( |
| APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| losesInfo = true; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0.0"); |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| losesInfo = true; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.Cp7"); // 224 |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.Cp7 /* 224 */, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| // Test overflow |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p8"); // 256 |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_TRUE(std::isnan(test.convertToFloat())); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact); |
| |
| // Test underflow |
| // Reset the flag: it is still true from the overflow case above, which would |
| // otherwise let the EXPECT_TRUE below pass without convert() setting it. |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-11"); |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0., test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| |
| // Test rounding up to smallest denormal number |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-11"); |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p-10, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| |
| // Testing inexact rounding to denormal number |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-10"); |
| status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p-9, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); |
| } |
| |
| // Converts Float8E4M3FNUZ values to Float8E5M2FNUZ and checks the resulting |
| // value, the losesInfo flag and the returned opStatus for exact and inexact |
| // inputs. |
| // |
| // Before every conversion losesInfo is seeded with the opposite of the value |
| // the case expects, so each assertion can only pass if convert() really wrote |
| // the flag. |
| TEST(APFloatTest, ConvertE4M3FNUZToE5M2FNUZ) { |
| bool losesInfo = true; |
| APFloat test(APFloat::Float8E4M3FNUZ(), "1.0"); |
| APFloat::opStatus status = test.convert( |
| APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo); |
| EXPECT_EQ(1.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| losesInfo = true; |
| test = APFloat(APFloat::Float8E4M3FNUZ(), "0.0"); |
| status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0.0f, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| |
| // 1.125 is not representable in the destination; expected to round to 1.0. |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.2p0"); // 1.125 |
| status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInexact); |
| |
| // 1.375 is not representable in the destination; expected to round to 1.5. |
| losesInfo = false; |
| test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.6p0"); // 1.375 |
| status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat()); |
| EXPECT_TRUE(losesInfo); |
| EXPECT_EQ(status, APFloat::opInexact); |
| |
| // Convert E4M3FNUZ denormal to E5M2FNUZ normal. Should not be truncated, |
| // despite the destination format having one fewer significand bit |
| losesInfo = true; |
| test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.Cp-8"); |
| status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, |
| &losesInfo); |
| EXPECT_EQ(0x1.Cp-8, test.convertToFloat()); |
| EXPECT_FALSE(losesInfo); |
| EXPECT_EQ(status, APFloat::opOK); |
| } |
| |
| // For every 8-bit pattern of each listed FP8 format: NaNs must print as "NaN"; |
| // every other value must survive a toString() -> parse round trip bit-exactly. |
| TEST(APFloatTest, F8ToString) { |
| for (APFloat::Semantics SemKind : |
| {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN, |
| APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ, |
| APFloat::S_Float8E4M3B11FNUZ}) { |
| SCOPED_TRACE("Semantics=" + std::to_string(SemKind)); |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(SemKind); |
| |
| for (int Raw = 0; Raw != 256; ++Raw) { |
| SCOPED_TRACE("i=" + std::to_string(Raw)); |
| const APFloat Value(Sem, APInt(8, Raw)); |
| llvm::SmallString<128> Text; |
| Value.toString(Text); |
| |
| if (!Value.isNaN()) { |
| // Re-parse the printed text and compare encodings. |
| const APFloat Reparsed(Sem, Text); |
| EXPECT_TRUE(Value.bitwiseIsEqual(Reparsed)); |
| continue; |
| } |
| EXPECT_EQ(Text, "NaN"); |
| } |
| } |
| } |
| |
| // Every 8-bit pattern, once loaded into an APFloat of each listed FP8 format, |
| // must bitcast back to exactly the same bits. |
| TEST(APFloatTest, BitsToF8ToBits) { |
| for (APFloat::Semantics SemKind : |
| {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN, |
| APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ, |
| APFloat::S_Float8E4M3B11FNUZ}) { |
| SCOPED_TRACE("Semantics=" + std::to_string(SemKind)); |
| const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(SemKind); |
| |
| for (int Raw = 0; Raw != 256; ++Raw) { |
| SCOPED_TRACE("i=" + std::to_string(Raw)); |
| const APInt Input(8, Raw); |
| const APFloat Decoded(Sem, Input); |
| const APInt Output = Decoded.bitcastToAPInt(); |
| EXPECT_EQ(Input, Output); |
| } |
| } |
| } |
| |
| // Starting from zero and stepping with next() 128 times in each direction, |
| // every value visited must survive a bitcastToAPInt() -> APFloat round trip. |
| // NaNs only need to come back as NaN; all other values must match bit-exactly. |
| TEST(APFloatTest, F8ToBitsToF8) { |
| for (APFloat::Semantics SemKind : |
| {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN, |
| APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ, |
| APFloat::S_Float8E4M3B11FNUZ}) { |
| SCOPED_TRACE("Semantics=" + std::to_string(SemKind)); |
| auto &Sem = APFloat::EnumToSemantics(SemKind); |
| |
| for (bool Downward : {false, true}) { |
| SCOPED_TRACE("negative=" + std::to_string(Downward)); |
| APFloat Cursor = APFloat::getZero(Sem, /*Negative=*/Downward); |
| |
| for (int Step = 0; Step < 128; ++Step) { |
| SCOPED_TRACE("i=" + std::to_string(Step)); |
| const APFloat Rebuilt(Sem, Cursor.bitcastToAPInt()); |
| const bool RoundTrips = |
| Cursor.isNaN() ? Rebuilt.isNaN() : Cursor.bitwiseIsEqual(Rebuilt); |
| EXPECT_TRUE(RoundTrips); |
| |
| // Advance to the neighbouring value for the next iteration. |
| Cursor.next(/*nextDown=*/Downward); |
| } |
| } |
| } |
| } |
| |
| // convertToDouble() on IEEEdouble values must reproduce the host double for |
| // zeros, one, the range limits, denormals, infinities and NaN. |
| TEST(APFloatTest, IEEEdoubleToDouble) { |
| using Limits = std::numeric_limits<double>; |
| const llvm::fltSemantics &Sem = APFloat::IEEEdouble(); |
| |
| // Signed zeros keep their sign through the round trip. |
| EXPECT_TRUE(APFloat(APFloat(0.0).convertToDouble()).isPosZero()); |
| EXPECT_TRUE(APFloat(APFloat(-0.0).convertToDouble()).isNegZero()); |
| |
| EXPECT_EQ(1.0, APFloat(1.0).convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(Limits::max(), APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-Limits::max(), APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(Limits::min(), |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-Limits::min(), |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Denormals: the smallest and the largest. |
| EXPECT_EQ(Limits::denorm_min(), |
| APFloat::getSmallest(Sem, false).convertToDouble()); |
| const APFloat BiggestDenorm(Sem, "0x0.FFFFFFFFFFFFFp-1022"); |
| EXPECT_EQ(/*0x0.FFFFFFFFFFFFFp-1022*/ 2.225073858507201e-308, |
| BiggestDenorm.convertToDouble()); |
| |
| // Infinities and NaN. |
| EXPECT_EQ(Limits::infinity(), APFloat::getInf(Sem).convertToDouble()); |
| EXPECT_EQ(-Limits::infinity(), APFloat::getInf(Sem, true).convertToDouble()); |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on IEEEsingle values: zeros, one, the range limits, |
| // denormals, infinities and NaN must all widen to the matching double. |
| TEST(APFloatTest, IEEEsingleToDouble) { |
| APFloat FPosZero(0.0F); |
| APFloat FPosZeroToDouble(FPosZero.convertToDouble()); |
| EXPECT_TRUE(FPosZeroToDouble.isPosZero()); |
| APFloat FNegZero(-0.0F); |
| APFloat FNegZeroToDouble(FNegZero.convertToDouble()); |
| EXPECT_TRUE(FNegZeroToDouble.isNegZero()); |
| |
| APFloat FOne(1.0F); |
| EXPECT_EQ(1.0, FOne.convertToDouble()); |
| APFloat FPosLargest = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::max(), FPosLargest.convertToDouble()); |
| APFloat FNegLargest = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::max(), FNegLargest.convertToDouble()); |
| APFloat FPosSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::min(), FPosSmallest.convertToDouble()); |
| APFloat FNegSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::min(), FNegSmallest.convertToDouble()); |
| |
| APFloat FSmallestDenorm = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::denorm_min(), |
| FSmallestDenorm.convertToDouble()); |
| // The value must be built with IEEEsingle semantics: with IEEEdouble it is an |
| // ordinary normal double and the single-precision denormal path is never |
| // exercised. 0x0.FFFFFEp-126 is exactly the largest single denormal. |
| APFloat FLargestDenorm(APFloat::IEEEsingle(), "0x0.FFFFFEp-126"); |
| EXPECT_TRUE(FLargestDenorm.isDenormal()); |
| EXPECT_EQ(/*0x0.FFFFFEp-126*/ 1.1754942106924411e-38, |
| FLargestDenorm.convertToDouble()); |
| |
| APFloat FPosInf = APFloat::getInf(APFloat::IEEEsingle()); |
| EXPECT_EQ(std::numeric_limits<double>::infinity(), FPosInf.convertToDouble()); |
| APFloat FNegInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<double>::infinity(), |
| FNegInf.convertToDouble()); |
| APFloat FQNaN = APFloat::getQNaN(APFloat::IEEEsingle()); |
| EXPECT_TRUE(std::isnan(FQNaN.convertToDouble())); |
| } |
| |
| // convertToDouble() on IEEEhalf values: zeros, one, the range limits, |
| // denormals, infinities and NaN must all widen to the matching double. |
| TEST(APFloatTest, IEEEhalfToDouble) { |
| APFloat HPosZero = APFloat::getZero(APFloat::IEEEhalf()); |
| APFloat HPosZeroToDouble(HPosZero.convertToDouble()); |
| EXPECT_TRUE(HPosZeroToDouble.isPosZero()); |
| APFloat HNegZero = APFloat::getZero(APFloat::IEEEhalf(), true); |
| APFloat HNegZeroToDouble(HNegZero.convertToDouble()); |
| EXPECT_TRUE(HNegZeroToDouble.isNegZero()); |
| |
| APFloat HOne(APFloat::IEEEhalf(), "1.0"); |
| EXPECT_EQ(1.0, HOne.convertToDouble()); |
| APFloat HPosLargest = APFloat::getLargest(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(65504.0, HPosLargest.convertToDouble()); |
| APFloat HNegLargest = APFloat::getLargest(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(-65504.0, HNegLargest.convertToDouble()); |
| APFloat HPosSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(/*0x1.p-14*/ 6.103515625e-05, HPosSmallest.convertToDouble()); |
| APFloat HNegSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(/*-0x1.p-14*/ -6.103515625e-05, HNegSmallest.convertToDouble()); |
| |
| APFloat HSmallestDenorm = APFloat::getSmallest(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(/*0x1.p-24*/ 5.960464477539063e-08, |
| HSmallestDenorm.convertToDouble()); |
| // Largest half denormal: all ten fraction bits set below the minimum normal |
| // exponent, i.e. 0x1.FF8p-15 == 0x0.FFCp-14 == 2^-14 - 2^-24. |
| APFloat HLargestDenorm(APFloat::IEEEhalf(), "0x1.FF8p-15"); |
| EXPECT_TRUE(HLargestDenorm.isDenormal()); |
| EXPECT_EQ(0x1.FF8p-15, HLargestDenorm.convertToDouble()); |
| |
| APFloat HPosInf = APFloat::getInf(APFloat::IEEEhalf()); |
| EXPECT_EQ(std::numeric_limits<double>::infinity(), HPosInf.convertToDouble()); |
| APFloat HNegInf = APFloat::getInf(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(-std::numeric_limits<double>::infinity(), |
| HNegInf.convertToDouble()); |
| APFloat HQNaN = APFloat::getQNaN(APFloat::IEEEhalf()); |
| EXPECT_TRUE(std::isnan(HQNaN.convertToDouble())); |
| } |
| |
| // convertToDouble() on BFloat values: zeros, one, the range limits, |
| // denormals, infinities and NaN must all widen to the matching double. |
| TEST(APFloatTest, BFloatToDouble) { |
| // Signed zeros keep their sign when widened. |
| APFloat BPosZero = APFloat::getZero(APFloat::BFloat()); |
| APFloat BPosZeroToDouble(BPosZero.convertToDouble()); |
| EXPECT_TRUE(BPosZeroToDouble.isPosZero()); |
| APFloat BNegZero = APFloat::getZero(APFloat::BFloat(), true); |
| APFloat BNegZeroToDouble(BNegZero.convertToDouble()); |
| EXPECT_TRUE(BNegZeroToDouble.isNegZero()); |
| |
| APFloat BOne(APFloat::BFloat(), "1.0"); |
| EXPECT_EQ(1.0, BOne.convertToDouble()); |
| APFloat BPosLargest = APFloat::getLargest(APFloat::BFloat(), false); |
| EXPECT_EQ(/*0x1.FEp127*/ 3.3895313892515355e+38, |
| BPosLargest.convertToDouble()); |
| APFloat BNegLargest = APFloat::getLargest(APFloat::BFloat(), true); |
| EXPECT_EQ(/*-0x1.FEp127*/ -3.3895313892515355e+38, |
| BNegLargest.convertToDouble()); |
| APFloat BPosSmallest = |
| APFloat::getSmallestNormalized(APFloat::BFloat(), false); |
| EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38, |
| BPosSmallest.convertToDouble()); |
| APFloat BNegSmallest = |
| APFloat::getSmallestNormalized(APFloat::BFloat(), true); |
| EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38, |
| BNegSmallest.convertToDouble()); |
| |
| APFloat BSmallestDenorm = APFloat::getSmallest(APFloat::BFloat(), false); |
| EXPECT_EQ(/*0x1.p-133*/ 9.183549615799121e-41, |
| BSmallestDenorm.convertToDouble()); |
| APFloat BLargestDenorm(APFloat::BFloat(), "0x1.FCp-127"); |
| EXPECT_EQ(/*0x1.FCp-127*/ 1.1663108012064884e-38, |
| BLargestDenorm.convertToDouble()); |
| |
| APFloat BPosInf = APFloat::getInf(APFloat::BFloat()); |
| EXPECT_EQ(std::numeric_limits<double>::infinity(), BPosInf.convertToDouble()); |
| APFloat BNegInf = APFloat::getInf(APFloat::BFloat(), true); |
| EXPECT_EQ(-std::numeric_limits<double>::infinity(), |
| BNegInf.convertToDouble()); |
| APFloat BQNaN = APFloat::getQNaN(APFloat::BFloat()); |
| EXPECT_TRUE(std::isnan(BQNaN.convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E5M2 values: small integers, the |
| // range limits, the smallest denormal, both infinities and a quiet NaN. |
| TEST(APFloatTest, Float8E5M2ToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E5M2(); |
| const double Inf = std::numeric_limits<double>::infinity(); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(5.734400e+04, APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-5.734400e+04, APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-14, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-14, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-16, Tiny.convertToDouble()); |
| |
| // Infinities and NaN. |
| EXPECT_EQ(Inf, APFloat::getInf(Sem).convertToDouble()); |
| EXPECT_EQ(-Inf, APFloat::getInf(Sem, true).convertToDouble()); |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E4M3 values: small integers, the |
| // range limits, the smallest denormal, both infinities and a quiet NaN. |
| TEST(APFloatTest, Float8E4M3ToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3(); |
| const double Inf = std::numeric_limits<double>::infinity(); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(240.0F, APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-240.0F, APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-6, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-6, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1.p-9, Tiny.convertToDouble()); |
| |
| // Infinities and NaN. |
| EXPECT_EQ(Inf, APFloat::getInf(Sem).convertToDouble()); |
| EXPECT_EQ(-Inf, APFloat::getInf(Sem, true).convertToDouble()); |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E4M3FN values: small integers, |
| // the range limits, the smallest denormal and a quiet NaN. |
| TEST(APFloatTest, Float8E4M3FNToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FN(); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(448., APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-448., APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-6, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-6, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-9, Tiny.convertToDouble()); |
| |
| // NaN stays NaN. |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E5M2FNUZ values: small integers, |
| // the range limits, the smallest denormal and a quiet NaN. |
| TEST(APFloatTest, Float8E5M2FNUZToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E5M2FNUZ(); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(57344., APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-57344., APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-15, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-15, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-17, Tiny.convertToDouble()); |
| |
| // NaN stays NaN. |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E4M3FNUZ values: small integers, |
| // the range limits, the smallest denormal and a quiet NaN. |
| TEST(APFloatTest, Float8E4M3FNUZToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FNUZ(); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(240., APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-240., APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-7, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-7, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-10, Tiny.convertToDouble()); |
| |
| // NaN stays NaN. |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative Float8E3M4 values: signed zeros, small |
| // integers, the range limits, the smallest denormals of both signs, both |
| // infinities and a quiet NaN. |
| TEST(APFloatTest, Float8E3M4ToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E3M4(); |
| const double Inf = std::numeric_limits<double>::infinity(); |
| |
| // Signed zeros keep their sign when widened. |
| const APFloat WidenedPosZero(APFloat::getZero(Sem, false).convertToDouble()); |
| EXPECT_TRUE(WidenedPosZero.isPosZero()); |
| const APFloat WidenedNegZero(APFloat::getZero(Sem, true).convertToDouble()); |
| EXPECT_TRUE(WidenedNegZero.isNegZero()); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(15.5F, APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-15.5F, APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-2, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-0x1.p-2, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Smallest magnitudes of each sign, which must be denormals. |
| const APFloat TinyPos = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(TinyPos.isDenormal()); |
| EXPECT_EQ(0x1.p-6, TinyPos.convertToDouble()); |
| const APFloat TinyNeg = APFloat::getSmallest(Sem, true); |
| EXPECT_TRUE(TinyNeg.isDenormal()); |
| EXPECT_EQ(-0x1.p-6, TinyNeg.convertToDouble()); |
| |
| // Infinities and NaN. |
| EXPECT_EQ(Inf, APFloat::getInf(Sem).convertToDouble()); |
| EXPECT_EQ(-Inf, APFloat::getInf(Sem, true).convertToDouble()); |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToDouble() on representative FloatTF32 values: one, the range |
| // limits, the smallest and largest denormals, both infinities and a quiet NaN. |
| TEST(APFloatTest, FloatTF32ToDouble) { |
| const llvm::fltSemantics &Sem = APFloat::FloatTF32(); |
| const double Inf = std::numeric_limits<double>::infinity(); |
| |
| EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(3.401162134214653489792616e+38, |
| APFloat::getLargest(Sem, false).convertToDouble()); |
| EXPECT_EQ(-3.401162134214653489792616e+38, |
| APFloat::getLargest(Sem, true).convertToDouble()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(1.1754943508222875079687e-38, |
| APFloat::getSmallestNormalized(Sem, false).convertToDouble()); |
| EXPECT_EQ(-1.1754943508222875079687e-38, |
| APFloat::getSmallestNormalized(Sem, true).convertToDouble()); |
| |
| // Denormals: the smallest and the largest. |
| EXPECT_EQ(1.1479437019748901445007e-41, |
| APFloat::getSmallest(Sem, false).convertToDouble()); |
| const APFloat BiggestDenorm(Sem, "0x1.FF8p-127"); |
| EXPECT_EQ(/*0x1.FF8p-127*/ 1.1743464071203126178242e-38, |
| BiggestDenorm.convertToDouble()); |
| |
| // Infinities and NaN. |
| EXPECT_EQ(Inf, APFloat::getInf(Sem).convertToDouble()); |
| EXPECT_EQ(-Inf, APFloat::getInf(Sem, true).convertToDouble()); |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToDouble())); |
| } |
| |
| // convertToFloat() on representative Float8E5M2FNUZ values: zero, small |
| // integers, the range limits, the smallest denormal and a quiet NaN. |
| TEST(APFloatTest, Float8E5M2FNUZToFloat) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E5M2FNUZ(); |
| |
| const APFloat ZeroAsFloat(APFloat::getZero(Sem).convertToFloat()); |
| EXPECT_TRUE(ZeroAsFloat.isPosZero()); |
| // Negative zero is not supported: requesting one still yields +0. |
| const APFloat NegZeroAsFloat(APFloat::getZero(Sem, true).convertToFloat()); |
| EXPECT_TRUE(NegZeroAsFloat.isPosZero()); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat()); |
| EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(57344.F, APFloat::getLargest(Sem, false).convertToFloat()); |
| EXPECT_EQ(-57344.F, APFloat::getLargest(Sem, true).convertToFloat()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-15F, |
| APFloat::getSmallestNormalized(Sem, false).convertToFloat()); |
| EXPECT_EQ(-0x1.p-15F, |
| APFloat::getSmallestNormalized(Sem, true).convertToFloat()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-17F, Tiny.convertToFloat()); |
| |
| // NaN stays NaN. |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToFloat())); |
| } |
| |
| // convertToFloat() on representative Float8E4M3FNUZ values: zero, small |
| // integers, the range limits, the smallest denormal and a quiet NaN. |
| TEST(APFloatTest, Float8E4M3FNUZToFloat) { |
| const llvm::fltSemantics &Sem = APFloat::Float8E4M3FNUZ(); |
| |
| const APFloat ZeroAsFloat(APFloat::getZero(Sem).convertToFloat()); |
| EXPECT_TRUE(ZeroAsFloat.isPosZero()); |
| // Negative zero is not supported: requesting one still yields +0. |
| const APFloat NegZeroAsFloat(APFloat::getZero(Sem, true).convertToFloat()); |
| EXPECT_TRUE(NegZeroAsFloat.isPosZero()); |
| |
| // Exactly representable integers. |
| EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat()); |
| EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat()); |
| |
| // Largest finite magnitudes. |
| EXPECT_EQ(240.F, APFloat::getLargest(Sem, false).convertToFloat()); |
| EXPECT_EQ(-240.F, APFloat::getLargest(Sem, true).convertToFloat()); |
| |
| // Smallest normalized magnitudes. |
| EXPECT_EQ(0x1.p-7F, |
| APFloat::getSmallestNormalized(Sem, false).convertToFloat()); |
| EXPECT_EQ(-0x1.p-7F, |
| APFloat::getSmallestNormalized(Sem, true).convertToFloat()); |
| |
| // Smallest positive value, which must be a denormal. |
| const APFloat Tiny = APFloat::getSmallest(Sem, false); |
| EXPECT_TRUE(Tiny.isDenormal()); |
| EXPECT_EQ(0x1p-10F, Tiny.convertToFloat()); |
| |
| // NaN stays NaN. |
| EXPECT_TRUE(std::isnan(APFloat::getQNaN(Sem).convertToFloat())); |
| } |
| |
| // convertToFloat() on IEEEsingle values must reproduce the host float for |
| // zeros, one, the range limits, denormals, infinities and NaN. |
| TEST(APFloatTest, IEEEsingleToFloat) { |
| APFloat FPosZero(0.0F); |
| APFloat FPosZeroToFloat(FPosZero.convertToFloat()); |
| EXPECT_TRUE(FPosZeroToFloat.isPosZero()); |
| APFloat FNegZero(-0.0F); |
| APFloat FNegZeroToFloat(FNegZero.convertToFloat()); |
| EXPECT_TRUE(FNegZeroToFloat.isNegZero()); |
| |
| APFloat FOne(1.0F); |
| EXPECT_EQ(1.0F, FOne.convertToFloat()); |
| APFloat FPosLargest = APFloat::getLargest(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::max(), FPosLargest.convertToFloat()); |
| APFloat FNegLargest = APFloat::getLargest(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::max(), FNegLargest.convertToFloat()); |
| APFloat FPosSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::min(), FPosSmallest.convertToFloat()); |
| APFloat FNegSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::min(), FNegSmallest.convertToFloat()); |
| |
| APFloat FSmallestDenorm = APFloat::getSmallest(APFloat::IEEEsingle(), false); |
| EXPECT_EQ(std::numeric_limits<float>::denorm_min(), |
| FSmallestDenorm.convertToFloat()); |
| // Largest single denormal: all 23 fraction bits set below the minimum normal |
| // exponent, i.e. 0x1.FFFFFCp-127 == 0x0.FFFFFEp-126 == 2^-126 - 2^-149. |
| APFloat FLargestDenorm(APFloat::IEEEsingle(), "0x1.FFFFFCp-127"); |
| EXPECT_TRUE(FLargestDenorm.isDenormal()); |
| EXPECT_EQ(0x1.FFFFFCp-127F, FLargestDenorm.convertToFloat()); |
| |
| APFloat FPosInf = APFloat::getInf(APFloat::IEEEsingle()); |
| EXPECT_EQ(std::numeric_limits<float>::infinity(), FPosInf.convertToFloat()); |
| APFloat FNegInf = APFloat::getInf(APFloat::IEEEsingle(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::infinity(), FNegInf.convertToFloat()); |
| APFloat FQNaN = APFloat::getQNaN(APFloat::IEEEsingle()); |
| EXPECT_TRUE(std::isnan(FQNaN.convertToFloat())); |
| } |
| |
| // convertToFloat() on IEEEhalf values: zeros, one, the range limits, |
| // denormals, infinities and NaN must all widen to the matching float. |
| TEST(APFloatTest, IEEEhalfToFloat) { |
| APFloat HPosZero = APFloat::getZero(APFloat::IEEEhalf()); |
| APFloat HPosZeroToFloat(HPosZero.convertToFloat()); |
| EXPECT_TRUE(HPosZeroToFloat.isPosZero()); |
| APFloat HNegZero = APFloat::getZero(APFloat::IEEEhalf(), true); |
| APFloat HNegZeroToFloat(HNegZero.convertToFloat()); |
| EXPECT_TRUE(HNegZeroToFloat.isNegZero()); |
| |
| APFloat HOne(APFloat::IEEEhalf(), "1.0"); |
| EXPECT_EQ(1.0F, HOne.convertToFloat()); |
| APFloat HPosLargest = APFloat::getLargest(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(/*0x1.FFCp15*/ 65504.0F, HPosLargest.convertToFloat()); |
| APFloat HNegLargest = APFloat::getLargest(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(/*-0x1.FFCp15*/ -65504.0F, HNegLargest.convertToFloat()); |
| APFloat HPosSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(/*0x1.p-14*/ 6.103515625e-05F, HPosSmallest.convertToFloat()); |
| APFloat HNegSmallest = |
| APFloat::getSmallestNormalized(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(/*-0x1.p-14*/ -6.103515625e-05F, HNegSmallest.convertToFloat()); |
| |
| APFloat HSmallestDenorm = APFloat::getSmallest(APFloat::IEEEhalf(), false); |
| EXPECT_EQ(/*0x1.p-24*/ 5.960464477539063e-08F, |
| HSmallestDenorm.convertToFloat()); |
| // Largest half denormal: all ten fraction bits set below the minimum normal |
| // exponent, i.e. 0x1.FF8p-15 == 0x0.FFCp-14 == 2^-14 - 2^-24. |
| APFloat HLargestDenorm(APFloat::IEEEhalf(), "0x1.FF8p-15"); |
| EXPECT_TRUE(HLargestDenorm.isDenormal()); |
| EXPECT_EQ(0x1.FF8p-15F, HLargestDenorm.convertToFloat()); |
| |
| APFloat HPosInf = APFloat::getInf(APFloat::IEEEhalf()); |
| EXPECT_EQ(std::numeric_limits<float>::infinity(), HPosInf.convertToFloat()); |
| APFloat HNegInf = APFloat::getInf(APFloat::IEEEhalf(), true); |
| EXPECT_EQ(-std::numeric_limits<float>::infinity(), HNegInf.convertToFloat()); |
| APFloat HQNaN = APFloat::getQNaN(APFloat::IEEEhalf()); |
| EXPECT_TRUE(std::isnan(HQNaN.convertToFloat())); |
| } |
| |
| // Exercises convertToFloat() on bfloat16 values: signed zeros, one, the
| // largest and smallest normals, denormals, infinities and a quiet NaN.
| TEST(APFloatTest, BFloatToFloat) {
|   const fltSemantics &BF16 = APFloat::BFloat();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat PosZero = APFloat::getZero(BF16);
|   APFloat PosZeroAsFloat(PosZero.convertToFloat());
|   EXPECT_TRUE(PosZeroAsFloat.isPosZero());
|   APFloat NegZero = APFloat::getZero(BF16, true);
|   APFloat NegZeroAsFloat(NegZero.convertToFloat());
|   EXPECT_TRUE(NegZeroAsFloat.isNegZero());
|
|   APFloat Unit(BF16, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(BF16, false);
|   EXPECT_EQ(/*0x1.FEp127*/ 3.3895313892515355e+38F, MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(BF16, true);
|   EXPECT_EQ(/*-0x1.FEp127*/ -3.3895313892515355e+38F,
|             MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(BF16, false);
|   EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38F,
|             MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(BF16, true);
|   EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38F,
|             MinNormNeg.convertToFloat());
|
|   // Denormals at both ends of the denormal range.
|   APFloat MinDenorm = APFloat::getSmallest(BF16, false);
|   EXPECT_EQ(/*0x1.p-133*/ 9.183549615799121e-41F, MinDenorm.convertToFloat());
|   APFloat MaxDenorm(BF16, "0x1.FCp-127");
|   EXPECT_EQ(/*0x1.FCp-127*/ 1.1663108012064884e-38F,
|             MaxDenorm.convertToFloat());
|
|   // Non-finite values.
|   APFloat InfPos = APFloat::getInf(BF16);
|   EXPECT_EQ(std::numeric_limits<float>::infinity(), InfPos.convertToFloat());
|   APFloat InfNeg = APFloat::getInf(BF16, true);
|   EXPECT_EQ(-std::numeric_limits<float>::infinity(), InfNeg.convertToFloat());
|   APFloat QuietNaN = APFloat::getQNaN(BF16);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Exercises convertToFloat() on Float8E5M2 values: signed zeros, small
| // integers, extremes, the smallest denormal, infinities and a quiet NaN.
| TEST(APFloatTest, Float8E5M2ToFloat) {
|   const fltSemantics &E5M2 = APFloat::Float8E5M2();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat ZeroPos = APFloat::getZero(E5M2);
|   APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
|   EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
|   APFloat ZeroNeg = APFloat::getZero(E5M2, true);
|   APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
|   EXPECT_TRUE(ZeroNegAsFloat.isNegZero());
|
|   APFloat Unit(E5M2, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|   APFloat Double(E5M2, "2.0");
|   EXPECT_EQ(2.0F, Double.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(E5M2, false);
|   EXPECT_EQ(5.734400e+04, MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(E5M2, true);
|   EXPECT_EQ(-5.734400e+04, MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(E5M2, false);
|   EXPECT_EQ(0x1.p-14, MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(E5M2, true);
|   EXPECT_EQ(-0x1.p-14, MinNormNeg.convertToFloat());
|
|   // Smallest positive value, which must be reported as a denormal.
|   APFloat MinDenorm = APFloat::getSmallest(E5M2, false);
|   EXPECT_TRUE(MinDenorm.isDenormal());
|   EXPECT_EQ(0x1.p-16, MinDenorm.convertToFloat());
|
|   // Non-finite values.
|   APFloat InfPos = APFloat::getInf(E5M2);
|   EXPECT_EQ(std::numeric_limits<float>::infinity(), InfPos.convertToFloat());
|   APFloat InfNeg = APFloat::getInf(E5M2, true);
|   EXPECT_EQ(-std::numeric_limits<float>::infinity(), InfNeg.convertToFloat());
|   APFloat QuietNaN = APFloat::getQNaN(E5M2);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Exercises convertToFloat() on Float8E4M3 values: signed zeros, small
| // integers, extremes, the smallest denormal, infinities and a quiet NaN.
| TEST(APFloatTest, Float8E4M3ToFloat) {
|   const fltSemantics &E4M3 = APFloat::Float8E4M3();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat ZeroPos = APFloat::getZero(E4M3);
|   APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
|   EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
|   APFloat ZeroNeg = APFloat::getZero(E4M3, true);
|   APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
|   EXPECT_TRUE(ZeroNegAsFloat.isNegZero());
|
|   APFloat Unit(E4M3, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|   APFloat Double(E4M3, "2.0");
|   EXPECT_EQ(2.0F, Double.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(E4M3, false);
|   EXPECT_EQ(240.0F, MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(E4M3, true);
|   EXPECT_EQ(-240.0F, MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(E4M3, false);
|   EXPECT_EQ(0x1.p-6, MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(E4M3, true);
|   EXPECT_EQ(-0x1.p-6, MinNormNeg.convertToFloat());
|
|   // Smallest positive value, which must be reported as a denormal.
|   APFloat MinDenorm = APFloat::getSmallest(E4M3, false);
|   EXPECT_TRUE(MinDenorm.isDenormal());
|   EXPECT_EQ(0x1.p-9, MinDenorm.convertToFloat());
|
|   // Non-finite values.
|   APFloat InfPos = APFloat::getInf(E4M3);
|   EXPECT_EQ(std::numeric_limits<float>::infinity(), InfPos.convertToFloat());
|   APFloat InfNeg = APFloat::getInf(E4M3, true);
|   EXPECT_EQ(-std::numeric_limits<float>::infinity(), InfNeg.convertToFloat());
|   APFloat QuietNaN = APFloat::getQNaN(E4M3);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Exercises convertToFloat() on Float8E4M3FN values: signed zeros, small
| // integers, extremes, the smallest denormal and a quiet NaN.
| TEST(APFloatTest, Float8E4M3FNToFloat) {
|   const fltSemantics &E4M3FN = APFloat::Float8E4M3FN();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat ZeroPos = APFloat::getZero(E4M3FN);
|   APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
|   EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
|   APFloat ZeroNeg = APFloat::getZero(E4M3FN, true);
|   APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
|   EXPECT_TRUE(ZeroNegAsFloat.isNegZero());
|
|   APFloat Unit(E4M3FN, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|   APFloat Double(E4M3FN, "2.0");
|   EXPECT_EQ(2.0F, Double.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(E4M3FN, false);
|   EXPECT_EQ(448., MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(E4M3FN, true);
|   EXPECT_EQ(-448, MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(E4M3FN, false);
|   EXPECT_EQ(0x1.p-6, MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(E4M3FN, true);
|   EXPECT_EQ(-0x1.p-6, MinNormNeg.convertToFloat());
|
|   // Smallest positive value, which must be reported as a denormal.
|   APFloat MinDenorm = APFloat::getSmallest(E4M3FN, false);
|   EXPECT_TRUE(MinDenorm.isDenormal());
|   EXPECT_EQ(0x1.p-9, MinDenorm.convertToFloat());
|
|   APFloat QuietNaN = APFloat::getQNaN(E4M3FN);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Exercises convertToFloat() on Float8E3M4 values: signed zeros, small
| // integers, extremes, signed smallest denormals, infinities and a quiet NaN.
| TEST(APFloatTest, Float8E3M4ToFloat) {
|   const fltSemantics &E3M4 = APFloat::Float8E3M4();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat ZeroPos = APFloat::getZero(E3M4, false);
|   APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
|   EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
|   APFloat ZeroNeg = APFloat::getZero(E3M4, true);
|   APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
|   EXPECT_TRUE(ZeroNegAsFloat.isNegZero());
|
|   APFloat Unit(E3M4, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|   APFloat Double(E3M4, "2.0");
|   EXPECT_EQ(2.0F, Double.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(E3M4, false);
|   EXPECT_EQ(15.5F, MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(E3M4, true);
|   EXPECT_EQ(-15.5F, MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(E3M4, false);
|   EXPECT_EQ(0x1.p-2, MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(E3M4, true);
|   EXPECT_EQ(-0x1.p-2, MinNormNeg.convertToFloat());
|
|   // Smallest magnitudes of either sign, which must be reported as denormals.
|   APFloat MinDenormPos = APFloat::getSmallest(E3M4, false);
|   EXPECT_TRUE(MinDenormPos.isDenormal());
|   EXPECT_EQ(0x1.p-6, MinDenormPos.convertToFloat());
|   APFloat MinDenormNeg = APFloat::getSmallest(E3M4, true);
|   EXPECT_TRUE(MinDenormNeg.isDenormal());
|   EXPECT_EQ(-0x1.p-6, MinDenormNeg.convertToFloat());
|
|   // Non-finite values.
|   APFloat InfPos = APFloat::getInf(E3M4);
|   EXPECT_EQ(std::numeric_limits<float>::infinity(), InfPos.convertToFloat());
|   APFloat InfNeg = APFloat::getInf(E3M4, true);
|   EXPECT_EQ(-std::numeric_limits<float>::infinity(), InfNeg.convertToFloat());
|   APFloat QuietNaN = APFloat::getQNaN(E3M4);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Exercises convertToFloat() on FloatTF32 values: signed zeros, small
| // integers, extremes, the smallest denormal and a quiet NaN.
| TEST(APFloatTest, FloatTF32ToFloat) {
|   const fltSemantics &TF32 = APFloat::FloatTF32();
|
|   // The sign of zero must be preserved by the conversion.
|   APFloat ZeroPos = APFloat::getZero(TF32);
|   APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
|   EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
|   APFloat ZeroNeg = APFloat::getZero(TF32, true);
|   APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
|   EXPECT_TRUE(ZeroNegAsFloat.isNegZero());
|
|   APFloat Unit(TF32, "1.0");
|   EXPECT_EQ(1.0F, Unit.convertToFloat());
|   APFloat Double(TF32, "2.0");
|   EXPECT_EQ(2.0F, Double.convertToFloat());
|
|   // Largest finite magnitudes of either sign.
|   APFloat MaxPos = APFloat::getLargest(TF32, false);
|   EXPECT_EQ(3.40116213421e+38F, MaxPos.convertToFloat());
|   APFloat MaxNeg = APFloat::getLargest(TF32, true);
|   EXPECT_EQ(-3.40116213421e+38F, MaxNeg.convertToFloat());
|
|   // Smallest normalized magnitudes of either sign.
|   APFloat MinNormPos = APFloat::getSmallestNormalized(TF32, false);
|   EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38F,
|             MinNormPos.convertToFloat());
|   APFloat MinNormNeg = APFloat::getSmallestNormalized(TF32, true);
|   EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38F,
|             MinNormNeg.convertToFloat());
|
|   // Smallest positive value, which must be reported as a denormal.
|   APFloat MinDenorm = APFloat::getSmallest(TF32, false);
|   EXPECT_TRUE(MinDenorm.isDenormal());
|   EXPECT_EQ(0x0.004p-126, MinDenorm.convertToFloat());
|
|   APFloat QuietNaN = APFloat::getQNaN(TF32);
|   EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
| }
| |
| // Checks getExactLog2() and getExactLog2Abs() for every semantics except
| // Float8E8M0FNU, which is skipped here (see the comment inside the loop).
| TEST(APFloatTest, getExactLog2) {
|   // Result the checks below expect whenever no exact log2 is reported: 3.0,
|   // zeros, Inf/NaN, out-of-range scalings, and negative inputs to
|   // getExactLog2(). Spelled with <limits>, which this file includes directly,
|   // instead of INT_MIN, whose header <climits> is only reachable transitively.
|   constexpr int NoExactLog2 = std::numeric_limits<int>::min();
|
|   for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) {
|     auto SemEnum = static_cast<APFloat::Semantics>(I);
|     const fltSemantics &Semantics = APFloat::EnumToSemantics(SemEnum);
|
|     // For the Float8E8M0FNU format, the below cases along
|     // with some more corner cases are tested through
|     // Float8E8M0FNUGetExactLog2.
|     if (I == APFloat::S_Float8E8M0FNU)
|       continue;
|
|     APFloat One(Semantics, "1.0");
|     APFloat Smallest = APFloat::getSmallest(Semantics);
|     APFloat Largest = APFloat::getLargest(Semantics);
|
|     int MinExp = APFloat::semanticsMinExponent(Semantics);
|     int MaxExp = APFloat::semanticsMaxExponent(Semantics);
|     int Precision = APFloat::semanticsPrecision(Semantics);
|
|     // 1.0 is 2^0; 3.0 is not a power of two in either sign.
|     EXPECT_EQ(0, One.getExactLog2());
|     EXPECT_EQ(NoExactLog2, APFloat(Semantics, "3.0").getExactLog2());
|     EXPECT_EQ(NoExactLog2, APFloat(Semantics, "-3.0").getExactLog2());
|     EXPECT_EQ(NoExactLog2, APFloat(Semantics, "3.0").getExactLog2Abs());
|     EXPECT_EQ(NoExactLog2, APFloat(Semantics, "-3.0").getExactLog2Abs());
|
|     // Float6E2M3FN and Float4E2M1FN are probed with +/-4.0 only; every other
|     // format is probed with +/-8.0 and +/-0.25.
|     if (I == APFloat::S_Float6E2M3FN || I == APFloat::S_Float4E2M1FN) {
|       EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2());
|       EXPECT_EQ(NoExactLog2, APFloat(Semantics, "-4.0").getExactLog2());
|       EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs());
|       EXPECT_EQ(2, APFloat(Semantics, "-4.0").getExactLog2Abs());
|     } else {
|       EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
|       EXPECT_EQ(NoExactLog2, APFloat(Semantics, "-8.0").getExactLog2());
|       EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
|       EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
|       EXPECT_EQ(NoExactLog2, APFloat(Semantics, "-0.25").getExactLog2());
|       EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs());
|       EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
|       EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs());
|     }
|
|     // Zeros of either sign.
|     EXPECT_EQ(NoExactLog2, APFloat::getZero(Semantics, false).getExactLog2());
|     EXPECT_EQ(NoExactLog2, APFloat::getZero(Semantics, true).getExactLog2());
|     EXPECT_EQ(NoExactLog2,
|               APFloat::getZero(Semantics, false).getExactLog2Abs());
|     EXPECT_EQ(NoExactLog2,
|               APFloat::getZero(Semantics, true).getExactLog2Abs());
|
|     if (APFloat::semanticsHasNaN(Semantics)) {
|       // Types that do not support Inf will return NaN when asked for Inf.
|       // (But only if they support NaN.)
|       EXPECT_EQ(NoExactLog2, APFloat::getInf(Semantics).getExactLog2());
|       EXPECT_EQ(NoExactLog2, APFloat::getInf(Semantics, true).getExactLog2());
|       EXPECT_EQ(NoExactLog2, APFloat::getNaN(Semantics, false).getExactLog2());
|       EXPECT_EQ(NoExactLog2, APFloat::getNaN(Semantics, true).getExactLog2());
|
|       EXPECT_EQ(NoExactLog2, APFloat::getInf(Semantics).getExactLog2Abs());
|       EXPECT_EQ(NoExactLog2,
|                 APFloat::getInf(Semantics, true).getExactLog2Abs());
|       EXPECT_EQ(NoExactLog2,
|                 APFloat::getNaN(Semantics, false).getExactLog2Abs());
|       EXPECT_EQ(NoExactLog2,
|                 APFloat::getNaN(Semantics, true).getExactLog2Abs());
|     }
|
|     // Scaling the smallest value down, or the largest value up, must not
|     // produce an exact log2.
|     EXPECT_EQ(
|         NoExactLog2,
|         scalbn(Smallest, -2, APFloat::rmNearestTiesToEven).getExactLog2());
|     EXPECT_EQ(
|         NoExactLog2,
|         scalbn(Smallest, -1, APFloat::rmNearestTiesToEven).getExactLog2());
|
|     EXPECT_EQ(NoExactLog2,
|               scalbn(Largest, 1, APFloat::rmNearestTiesToEven).getExactLog2());
|
|     // Every power of two from MinExp - Precision + 1 up to MaxExp must report
|     // its own exponent.
|     for (int i = MinExp - Precision + 1; i <= MaxExp; ++i) {
|       EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2());
|     }
|   }
| }
| |
| // When death tests are available and assertions are enabled, requesting a
| // zero of either sign for Float8E8M0FNU is expected to abort with a
| // diagnostic.
| TEST(APFloatTest, Float8E8M0FNUGetZero) {
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   const char *NoZeroMsg = "This floating point format does not support Zero";
|
|   EXPECT_DEATH(APFloat::getZero(E8M0, false), NoZeroMsg);
|   EXPECT_DEATH(APFloat::getZero(E8M0, true), NoZeroMsg);
| #endif
| }
| |
| // When death tests are available and assertions are enabled, every attempt to
| // produce a negative Float8E8M0FNU value is expected to abort with a
| // diagnostic.
| TEST(APFloatTest, Float8E8M0FNUGetSignedValues) {
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   const char *NoSignMsg =
|       "This floating point format does not support signed values";
|
|   // Negative values parsed from strings.
|   EXPECT_DEATH(APFloat(E8M0, "-64"), NoSignMsg);
|   EXPECT_DEATH(APFloat(E8M0, "-0x1.0p128"), NoSignMsg);
|   EXPECT_DEATH(APFloat(E8M0, "-inf"), NoSignMsg);
|
|   // Negative values requested through the factory functions.
|   EXPECT_DEATH(APFloat::getNaN(E8M0, true), NoSignMsg);
|   EXPECT_DEATH(APFloat::getInf(E8M0, true), NoSignMsg);
|   EXPECT_DEATH(APFloat::getSmallest(E8M0, true), NoSignMsg);
|   EXPECT_DEATH(APFloat::getSmallestNormalized(E8M0, true), NoSignMsg);
|   EXPECT_DEATH(APFloat::getLargest(E8M0, true), NoSignMsg);
|
|   // 4 - 8 would be negative.
|   APFloat Four = APFloat(E8M0, "4");
|   APFloat Eight = APFloat(E8M0, "8");
|   EXPECT_DEATH(Four.subtract(Eight, APFloat::rmNearestTiesToEven), NoSignMsg);
| #endif
| }
| |
| // Float8E8M0FNU has no infinity: the all-ones representation is treated as
| // NaN, so a requested infinity must report NaN and not infinity.
| TEST(APFloatTest, Float8E8M0FNUGetInf) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   APFloat Requested = APFloat::getInf(E8M0);
|   EXPECT_TRUE(Requested.isNaN());
|   EXPECT_FALSE(Requested.isInfinity());
| }
| |
| // Parses a few strings as Float8E8M0FNU: an exact value, an overflowing
| // value, and the "inf"/"nan" spellings.
| TEST(APFloatTest, Float8E8M0FNUFromString) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|
|   // An exactly representable value comes back unchanged.
|   APFloat Exact(E8M0, "64");
|   EXPECT_EQ(64, Exact.convertToDouble());
|
|   // A value above the representable range overflows to NaN.
|   APFloat TooLarge(E8M0, "0x1.0p128");
|   EXPECT_TRUE(TooLarge.isNaN());
|
|   // Infinity becomes NaN.
|   APFloat FromInf(E8M0, "inf");
|   EXPECT_TRUE(FromInf.isNaN());
|
|   // NaN stays NaN.
|   APFloat FromNaN(E8M0, "nan");
|   EXPECT_TRUE(FromNaN.isNaN());
| }
| |
| // Divides 1 by a Float8E8M0FNU value parsed from "0" and checks both the
| // divisor and the quotient.
| TEST(APFloatTest, Float8E8M0FNUDivideByZero) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   APFloat Dividend(E8M0, "1");
|   APFloat Divisor(E8M0, "0");
|   Dividend.divide(Divisor, APFloat::rmNearestTiesToEven);
|
|   // In this format zero is represented as the smallest normalized value,
|   // i.e. 2^-127. This covers the fix in convertFromDecimalString().
|   EXPECT_EQ(0x1.0p-127, Divisor.convertToDouble());
|
|   // 1 / 2^-127 == 2^127.
|   EXPECT_EQ(0x1.0p127, Dividend.convertToDouble());
| }
| |
| // Checks getExactLog2() and getExactLog2Abs() for Float8E8M0FNU, including
| // rounding of non-power-of-two inputs and behavior at both exponent limits.
| TEST(APFloatTest, Float8E8M0FNUGetExactLog2) {
|   // Result expected when the value overflowed to NaN. Spelled with <limits>,
|   // which this file includes directly, instead of INT_MIN, whose header
|   // <climits> is only reachable transitively.
|   constexpr int NoExactLog2 = std::numeric_limits<int>::min();
|
|   const fltSemantics &Semantics = APFloat::Float8E8M0FNU();
|   APFloat One(Semantics, "1.0");
|   EXPECT_EQ(0, One.getExactLog2());
|
|   // In the Float8E8M0FNU format, 3 is rounded-up to 4.
|   // So, we expect 2 as the result.
|   EXPECT_EQ(2, APFloat(Semantics, "3.0").getExactLog2());
|   EXPECT_EQ(2, APFloat(Semantics, "3.0").getExactLog2Abs());
|
|   // In the Float8E8M0FNU format, 5 is rounded-down to 4.
|   // So, we expect 2 as the result.
|   EXPECT_EQ(2, APFloat(Semantics, "5.0").getExactLog2());
|   EXPECT_EQ(2, APFloat(Semantics, "5.0").getExactLog2Abs());
|
|   // Exact power-of-two value.
|   EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
|   EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
|
|   // Negative exponent value.
|   EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
|   EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
|
|   int MinExp = APFloat::semanticsMinExponent(Semantics);
|   int MaxExp = APFloat::semanticsMaxExponent(Semantics);
|   int Precision = APFloat::semanticsPrecision(Semantics);
|
|   // Values below the minExp getting capped to minExp.
|   EXPECT_EQ(-127,
|             scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven)
|                 .getExactLog2());
|   EXPECT_EQ(-127, scalbn(One, MinExp - Precision, APFloat::rmNearestTiesToEven)
|                       .getExactLog2());
|
|   // Values above the maxExp overflow to NaN, and getExactLog2() returns
|   // the minimum int value for these cases.
|   EXPECT_EQ(
|       NoExactLog2,
|       scalbn(One, MaxExp + 1, APFloat::rmNearestTiesToEven).getExactLog2());
|
|   // This format can represent [minExp, maxExp].
|   // So, the result is the same as the 'Exp' of the scalbn.
|   for (int i = MinExp - Precision + 1; i <= MaxExp; ++i) {
|     EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2());
|   }
| }
| |
| // Checks the value and classification of getSmallest() for Float8E8M0FNU,
| // and that the all-ones value is NaN rather than the smallest normal.
| TEST(APFloatTest, Float8E8M0FNUSmallest) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|
|   APFloat Val(APFloat::getSmallest(E8M0));
|   EXPECT_EQ(0x1.0p-127, Val.convertToDouble());
|
|   // E8M0 has no denormals, so the smallest value is also the smallest
|   // normalized value and classifies as a positive normal.
|   EXPECT_TRUE(Val.isSmallestNormalized());
|   EXPECT_EQ(fcPosNormal, Val.classify());
|
|   Val = APFloat::getAllOnesValue(E8M0);
|   EXPECT_FALSE(Val.isSmallestNormalized());
|   EXPECT_TRUE(Val.isNaN());
| }
| |
| // Steps a Float8E8M0FNU value up and down with next() around the smallest
| // normalized value.
| TEST(APFloatTest, Float8E8M0FNUNext) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   APFloat Val(APFloat::getSmallest(E8M0));
|
|   // One step up from the smallest value lands on 2^-126.
|   EXPECT_EQ(APFloat::opOK, Val.next(false));
|   EXPECT_FALSE(Val.isSmallestNormalized());
|   EXPECT_EQ(0x1.0p-126, Val.convertToDouble());
|
|   // One step down returns to 2^-127, the smallest normalized value.
|   EXPECT_EQ(APFloat::opOK, Val.next(true));
|   EXPECT_TRUE(Val.isSmallestNormalized());
|
|   // A further step down is capped at the smallest normalized value.
|   EXPECT_EQ(APFloat::opOK, Val.next(true));
|   EXPECT_TRUE(Val.isSmallestNormalized());
| }
| |
| // Checks fusedMultiplyAdd() on Float8E8M0FNU for an exact result, a result
| // that rounds up, a result that rounds down, and fully aliased operands.
| TEST(APFloatTest, Float8E8M0FNUFMA) {
|   const fltSemantics &E8M0 = APFloat::Float8E8M0FNU();
|   const auto RM = APFloat::rmNearestTiesToEven;
|
|   APFloat Acc(E8M0, "4.0");
|   APFloat Two(E8M0, "2.0");
|   APFloat Eight(E8M0, "8.0");
|
|   // 4*2 + 8 = 16 is exact.
|   Acc.fusedMultiplyAdd(Two, Eight, RM);
|   EXPECT_EQ(16.0, Acc.convertToDouble());
|
|   // 4*2 + 4 = 12, which rounds up to 16.
|   Acc = APFloat(E8M0, "4.0");
|   Acc.fusedMultiplyAdd(Two, Acc, RM);
|   EXPECT_EQ(16.0, Acc.convertToDouble());
|
|   // 4*2 + 2 = 10, which rounds down to 8.
|   Acc = APFloat(E8M0, "4.0");
|   Acc.fusedMultiplyAdd(Two, Two, RM);
|   EXPECT_EQ(8.0, Acc.convertToDouble());
|
|   // The same object serves as multiplier, multiplicand and addend.
|   Acc = APFloat(E8M0, "1.0");
|   Acc.fusedMultiplyAdd(Acc, Acc, RM);
|   EXPECT_EQ(2.0, Acc.convertToDouble());
| }
| |
| // Converts IEEE double values to Float8E8M0FNU and checks the resulting
| // value, the losesInfo flag and the returned status for exact, rounded,
| // overflowing and underflowing inputs.
| TEST(APFloatTest, ConvertDoubleToE8M0FNU) {
|   const fltSemantics &Src = APFloat::IEEEdouble();
|   const fltSemantics &Dst = APFloat::Float8E8M0FNU();
|   const auto RM = APFloat::rmNearestTiesToEven;
|   bool Lossy;
|
|   // 1.0 converts exactly.
|   APFloat Val(Src, "1.0");
|   APFloat::opStatus St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(1.0, Val.convertToDouble());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // For E8M0, zero encoding is represented as the smallest normalized value.
|   Val = APFloat(Src, "0.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_TRUE(Val.isSmallestNormalized());
|   EXPECT_EQ(0x1.0p-127, Val.convertToDouble());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // A power of two converts precisely.
|   Val = APFloat(Src, "8.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(8.0f, Val.convertToDouble());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // Round-down to a power of two: 9 = 1.125 x 2^3 (fraction bits "001").
|   Val = APFloat(Src, "9.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(8.0f, Val.convertToDouble());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
|
|   // Round-up to a power of two: 13 = 1.625 x 2^3 (fraction bits "101").
|   Val = APFloat(Src, "13.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(16.0f, Val.convertToDouble());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
|
|   // Round-up to a power of two: 12 = 1.5 x 2^3 (fraction bits "100").
|   Val = APFloat(Src, "12.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(16.0f, Val.convertToDouble());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
|
|   // Overflow produces NaN.
|   Val = APFloat(Src, "0x1.0p128");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_TRUE(Val.isNaN());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opOverflow | APFloat::opInexact);
|
|   // Underflow produces the smallest normalized value.
|   Val = APFloat(Src, "0x1.0p-128");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_TRUE(Val.isSmallestNormalized());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opUnderflow | APFloat::opInexact);
| }
| |
| // Parses strings as Float6E3M2FN: an exact value, a value above the maximum,
| // signed zeros, and (in assert-enabled builds with death tests) the
| // unsupported "inf"/"nan" spellings.
| TEST(APFloatTest, Float6E3M2FNFromString) {
|   const fltSemantics &E3M2 = APFloat::Float6E3M2FN();
|
|   // An exactly representable value comes back unchanged.
|   APFloat Exact(E3M2, "28");
|   EXPECT_EQ(28, Exact.convertToDouble());
|   // A larger value is rounded down to the maximum.
|   APFloat TooLarge(E3M2, "32");
|   EXPECT_EQ(28, TooLarge.convertToDouble());
|
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
|   EXPECT_DEATH(APFloat(E3M2, "inf"),
|                "This floating point format does not support Inf");
|   EXPECT_DEATH(APFloat(E3M2, "nan"),
|                "This floating point format does not support NaN");
| #endif
|
|   // Zero keeps the sign it was written with.
|   APFloat ZeroPos(E3M2, "0");
|   EXPECT_TRUE(ZeroPos.isPosZero());
|   APFloat ZeroNeg(E3M2, "-0");
|   EXPECT_TRUE(ZeroNeg.isNegZero());
| }
| |
| // Parses strings as Float6E2M3FN: an exact value, a value above the maximum,
| // signed zeros, and (in assert-enabled builds with death tests) the
| // unsupported "inf"/"nan" spellings.
| TEST(APFloatTest, Float6E2M3FNFromString) {
|   const fltSemantics &E2M3 = APFloat::Float6E2M3FN();
|
|   // An exactly representable value comes back unchanged.
|   APFloat Exact(E2M3, "7.5");
|   EXPECT_EQ(7.5, Exact.convertToDouble());
|   // A larger value is rounded down to the maximum.
|   APFloat TooLarge(E2M3, "32");
|   EXPECT_EQ(7.5, TooLarge.convertToDouble());
|
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
|   EXPECT_DEATH(APFloat(E2M3, "inf"),
|                "This floating point format does not support Inf");
|   EXPECT_DEATH(APFloat(E2M3, "nan"),
|                "This floating point format does not support NaN");
| #endif
|
|   // Zero keeps the sign it was written with.
|   APFloat ZeroPos(E2M3, "0");
|   EXPECT_TRUE(ZeroPos.isPosZero());
|   APFloat ZeroNeg(E2M3, "-0");
|   EXPECT_TRUE(ZeroNeg.isNegZero());
| }
| |
| // Parses strings as Float4E2M1FN: an exact value, a value above the maximum,
| // signed zeros, and (in assert-enabled builds with death tests) the
| // unsupported "inf"/"nan" spellings.
| TEST(APFloatTest, Float4E2M1FNFromString) {
|   const fltSemantics &E2M1 = APFloat::Float4E2M1FN();
|
|   // An exactly representable value comes back unchanged.
|   APFloat Exact(E2M1, "6");
|   EXPECT_EQ(6, Exact.convertToDouble());
|   // A larger value is rounded down to the maximum.
|   APFloat TooLarge(E2M1, "32");
|   EXPECT_EQ(6, TooLarge.convertToDouble());
|
| #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
|   EXPECT_DEATH(APFloat(E2M1, "inf"),
|                "This floating point format does not support Inf");
|   EXPECT_DEATH(APFloat(E2M1, "nan"),
|                "This floating point format does not support NaN");
| #endif
|
|   // Zero keeps the sign it was written with.
|   APFloat ZeroPos(E2M1, "0");
|   EXPECT_TRUE(ZeroPos.isPosZero());
|   APFloat ZeroNeg(E2M1, "-0");
|   EXPECT_TRUE(ZeroNeg.isNegZero());
| }
| |
| // Converts Float6E3M2FN values to Float6E2M3FN and checks the resulting
| // value, the losesInfo flag and the returned status.
| TEST(APFloatTest, ConvertE3M2FToE2M3F) {
|   const fltSemantics &Src = APFloat::Float6E3M2FN();
|   const fltSemantics &Dst = APFloat::Float6E2M3FN();
|   const auto RM = APFloat::rmNearestTiesToEven;
|   bool Lossy;
|
|   // 1.0 converts exactly.
|   APFloat Val(Src, "1.0");
|   APFloat::opStatus St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(1.0f, Val.convertToFloat());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // 0.0 converts exactly.
|   Val = APFloat(Src, "0.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0.0f, Val.convertToFloat());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // Overflow: 28 becomes 7.5 and is reported as inexact.
|   Val = APFloat(Src, "28");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(7.5f, Val.convertToFloat());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
|
|   // Underflow: .0625 becomes zero.
|   Val = APFloat(Src, ".0625");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0., Val.convertToFloat());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opUnderflow | APFloat::opInexact);
|
|   // Inexact rounding to a denormal number: 0.1875 becomes 0.25.
|   Val = APFloat(Src, "0.1875");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0.25, Val.convertToFloat());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opUnderflow | APFloat::opInexact);
| }
| |
| // Converts Float6E2M3FN values to Float6E3M2FN and checks the resulting
| // value, the losesInfo flag and the returned status.
| TEST(APFloatTest, ConvertE2M3FToE3M2F) {
|   const fltSemantics &Src = APFloat::Float6E2M3FN();
|   const fltSemantics &Dst = APFloat::Float6E3M2FN();
|   const auto RM = APFloat::rmNearestTiesToEven;
|   bool Lossy;
|
|   // 1.0 converts exactly.
|   APFloat Val(Src, "1.0");
|   APFloat::opStatus St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(1.0f, Val.convertToFloat());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // 0.0 converts exactly.
|   Val = APFloat(Src, "0.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0.0f, Val.convertToFloat());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // .125 converts exactly.
|   Val = APFloat(Src, ".125");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(.125, Val.convertToFloat());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // Inexact rounding: 7.5 becomes 8.
|   Val = APFloat(Src, "7.5");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(8, Val.convertToFloat());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
| }
| |
| // Converts IEEE double values to Float4E2M1FN and checks the resulting
| // value, the losesInfo flag and the returned status.
| TEST(APFloatTest, ConvertDoubleToE2M1F) {
|   const fltSemantics &Src = APFloat::IEEEdouble();
|   const fltSemantics &Dst = APFloat::Float4E2M1FN();
|   const auto RM = APFloat::rmNearestTiesToEven;
|   bool Lossy;
|
|   // 1.0 converts exactly.
|   APFloat Val(Src, "1.0");
|   APFloat::opStatus St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(1.0, Val.convertToDouble());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // 0.0 converts exactly.
|   Val = APFloat(Src, "0.0");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0.0f, Val.convertToDouble());
|   EXPECT_FALSE(Lossy);
|   EXPECT_EQ(St, APFloat::opOK);
|
|   // Overflow: 8 becomes 6 and is reported as inexact.
|   Val = APFloat(Src, "8");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(6, Val.convertToDouble());
|   EXPECT_TRUE(Lossy);
|   EXPECT_EQ(St, APFloat::opInexact);
|
|   // Underflow: 0.25 becomes zero, which is not a denormal.
|   Val = APFloat(Src, "0.25");
|   St = Val.convert(Dst, RM, &Lossy);
|   EXPECT_EQ(0., Val.convertToDouble());
|   EXPECT_TRUE(Lossy);
|   EXPECT_FALSE(Val.isDenormal());
|   EXPECT_EQ(St, APFloat::opUnderflow | APFloat::opInexact);
| }
| |
| // Checks next() for Float6E3M2FN at the largest values of either sign and
| // around zero.
| TEST(APFloatTest, Float6E3M2FNNext) {
|   const fltSemantics &E3M2 = APFloat::Float6E3M2FN();
|   APFloat Val(E3M2, APFloat::uninitialized);
|   APFloat Want(E3M2, APFloat::uninitialized);
|
|   // 1. Stepping up from the largest value leaves it unchanged.
|   Val = APFloat::getLargest(E3M2);
|   Want = APFloat::getLargest(E3M2);
|   EXPECT_EQ(Val.next(false), APFloat::opOK);
|   EXPECT_FALSE(Val.isInfinity());
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 2. Stepping up from the smallest negative denormal gives -0.
|   Val = APFloat::getSmallest(E3M2, true);
|   Want = APFloat::getZero(E3M2, true);
|   EXPECT_EQ(Val.next(false), APFloat::opOK);
|   EXPECT_TRUE(Val.isNegZero());
|   EXPECT_FALSE(Val.isPosZero());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 3. Stepping down from the negated largest value leaves it unchanged.
|   Val = APFloat::getLargest(E3M2, true);
|   Want = Val;
|   EXPECT_EQ(Val.next(true), APFloat::opOK);
|   EXPECT_FALSE(Val.isInfinity());
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_FALSE(Val.isNaN());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 4. Stepping down from +0 gives the smallest negative denormal.
|   Val = APFloat::getZero(E3M2, false);
|   Want = APFloat::getSmallest(E3M2, true);
|   EXPECT_EQ(Val.next(true), APFloat::opOK);
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_TRUE(Val.isDenormal());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
| }
| |
| // Checks next() for Float6E2M3FN at the largest values of either sign and
| // around zero.
| TEST(APFloatTest, Float6E2M3FNNext) {
|   const fltSemantics &E2M3 = APFloat::Float6E2M3FN();
|   APFloat Val(E2M3, APFloat::uninitialized);
|   APFloat Want(E2M3, APFloat::uninitialized);
|
|   // 1. Stepping up from the largest value leaves it unchanged.
|   Val = APFloat::getLargest(E2M3);
|   Want = APFloat::getLargest(E2M3);
|   EXPECT_EQ(Val.next(false), APFloat::opOK);
|   EXPECT_FALSE(Val.isInfinity());
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 2. Stepping up from the smallest negative denormal gives -0.
|   Val = APFloat::getSmallest(E2M3, true);
|   Want = APFloat::getZero(E2M3, true);
|   EXPECT_EQ(Val.next(false), APFloat::opOK);
|   EXPECT_TRUE(Val.isNegZero());
|   EXPECT_FALSE(Val.isPosZero());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 3. Stepping down from the negated largest value leaves it unchanged.
|   Val = APFloat::getLargest(E2M3, true);
|   Want = Val;
|   EXPECT_EQ(Val.next(true), APFloat::opOK);
|   EXPECT_FALSE(Val.isInfinity());
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_FALSE(Val.isNaN());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
|
|   // 4. Stepping down from +0 gives the smallest negative denormal.
|   Val = APFloat::getZero(E2M3, false);
|   Want = APFloat::getSmallest(E2M3, true);
|   EXPECT_EQ(Val.next(true), APFloat::opOK);
|   EXPECT_FALSE(Val.isZero());
|   EXPECT_TRUE(Val.isDenormal());
|   EXPECT_TRUE(Val.bitwiseIsEqual(Want));
| }
| |
| TEST(APFloatTest, Float4E2M1FNNext) { |
|   // Checks APFloat::next() at the boundaries of Float4E2M1FN: stepping past |
|   // either largest magnitude must leave the value untouched, and stepping |
|   // around zero must land on the expected neighbour. |
|   const fltSemantics &Sem = APFloat::Float4E2M1FN(); |
| |
|   // 1. nextUp of the largest value leaves the bit pattern unchanged. |
|   { |
|     APFloat Val = APFloat::getLargest(Sem); |
|     const APFloat Want = APFloat::getLargest(Sem); |
|     EXPECT_EQ(Val.next(/*nextDown=*/false), APFloat::opOK); |
|     EXPECT_FALSE(Val.isInfinity()); |
|     EXPECT_FALSE(Val.isZero()); |
|     EXPECT_TRUE(Val.bitwiseIsEqual(Want)); |
|   } |
| |
|   // 2. nextUp of the smallest negative denormal is -0. |
|   { |
|     APFloat Val = APFloat::getSmallest(Sem, /*Negative=*/true); |
|     const APFloat Want = APFloat::getZero(Sem, /*Negative=*/true); |
|     EXPECT_EQ(Val.next(/*nextDown=*/false), APFloat::opOK); |
|     EXPECT_TRUE(Val.isNegZero()); |
|     EXPECT_FALSE(Val.isPosZero()); |
|     EXPECT_TRUE(Val.bitwiseIsEqual(Want)); |
|   } |
| |
|   // 3. nextDown of the most negative value leaves it unchanged. |
|   { |
|     APFloat Val = APFloat::getLargest(Sem, /*Negative=*/true); |
|     // Snapshot taken before stepping so the result can be compared to it. |
|     const APFloat Want = Val; |
|     EXPECT_EQ(Val.next(/*nextDown=*/true), APFloat::opOK); |
|     EXPECT_FALSE(Val.isInfinity()); |
|     EXPECT_FALSE(Val.isZero()); |
|     EXPECT_FALSE(Val.isNaN()); |
|     EXPECT_TRUE(Val.bitwiseIsEqual(Want)); |
|   } |
| |
|   // 4. nextDown of +0 is the smallest negative denormal. |
|   { |
|     APFloat Val = APFloat::getZero(Sem, /*Negative=*/false); |
|     const APFloat Want = APFloat::getSmallest(Sem, /*Negative=*/true); |
|     EXPECT_EQ(Val.next(/*nextDown=*/true), APFloat::opOK); |
|     EXPECT_FALSE(Val.isZero()); |
|     EXPECT_TRUE(Val.isDenormal()); |
|     EXPECT_TRUE(Val.bitwiseIsEqual(Want)); |
|   } |
| } |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| #ifndef NDEBUG |
| TEST(APFloatTest, Float6E3M2FNGetInfNaN) { |
|   // Requesting Inf or NaN for Float6E3M2FN is expected to abort with the |
|   // diagnostics matched below. |
|   const fltSemantics &Sem = APFloat::Float6E3M2FN(); |
|   EXPECT_DEATH(APFloat::getInf(Sem), |
|                "This floating point format does not support Inf"); |
|   EXPECT_DEATH(APFloat::getNaN(Sem), |
|                "This floating point format does not support NaN"); |
| } |
| |
| TEST(APFloatTest, Float6E2M3FNGetInfNaN) { |
|   // Requesting Inf or NaN for Float6E2M3FN is expected to abort with the |
|   // diagnostics matched below. |
|   const fltSemantics &Sem = APFloat::Float6E2M3FN(); |
|   EXPECT_DEATH(APFloat::getInf(Sem), |
|                "This floating point format does not support Inf"); |
|   EXPECT_DEATH(APFloat::getNaN(Sem), |
|                "This floating point format does not support NaN"); |
| } |
| |
| TEST(APFloatTest, Float4E2M1FNGetInfNaN) { |
|   // Requesting Inf or NaN for Float4E2M1FN is expected to abort with the |
|   // diagnostics matched below. |
|   const fltSemantics &Sem = APFloat::Float4E2M1FN(); |
|   EXPECT_DEATH(APFloat::getInf(Sem), |
|                "This floating point format does not support Inf"); |
|   EXPECT_DEATH(APFloat::getNaN(Sem), |
|                "This floating point format does not support NaN"); |
| } |
| #endif |
| #endif |
| |
| TEST(APFloatTest, Float6E3M2FNToDouble) { |
|   // Converts a handful of landmark Float6E3M2FN values to double and checks |
|   // each against its exact expected value. |
|   const fltSemantics &Sem = APFloat::Float6E3M2FN(); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
|   EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(28., MaxPos.convertToDouble()); |
|   EXPECT_EQ(-28., MaxNeg.convertToDouble()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p-2, MinNormPos.convertToDouble()); |
|   EXPECT_EQ(-0x1p-2, MinNormNeg.convertToDouble()); |
| |
|   // Smallest positive denormal; 0x0.1p0 is 2^-4. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.1p0, MinDenormal.convertToDouble()); |
| } |
| |
| TEST(APFloatTest, Float6E2M3FNToDouble) { |
|   // Converts a handful of landmark Float6E2M3FN values to double and checks |
|   // each against its exact expected value. |
|   const fltSemantics &Sem = APFloat::Float6E2M3FN(); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
|   EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(7.5, MaxPos.convertToDouble()); |
|   EXPECT_EQ(-7.5, MaxNeg.convertToDouble()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p0, MinNormPos.convertToDouble()); |
|   EXPECT_EQ(-0x1p0, MinNormNeg.convertToDouble()); |
| |
|   // Smallest positive denormal; 0x0.2p0 is 2^-3. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.2p0, MinDenormal.convertToDouble()); |
| } |
| |
| TEST(APFloatTest, Float4E2M1FNToDouble) { |
|   // Converts a handful of landmark Float4E2M1FN values to double and checks |
|   // each against its exact expected value. Every expected value is written as |
|   // a double literal so both sides of each comparison have the same type. |
|   const fltSemantics &Sem = APFloat::Float4E2M1FN(); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0, APFloat(Sem, "1.0").convertToDouble()); |
|   EXPECT_EQ(2.0, APFloat(Sem, "2.0").convertToDouble()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(6.0, MaxPos.convertToDouble()); |
|   EXPECT_EQ(-6.0, MaxNeg.convertToDouble()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p0, MinNormPos.convertToDouble()); |
|   EXPECT_EQ(-0x1p0, MinNormNeg.convertToDouble()); |
| |
|   // Smallest positive denormal; 0x0.8p0 is 2^-1. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.8p0, MinDenormal.convertToDouble()); |
| } |
| |
| TEST(APFloatTest, Float6E3M2FNToFloat) { |
|   // Converts a handful of landmark Float6E3M2FN values to float and checks |
|   // each against its exact expected value. Every expected value is written as |
|   // a float literal so both sides of each comparison have the same type. |
|   const fltSemantics &Sem = APFloat::Float6E3M2FN(); |
| |
|   // The sign of zero must survive the conversion. |
|   const APFloat PosZeroAsFloat(APFloat::getZero(Sem).convertToFloat()); |
|   EXPECT_TRUE(PosZeroAsFloat.isPosZero()); |
|   const APFloat NegZeroAsFloat( |
|       APFloat::getZero(Sem, /*Negative=*/true).convertToFloat()); |
|   EXPECT_TRUE(NegZeroAsFloat.isNegZero()); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat()); |
|   EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(28.0F, MaxPos.convertToFloat()); |
|   EXPECT_EQ(-28.0F, MaxNeg.convertToFloat()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p-2F, MinNormPos.convertToFloat()); |
|   EXPECT_EQ(-0x1p-2F, MinNormNeg.convertToFloat()); |
| |
|   // Smallest positive denormal; 0x0.1p0 is 2^-4. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.1p0F, MinDenormal.convertToFloat()); |
| } |
| |
| TEST(APFloatTest, Float6E2M3FNToFloat) { |
|   // Converts a handful of landmark Float6E2M3FN values to float and checks |
|   // each against its exact expected value. Every expected value is written as |
|   // a float literal so both sides of each comparison have the same type. |
|   const fltSemantics &Sem = APFloat::Float6E2M3FN(); |
| |
|   // The sign of zero must survive the conversion. |
|   const APFloat PosZeroAsFloat(APFloat::getZero(Sem).convertToFloat()); |
|   EXPECT_TRUE(PosZeroAsFloat.isPosZero()); |
|   const APFloat NegZeroAsFloat( |
|       APFloat::getZero(Sem, /*Negative=*/true).convertToFloat()); |
|   EXPECT_TRUE(NegZeroAsFloat.isNegZero()); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat()); |
|   EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(7.5F, MaxPos.convertToFloat()); |
|   EXPECT_EQ(-7.5F, MaxNeg.convertToFloat()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p0F, MinNormPos.convertToFloat()); |
|   EXPECT_EQ(-0x1p0F, MinNormNeg.convertToFloat()); |
| |
|   // Smallest positive denormal; 0x0.2p0 is 2^-3. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.2p0F, MinDenormal.convertToFloat()); |
| } |
| |
| TEST(APFloatTest, Float4E2M1FNToFloat) { |
|   // Converts a handful of landmark Float4E2M1FN values to float and checks |
|   // each against its exact expected value. Every expected value is written as |
|   // a float literal so both sides of each comparison have the same type. |
|   const fltSemantics &Sem = APFloat::Float4E2M1FN(); |
| |
|   // The sign of zero must survive the conversion. |
|   const APFloat PosZeroAsFloat(APFloat::getZero(Sem).convertToFloat()); |
|   EXPECT_TRUE(PosZeroAsFloat.isPosZero()); |
|   const APFloat NegZeroAsFloat( |
|       APFloat::getZero(Sem, /*Negative=*/true).convertToFloat()); |
|   EXPECT_TRUE(NegZeroAsFloat.isNegZero()); |
| |
|   // Values parsed from decimal strings. |
|   EXPECT_EQ(1.0F, APFloat(Sem, "1.0").convertToFloat()); |
|   EXPECT_EQ(2.0F, APFloat(Sem, "2.0").convertToFloat()); |
| |
|   // Largest magnitude, both signs. |
|   const APFloat MaxPos = APFloat::getLargest(Sem, /*Negative=*/false); |
|   const APFloat MaxNeg = APFloat::getLargest(Sem, /*Negative=*/true); |
|   EXPECT_EQ(6.0F, MaxPos.convertToFloat()); |
|   EXPECT_EQ(-6.0F, MaxNeg.convertToFloat()); |
| |
|   // Smallest normalized magnitude, both signs. |
|   const APFloat MinNormPos = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/false); |
|   const APFloat MinNormNeg = |
|       APFloat::getSmallestNormalized(Sem, /*Negative=*/true); |
|   EXPECT_EQ(0x1p0F, MinNormPos.convertToFloat()); |
|   EXPECT_EQ(-0x1p0F, MinNormNeg.convertToFloat()); |
| |
|   // Smallest positive denormal; 0x0.8p0 is 2^-1. |
|   const APFloat MinDenormal = APFloat::getSmallest(Sem, /*Negative=*/false); |
|   EXPECT_TRUE(MinDenormal.isDenormal()); |
|   EXPECT_EQ(0x0.8p0F, MinDenormal.convertToFloat()); |
| } |
| |
| TEST(APFloatTest, AddOrSubtractSignificand) { |
|   using Helper = detail::IEEEFloatUnitTestHelper; |
| |
|   // Named values for the boolean arguments of Helper::runTest. The first |
|   // argument is `subtract`; the others are sign flags. |
|   // NOTE(review): Pos/Neg assume a set sign flag means negative - confirm |
|   // against runTest. |
|   constexpr bool Add = false; |
|   constexpr bool Subtract = true; |
|   constexpr bool Pos = false; |
|   constexpr bool Neg = true; |
| |
|   // Each call passes: operation, LHS {sign, exponent, significand}, |
|   // RHS {sign, exponent, significand}, followed by what appear to be the |
|   // expected {sign, exponent, significand} and the expected lost fraction |
|   // (the tail of runTest's signature is not visible from here). |
|   // |
|   // The cases cover every combination of: |
|   //   {equal exponents, LHS larger exponent, RHS larger exponent} |
|   //   {equal significands, LHS larger significand, RHS larger significand} |
|   //   {no loss, loss} |
| |
|   // Equal exponents (loss cannot occur as there is no shifting). |
|   Helper::runTest(Subtract, Pos, 1, 0x10, Pos, 1, 0x5, Pos, 1, 0xb, |
|                   lfExactlyZero); |
|   Helper::runTest(Add, Pos, -2, 0x20, Neg, -2, 0x20, Pos, -2, 0, |
|                   lfExactlyZero); |
|   Helper::runTest(Add, Neg, 3, 0x20, Pos, 3, 0x30, Pos, 3, 0x10, |
|                   lfExactlyZero); |
| |
|   // LHS larger exponent. |
|   // LHS significand greater after shifting. |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x100, Pos, 6, 0x1e0, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x101, Pos, 6, 0x1df, |
|                   lfMoreThanHalf); |
|   // Significands equal after shifting. |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x1000, Pos, 6, 0, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x1001, Neg, 6, 0, |
|                   lfLessThanHalf); |
|   // RHS significand greater after shifting. |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x10000, Neg, 6, 0x1e00, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 7, 0x100, Pos, 3, 0x10001, Neg, 6, 0x1e00, |
|                   lfLessThanHalf); |
| |
|   // RHS larger exponent. |
|   // RHS significand greater after shifting. |
|   Helper::runTest(Subtract, Pos, 3, 0x100, Pos, 7, 0x100, Neg, 6, 0x1e0, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 3, 0x101, Pos, 7, 0x100, Neg, 6, 0x1df, |
|                   lfMoreThanHalf); |
|   // Significands equal after shifting. |
|   Helper::runTest(Subtract, Pos, 3, 0x1000, Pos, 7, 0x100, Pos, 6, 0, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 3, 0x1001, Pos, 7, 0x100, Pos, 6, 0, |
|                   lfLessThanHalf); |
|   // LHS significand greater after shifting. |
|   Helper::runTest(Subtract, Pos, 3, 0x10000, Pos, 7, 0x100, Pos, 6, 0x1e00, |
|                   lfExactlyZero); |
|   Helper::runTest(Subtract, Pos, 3, 0x10001, Pos, 7, 0x100, Pos, 6, 0x1e00, |
|                   lfLessThanHalf); |
| } |
| |
| TEST(APFloatTest, hasSignBitInMSB) { |
|   // Short local spelling of the predicate under test. |
|   const auto SignInMSB = [](const fltSemantics &Sem) { |
|     return APFloat::hasSignBitInMSB(Sem); |
|   }; |
| |
|   // Formats this test expects to report a sign bit in the MSB. |
|   EXPECT_TRUE(SignInMSB(APFloat::IEEEsingle())); |
|   EXPECT_TRUE(SignInMSB(APFloat::x87DoubleExtended())); |
|   EXPECT_TRUE(SignInMSB(APFloat::PPCDoubleDouble())); |
|   EXPECT_TRUE(SignInMSB(APFloat::IEEEquad())); |
| |
|   // Float8E8M0FNU is the one format here expected to report false. |
|   EXPECT_FALSE(SignInMSB(APFloat::Float8E8M0FNU())); |
| } |
| |
| } // namespace |