| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // Copyright (c) Microsoft Corporation. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| // Copyright 2018 Ulf Adams |
| // Copyright (c) Microsoft Corporation. All rights reserved. |
| |
| // Boost Software License - Version 1.0 - August 17th, 2003 |
| |
| // Permission is hereby granted, free of charge, to any person or organization |
| // obtaining a copy of the software and accompanying documentation covered by |
| // this license (the "Software") to use, reproduce, display, distribute, |
| // execute, and transmit the Software, and to prepare derivative works of the |
| // Software, and to permit third-parties to whom the Software is furnished to |
| // do so, all subject to the following: |
| |
| // The copyright notices in the Software and this entire statement, including |
| // the above license grant, this restriction and the following disclaimer, |
| // must be included in all copies of the Software, in whole or in part, and |
| // all derivative works of the Software, unless such copies or derivative |
| // works are solely in the form of machine-executable object code generated by |
| // a source language processor. |
| |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
| // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
| // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
| // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| // DEALINGS IN THE SOFTWARE. |
| |
#ifndef _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H
| |
// Avoid reformatting, to keep the diff against the original code minimal.
| // clang-format off |
| |
| #include "__config" |
| |
| _LIBCPP_BEGIN_NAMESPACE_STD |
| |
| #if defined(_M_X64) && defined(_MSC_VER) |
| #define _LIBCPP_INTRINSIC128 1 |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { |
| return _umul128(__a, __b, __productHi); |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { |
| // For the __shiftright128 intrinsic, the shift value is always |
| // modulo 64. |
| // In the current implementation of the double-precision version |
| // of Ryu, the shift value is always < 64. |
| // (The shift value is in the range [49, 58].) |
  // Check this here in case a future change requires larger shift
  // values. If that happens, this function will need to be adjusted.
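  // (With a mod-64 shift count, a hypothetical __dist of 64 would shift by
  // 64 % 64 == 0 and return __lo unchanged, rather than the expected __hi.)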
| _LIBCPP_ASSERT(__dist < 64, ""); |
| return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist)); |
| } |
| |
| // ^^^ intrinsics available ^^^ / vvv __int128 available vvv |
| #elif defined(__SIZEOF_INT128__) && ( \ |
| (defined(__clang__) && !defined(_MSC_VER)) || \ |
| (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__))) |
| #define _LIBCPP_INTRINSIC128 1 |
// We have unsigned __int128 support in Clang or GCC
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { |
| auto __temp = __a * (unsigned __int128)__b; |
| *__productHi = __temp >> 64; |
| return static_cast<uint64_t>(__temp); |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { |
| // In the current implementation of the double-precision version |
| // of Ryu, the shift value is always < 64. |
| // (The shift value is in the range [49, 58].) |
| // Check this here in case a future change requires larger shift |
| // values. In this case this function needs to be adjusted. |
| _LIBCPP_ASSERT(__dist < 64, ""); |
| auto __temp = __lo | ((unsigned __int128)__hi << 64); |
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
| return static_cast<uint64_t>(__temp >> (__dist & 63)); |
| } |
| #else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { |
| // TRANSITION, VSO-634761 |
| // The casts here help MSVC to avoid calls to the __allmul library function. |
| const uint32_t __aLo = static_cast<uint32_t>(__a); |
| const uint32_t __aHi = static_cast<uint32_t>(__a >> 32); |
| const uint32_t __bLo = static_cast<uint32_t>(__b); |
| const uint32_t __bHi = static_cast<uint32_t>(__b >> 32); |
| |
| const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo; |
| const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi; |
| const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo; |
| const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi; |
| |
| const uint32_t __b00Lo = static_cast<uint32_t>(__b00); |
| const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32); |
| |
| const uint64_t __mid1 = __b10 + __b00Hi; |
| const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1); |
| const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32); |
| |
| const uint64_t __mid2 = __b01 + __mid1Lo; |
| const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2); |
| const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32); |
| |
| const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi; |
| const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo; |
| |
| *__productHi = __pHi; |
| return __pLo; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { |
| // We don't need to handle the case __dist >= 64 here (see above). |
| _LIBCPP_ASSERT(__dist < 64, ""); |
| #ifdef _LIBCPP_64_BIT |
  // __dist must be nonzero here: __hi << 64 would be undefined behavior.
  _LIBCPP_ASSERT(__dist > 0, "");
| return (__hi << (64 - __dist)) | (__lo >> __dist); |
| #else // ^^^ 64-bit ^^^ / vvv 32-bit vvv |
| // Avoid a 64-bit shift by taking advantage of the range of shift values. |
| _LIBCPP_ASSERT(__dist >= 32, ""); |
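  // Since 32 <= __dist < 64, the result of __lo >> __dist depends only on the
  // high 32 bits of __lo, so it can be computed as a 32-bit shift of
  // static_cast<uint32_t>(__lo >> 32) by (__dist - 32).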
| return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32)); |
| #endif // ^^^ 32-bit ^^^ |
| } |
| |
| #endif // ^^^ intrinsics unavailable ^^^ |
| |
| #ifndef _LIBCPP_64_BIT |
| |
| // Returns the high 64 bits of the 128-bit product of __a and __b. |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) { |
| // Reuse the __ryu_umul128 implementation. |
| // Optimizers will likely eliminate the instructions used to compute the |
| // low part of the product. |
| uint64_t __hi; |
| (void) __ryu_umul128(__a, __b, &__hi); |
| return __hi; |
| } |
| |
| // On 32-bit platforms, compilers typically generate calls to library |
| // functions for 64-bit divisions, even if the divisor is a constant. |
| // |
| // TRANSITION, LLVM-37932 |
| // |
// The functions here perform division by a constant using multiplication,
// in the same way a 64-bit compiler would.
| // |
| // NB: |
| // The multipliers and shift values are the ones generated by clang x64 |
| // for expressions like x/5, x/10, etc. |
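//
// As a worked example (illustrative commentary, not from the original
// sources), consider __div5 below. Its multiplier is the ceiling of 2^66 / 5:
//   0xCCCCCCCCCCCCCCCD == (2^66 + 1) / 5.
// For any 64-bit __x,
//   __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2
//     == floor(__x * (2^66 + 1) / (5 * 2^66))
//     == floor(__x / 5 + __x / (5 * 2^66)),
// and the error term __x / (5 * 2^66) is below 1/20 for __x < 2^64, too small
// to push the fractional part of __x / 5 (at most 4/5) up to 1, so the result
// is exactly floor(__x / 5). See Granlund & Montgomery, "Division by
// Invariant Integers using Multiplication" (PLDI 1994), for the general
// technique.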
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) { |
| return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) { |
| return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) { |
| return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) { |
| return __umulh(__x, 0xABCC77118461CEFDu) >> 26; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) { |
| return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) { |
| // Avoid 64-bit math as much as possible. |
| // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would |
| // perform 32x64-bit multiplication and 64-bit subtraction. |
| // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by |
| // less than 10^9, so their highest 32 bits must be identical, |
| // so we can truncate both sides to uint32_t before subtracting. |
| // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)). |
| // We can truncate before multiplying instead of after, as multiplying |
| // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits. |
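  // As a concrete check (illustrative): for __x == 12345678901,
  // __div1e9(__x) == 12 and the exact remainder is 345678901. After
  // truncation, static_cast<uint32_t>(__x) == 3755744309 and
  // 1000000000 * 12 truncates to 3410065408; their 32-bit difference is
  // 345678901, as expected.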
| return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x)); |
| } |
| |
| #else // ^^^ 32-bit ^^^ / vvv 64-bit vvv |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) { |
| return __x / 5; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) { |
| return __x / 10; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) { |
| return __x / 100; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) { |
| return __x / 100000000; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) { |
| return __x / 1000000000; |
| } |
| |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) { |
| return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)); |
| } |
| |
| #endif // ^^^ 64-bit ^^^ |
| |
// Returns the exponent of the largest power of 5 that divides __value.
// __value must be nonzero.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
| uint32_t __count = 0; |
| for (;;) { |
| _LIBCPP_ASSERT(__value != 0, ""); |
| const uint64_t __q = __div5(__value); |
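    // __value == 5 * __q + __r with 0 <= __r < 5; since the true difference
    // fits in 32 bits, computing it in truncated 32-bit arithmetic is exact.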
| const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q); |
| if (__r != 0) { |
| break; |
| } |
| __value = __q; |
| ++__count; |
| } |
| return __count; |
| } |
| |
| // Returns true if __value is divisible by 5^__p. |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) { |
| // I tried a case distinction on __p, but there was no performance difference. |
| return __pow5Factor(__value) >= __p; |
| } |
| |
| // Returns true if __value is divisible by 2^__p. |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) { |
| _LIBCPP_ASSERT(__value != 0, ""); |
| _LIBCPP_ASSERT(__p < 64, ""); |
| // __builtin_ctzll doesn't appear to be faster here. |
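  // ((1ull << __p) - 1) masks the low __p bits of __value; __value is a
  // multiple of 2^__p exactly when all of those bits are zero.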
| return (__value & ((1ull << __p) - 1)) == 0; |
| } |
| |
| _LIBCPP_END_NAMESPACE_STD |
| |
| // clang-format on |
| |
#endif // _LIBCPP_SRC_INCLUDE_RYU_D2S_INTRINSICS_H