| // -*- C++ -*- |
| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // WARNING, this entire header is generated by |
| // utils/generate_indic_conjunct_break_table.py |
| // DO NOT MODIFY! |
| |
| // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE |
| // |
| // See Terms of Use <https://www.unicode.org/copyright.html> |
| // for definitions of Unicode Inc.'s Data Files and Software. |
| // |
| // NOTICE TO USER: Carefully read the following legal agreement. |
| // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S |
| // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), |
| // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE |
| // TERMS AND CONDITIONS OF THIS AGREEMENT. |
| // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE |
| // THE DATA FILES OR SOFTWARE. |
| // |
| // COPYRIGHT AND PERMISSION NOTICE |
| // |
| // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. |
| // Distributed under the Terms of Use in https://www.unicode.org/copyright.html. |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining |
| // a copy of the Unicode data files and any associated documentation |
| // (the "Data Files") or Unicode software and any associated documentation |
| // (the "Software") to deal in the Data Files or Software |
| // without restriction, including without limitation the rights to use, |
| // copy, modify, merge, publish, distribute, and/or sell copies of |
| // the Data Files or Software, and to permit persons to whom the Data Files |
| // or Software are furnished to do so, provided that either |
| // (a) this copyright and permission notice appear with all copies |
| // of the Data Files or Software, or |
| // (b) this copyright and permission notice appear in associated |
| // Documentation. |
| // |
| // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF |
| // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| // NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
| // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS |
| // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL |
| // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, |
| // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
| // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
| // PERFORMANCE OF THE DATA FILES OR SOFTWARE. |
| // |
| // Except as contained in this notice, the name of a copyright holder |
| // shall not be used in advertising or otherwise to promote the sale, |
| // use or other dealings in these Data Files or Software without prior |
| // written authorization of the copyright holder. |
| |
| #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
| #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
| |
| #include <__algorithm/ranges_upper_bound.h> |
| #include <__config> |
| #include <__cstddef/ptrdiff_t.h> |
| #include <__iterator/access.h> |
| #include <cstdint> |
| |
| #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| # pragma GCC system_header |
| #endif |
| |
| _LIBCPP_BEGIN_NAMESPACE_STD |
| |
| #if _LIBCPP_STD_VER >= 20 |
| |
| namespace __indic_conjunct_break { |
| |
/// The Indic conjunct break property of a code point.
///
/// The numeric values are stored in the two least significant bits of the
/// elements of \ref __entries and are converted back by \ref __get_property.
enum class __property : uint8_t {
  // Values generated from the data files.
  __Consonant = 0,
  __Extend    = 1,
  __Linker    = 2,

  // The code point has none of the properties above.
  __none = 3
};
| |
| /// The entries of the indic conjunct break property table. |
| /// |
| /// The data is generated from |
| /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| /// |
| /// The data has 3 values |
| /// - bits [0, 1] The property. One of the values generated from the datafiles |
| /// of \ref __property |
| /// - bits [2, 10] The size of the range. |
| /// - bits [11, 31] The lower bound code point of the range. The upper bound of |
| /// the range is lower bound + size. |
| /// |
| /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges |
| /// in the Unicode tables are larger. They are stored in multiple consecutive |
| /// ranges in the data table. An alternative would be to store the sizes in a |
| /// separate 16-bit value. The original MSVC STL code had such an approach, but |
| /// this approach uses less space for the data and is about 4% faster in the |
| /// following benchmark. |
| /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp |
| // clang-format off |
| _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = { |
| 0x001801bd, |
| 0x00241819, |
| 0x002c88b1, |
| 0x002df801, |
| 0x002e0805, |
| 0x002e2005, |
| 0x002e3801, |
| 0x00308029, |
| 0x00325851, |
| 0x00338001, |
| 0x0036b019, |
| 0x0036f815, |
| 0x00373805, |
| 0x0037500d, |
| 0x00388801, |
| 0x00398069, |
| 0x003d3029, |
| 0x003f5821, |
| 0x003fe801, |
| 0x0040b00d, |
| 0x0040d821, |
| 0x00412809, |
| 0x00414811, |
| 0x0042c809, |
| 0x0044b821, |
| 0x0046505d, |
| 0x0047187d, |
| 0x0048a890, |
| 0x0049d001, |
| 0x0049e001, |
| 0x004a081d, |
| 0x004a6802, |
| 0x004a8819, |
| 0x004ac01c, |
| 0x004b1005, |
| 0x004bc01c, |
| 0x004c0801, |
| 0x004ca84c, |
| 0x004d5018, |
| 0x004d9000, |
| 0x004db00c, |
| 0x004de001, |
| 0x004df001, |
| 0x004e080d, |
| 0x004e6802, |
| 0x004eb801, |
| 0x004ee004, |
| 0x004ef800, |
| 0x004f1005, |
| 0x004f8004, |
| 0x004ff001, |
| 0x00500805, |
| 0x0051e001, |
| 0x00520805, |
| 0x00523805, |
| 0x00525809, |
| 0x00528801, |
| 0x00538005, |
| 0x0053a801, |
| 0x00540805, |
| 0x0054a84c, |
| 0x00555018, |
| 0x00559004, |
| 0x0055a810, |
| 0x0055e001, |
| 0x00560811, |
| 0x00563805, |
| 0x00566802, |
| 0x00571005, |
| 0x0057c800, |
| 0x0057d015, |
| 0x00580801, |
| 0x0058a84c, |
| 0x00595018, |
| 0x00599004, |
| 0x0059a810, |
| 0x0059e001, |
| 0x0059f005, |
| 0x005a080d, |
| 0x005a6802, |
| 0x005aa809, |
| 0x005ae004, |
| 0x005af800, |
| 0x005b1005, |
| 0x005b8800, |
| 0x005c1001, |
| 0x005df001, |
| 0x005e0001, |
| 0x005e6801, |
| 0x005eb801, |
| 0x00600001, |
| 0x00602001, |
| 0x0060a84c, |
| 0x0061503c, |
| 0x0061e001, |
| 0x0061f009, |
| 0x00623009, |
| 0x00625009, |
| 0x00626802, |
| 0x0062a805, |
| 0x0062c008, |
| 0x00631005, |
| 0x00640801, |
| 0x0065e001, |
| 0x0065f805, |
| 0x00661001, |
| 0x00663009, |
| 0x0066500d, |
| 0x0066a805, |
| 0x00671005, |
| 0x00680005, |
| 0x0068a894, |
| 0x0069d805, |
| 0x0069f001, |
| 0x006a080d, |
| 0x006a6802, |
| 0x006ab801, |
| 0x006b1005, |
| 0x006c0801, |
| 0x006e5001, |
| 0x006e7801, |
| 0x006e9009, |
| 0x006eb001, |
| 0x006ef801, |
| 0x00718801, |
| 0x0071a019, |
| 0x0072381d, |
| 0x00758801, |
| 0x0075a021, |
| 0x00764019, |
| 0x0078c005, |
| 0x0079a801, |
| 0x0079b801, |
| 0x0079c801, |
| 0x007b8835, |
| 0x007c0011, |
| 0x007c3005, |
| 0x007c6829, |
| 0x007cc88d, |
| 0x007e3001, |
| 0x0081680d, |
| 0x00819015, |
| 0x0081c805, |
| 0x0081e805, |
| 0x0082c005, |
| 0x0082f009, |
| 0x0083880d, |
| 0x00841001, |
| 0x00842805, |
| 0x00846801, |
| 0x0084e801, |
| 0x009ae809, |
| 0x00b8900d, |
| 0x00b99009, |
| 0x00ba9005, |
| 0x00bb9005, |
| 0x00bda005, |
| 0x00bdb819, |
| 0x00be3001, |
| 0x00be4829, |
| 0x00bee801, |
| 0x00c05809, |
| 0x00c07801, |
| 0x00c42805, |
| 0x00c54801, |
| 0x00c90009, |
| 0x00c93805, |
| 0x00c99001, |
| 0x00c9c809, |
| 0x00d0b805, |
| 0x00d0d801, |
| 0x00d2b001, |
| 0x00d2c019, |
| 0x00d30001, |
| 0x00d31001, |
| 0x00d3281d, |
| 0x00d39825, |
| 0x00d3f801, |
| 0x00d58079, |
| 0x00d8000d, |
| 0x00d9a025, |
| 0x00da1009, |
| 0x00db5821, |
| 0x00dc0005, |
| 0x00dd100d, |
| 0x00dd4015, |
| 0x00df3001, |
| 0x00df4005, |
| 0x00df6801, |
| 0x00df7811, |
| 0x00e1601d, |
| 0x00e1b005, |
| 0x00e68009, |
| 0x00e6a031, |
| 0x00e71019, |
| 0x00e76801, |
| 0x00e7a001, |
| 0x00e7c005, |
| 0x00ee00fd, |
| 0x01006801, |
| 0x01068081, |
| 0x01677809, |
| 0x016bf801, |
| 0x016f007d, |
| 0x01815015, |
| 0x0184c805, |
| 0x0533780d, |
| 0x0533a025, |
| 0x0534f005, |
| 0x05378005, |
| 0x05401001, |
| 0x05403001, |
| 0x05405801, |
| 0x05412805, |
| 0x05416001, |
| 0x05462005, |
| 0x05470045, |
| 0x0547f801, |
| 0x0549301d, |
| 0x054a3829, |
| 0x054a9801, |
| 0x054c0009, |
| 0x054d9801, |
| 0x054db00d, |
| 0x054de005, |
| 0x054e0001, |
| 0x054f2801, |
| 0x05514815, |
| 0x05518805, |
| 0x0551a805, |
| 0x05521801, |
| 0x05526001, |
| 0x0553e001, |
| 0x05558001, |
| 0x05559009, |
| 0x0555b805, |
| 0x0555f005, |
| 0x05560801, |
| 0x05576005, |
| 0x0557b001, |
| 0x055f2801, |
| 0x055f4001, |
| 0x055f6801, |
| 0x07d8f001, |
| 0x07f0003d, |
| 0x07f1003d, |
| 0x07fcf005, |
| 0x080fe801, |
| 0x08170001, |
| 0x081bb011, |
| 0x08500809, |
| 0x08502805, |
| 0x0850600d, |
| 0x0851c009, |
| 0x0851f801, |
| 0x08572805, |
| 0x0869200d, |
| 0x086b4811, |
| 0x08755805, |
| 0x0877e00d, |
| 0x087a3029, |
| 0x087c100d, |
| 0x08800801, |
| 0x0881c039, |
| 0x08838001, |
| 0x08839805, |
| 0x0883f809, |
| 0x0885980d, |
| 0x0885c805, |
| 0x08861001, |
| 0x08880009, |
| 0x08893811, |
| 0x0889681d, |
| 0x088b9801, |
| 0x088c0005, |
| 0x088db021, |
| 0x088e0001, |
| 0x088e480d, |
| 0x088e7801, |
| 0x08917809, |
| 0x0891a00d, |
| 0x0891f001, |
| 0x08920801, |
| 0x0896f801, |
| 0x0897181d, |
| 0x08980005, |
| 0x0899d805, |
| 0x0899f001, |
| 0x089a0001, |
| 0x089a6801, |
| 0x089ab801, |
| 0x089b3019, |
| 0x089b8011, |
| 0x089dc001, |
| 0x089dd815, |
| 0x089e1001, |
| 0x089e2801, |
| 0x089e3809, |
| 0x089e7009, |
| 0x089e9001, |
| 0x089f0805, |
| 0x08a1c01d, |
| 0x08a21009, |
| 0x08a23001, |
| 0x08a2f001, |
| 0x08a58001, |
| 0x08a59815, |
| 0x08a5d001, |
| 0x08a5e801, |
| 0x08a5f805, |
| 0x08a61005, |
| 0x08ad7801, |
| 0x08ad900d, |
| 0x08ade005, |
| 0x08adf805, |
| 0x08aee005, |
| 0x08b1981d, |
| 0x08b1e801, |
| 0x08b1f805, |
| 0x08b55801, |
| 0x08b56801, |
| 0x08b5801d, |
| 0x08b8e801, |
| 0x08b8f801, |
| 0x08b9100d, |
| 0x08b93811, |
| 0x08c17821, |
| 0x08c1c805, |
| 0x08c98001, |
| 0x08c9d80d, |
| 0x08ca1801, |
| 0x08cea00d, |
| 0x08ced005, |
| 0x08cf0001, |
| 0x08d00825, |
| 0x08d19815, |
| 0x08d1d80d, |
| 0x08d23801, |
| 0x08d28815, |
| 0x08d2c809, |
| 0x08d45031, |
| 0x08d4c005, |
| 0x08e18019, |
| 0x08e1c015, |
| 0x08e1f801, |
| 0x08e49055, |
| 0x08e55019, |
| 0x08e59005, |
| 0x08e5a805, |
| 0x08e98815, |
| 0x08e9d001, |
| 0x08e9e005, |
| 0x08e9f819, |
| 0x08ea3801, |
| 0x08ec8005, |
| 0x08eca801, |
| 0x08ecb801, |
| 0x08f79805, |
| 0x08f80005, |
| 0x08f9b011, |
| 0x08fa0009, |
| 0x08fad001, |
| 0x09a20001, |
| 0x09a23839, |
| 0x0b08f02d, |
| 0x0b096809, |
| 0x0b578011, |
| 0x0b598019, |
| 0x0b7a7801, |
| 0x0b7c780d, |
| 0x0b7f2001, |
| 0x0b7f8005, |
| 0x0de4e805, |
| 0x0e7800b5, |
| 0x0e798059, |
| 0x0e8b2811, |
| 0x0e8b6815, |
| 0x0e8bd81d, |
| 0x0e8c2819, |
| 0x0e8d500d, |
| 0x0e921009, |
| 0x0ed000d9, |
| 0x0ed1d8c5, |
| 0x0ed3a801, |
| 0x0ed42001, |
| 0x0ed4d811, |
| 0x0ed50839, |
| 0x0f000019, |
| 0x0f004041, |
| 0x0f00d819, |
| 0x0f011805, |
| 0x0f013011, |
| 0x0f047801, |
| 0x0f098019, |
| 0x0f157001, |
| 0x0f17600d, |
| 0x0f27600d, |
| 0x0f2f7005, |
| 0x0f468019, |
| 0x0f4a2019, |
| 0x0f9fd811, |
| 0x7001017d, |
| 0x700803bd}; |
| // clang-format on |
| |
| /// Returns the indic conjuct break property of a code point. |
| [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { |
| // The algorithm searches for the upper bound of the range and, when found, |
| // steps back one entry. This algorithm is used since the code point can be |
| // anywhere in the range. After a lower bound is found the next step is to |
| // compare whether the code unit is indeed in the range. |
| // |
| // Since the entry contains a code unit, size, and property the code point |
| // being sought needs to be adjusted. Just shifting the code point to the |
| // proper position doesn't work; suppose an entry has property 0, size 1, |
| // and lower bound 3. This results in the entry 0x1810. |
| // When searching for code point 3 it will search for 0x1800, find 0x1810 |
| // and moves to the previous entry. Thus the lower bound value will never |
| // be found. |
| // The simple solution is to set the bits belonging to the property and |
| // size. Then the upper bound for code point 3 will return the entry after |
| // 0x1810. After moving to the previous entry the algorithm arrives at the |
| // correct entry. |
| ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; |
| if (__i == 0) |
| return __property::__none; |
| |
| --__i; |
| uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); |
| if (__code_point <= __upper_bound) |
| return static_cast<__property>(__entries[__i] & 0b11); |
| |
| return __property::__none; |
| } |
| |
| } // namespace __indic_conjunct_break |
| |
| #endif // _LIBCPP_STD_VER >= 20 |
| |
| _LIBCPP_END_NAMESPACE_STD |
| |
| #endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |