| //===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file implements NumericLiteralCaseFixer that standardizes character |
| /// case within numeric literals. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "NumericLiteralCaseFixer.h" |
| #include "NumericLiteralInfo.h" |
| |
| #include "llvm/ADT/StringExtras.h" |
| |
| #include <algorithm> |
| |
| namespace clang { |
| namespace format { |
| |
| static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) { |
| // Check if language is supported. |
| switch (Style.Language) { |
| case FormatStyle::LK_C: |
| case FormatStyle::LK_Cpp: |
| case FormatStyle::LK_ObjC: |
| case FormatStyle::LK_CSharp: |
| case FormatStyle::LK_Java: |
| case FormatStyle::LK_JavaScript: |
| break; |
| default: |
| return false; |
| } |
| |
| // Check if style options are set. |
| const auto &Option = Style.NumericLiteralCase; |
| const auto Leave = FormatStyle::NLCS_Leave; |
| return Option.Prefix != Leave || Option.HexDigit != Leave || |
| Option.ExponentLetter != Leave || Option.Suffix != Leave; |
| } |
| |
| static std::string |
| transformComponent(StringRef Component, |
| FormatStyle::NumericLiteralComponentStyle ConfigValue) { |
| switch (ConfigValue) { |
| case FormatStyle::NLCS_Upper: |
| return Component.upper(); |
| case FormatStyle::NLCS_Lower: |
| return Component.lower(); |
| default: |
| // Covers FormatStyle::NLCS_Leave. |
| return Component.str(); |
| } |
| } |
| |
| /// Test if Suffix matches a C++ literal reserved by the library. |
| /// Matches against all suffixes reserved in the C++23 standard. |
| static bool matchesReservedSuffix(StringRef Suffix) { |
| static constexpr std::array<StringRef, 11> SortedReservedSuffixes = { |
| "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y", |
| }; |
| |
| // This can be static_assert when we have access to constexpr is_sorted in |
| // C++ 20. |
| assert(llvm::is_sorted(SortedReservedSuffixes) && |
| "Must be sorted as precondition for lower_bound()."); |
| |
| auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix); |
| if (entry == SortedReservedSuffixes.cend()) |
| return false; |
| return *entry == Suffix; |
| } |
| |
| static std::string format(StringRef NumericLiteral, const FormatStyle &Style) { |
| const char Separator = Style.isCpp() ? '\'' : '_'; |
| const NumericLiteralInfo Info(NumericLiteral, Separator); |
| const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos; |
| const bool HasExponent = Info.ExponentLetterPos != StringRef::npos; |
| const bool HasSuffix = Info.SuffixPos != StringRef::npos; |
| |
| std::string Formatted; |
| |
| if (HasBaseLetter) { |
| Formatted += |
| transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos), |
| Style.NumericLiteralCase.Prefix); |
| } |
| // Reformat this slice as HexDigit whether or not the digit has hexadecimal |
| // characters because binary/decimal/octal digits are unchanged. |
| Formatted += transformComponent( |
| NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0, |
| HasExponent ? Info.ExponentLetterPos |
| : HasSuffix ? Info.SuffixPos |
| : NumericLiteral.size()), |
| Style.NumericLiteralCase.HexDigit); |
| |
| if (HasExponent) { |
| Formatted += transformComponent( |
| NumericLiteral.slice(Info.ExponentLetterPos, |
| HasSuffix ? Info.SuffixPos |
| : NumericLiteral.size()), |
| Style.NumericLiteralCase.ExponentLetter); |
| } |
| |
| if (HasSuffix) { |
| StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos); |
| if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') { |
| // In C++, it is idiomatic, but NOT standardized to define user-defined |
| // literals with a leading '_'. Omit user defined literals and standard |
| // reserved suffixes from transformation. |
| Formatted += Suffix.str(); |
| } else { |
| Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix); |
| } |
| } |
| |
| return Formatted; |
| } |
| |
| std::pair<tooling::Replacements, unsigned> |
| NumericLiteralCaseFixer::process(const Environment &Env, |
| const FormatStyle &Style) { |
| if (!isNumericLiteralCaseFixerNeeded(Style)) |
| return {}; |
| |
| const auto &SourceMgr = Env.getSourceManager(); |
| AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
| |
| const auto ID = Env.getFileID(); |
| const auto LangOpts = getFormattingLangOpts(Style); |
| Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); |
| Lex.SetCommentRetentionState(true); |
| |
| Token Tok; |
| tooling::Replacements Result; |
| |
| for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { |
| // Skip tokens that are too small to contain a formattable literal. |
| // Size=2 is the smallest possible literal that could contain formattable |
| // components, for example "1u". |
| auto Length = Tok.getLength(); |
| if (Length < 2) |
| continue; |
| |
| // Service clang-format off/on comments. |
| auto Location = Tok.getLocation(); |
| auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); |
| if (Tok.is(tok::comment)) { |
| if (isClangFormatOff(Text)) |
| Skip = true; |
| else if (isClangFormatOn(Text)) |
| Skip = false; |
| continue; |
| } |
| |
| if (Skip || Tok.isNot(tok::numeric_constant) || |
| !AffectedRangeMgr.affectsCharSourceRange( |
| CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { |
| continue; |
| } |
| |
| const auto Formatted = format(Text, Style); |
| if (Formatted != Text) { |
| cantFail(Result.add( |
| tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
| } |
| } |
| |
| return {Result, 0}; |
| } |
| |
| } // namespace format |
| } // namespace clang |