blob: b58b3c7ee0189535a77e27bdc448cdaff5af9fdb [file] [log] [blame]
//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements NumericLiteralCaseFixer that standardizes character
/// case within numeric literals.
///
//===----------------------------------------------------------------------===//
#include "NumericLiteralCaseFixer.h"
#include "NumericLiteralInfo.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
namespace clang {
namespace format {
static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
// Check if language is supported.
switch (Style.Language) {
case FormatStyle::LK_C:
case FormatStyle::LK_Cpp:
case FormatStyle::LK_ObjC:
case FormatStyle::LK_CSharp:
case FormatStyle::LK_Java:
case FormatStyle::LK_JavaScript:
break;
default:
return false;
}
// Check if style options are set.
const auto &Option = Style.NumericLiteralCase;
const auto Leave = FormatStyle::NLCS_Leave;
return Option.Prefix != Leave || Option.HexDigit != Leave ||
Option.ExponentLetter != Leave || Option.Suffix != Leave;
}
static std::string
transformComponent(StringRef Component,
FormatStyle::NumericLiteralComponentStyle ConfigValue) {
switch (ConfigValue) {
case FormatStyle::NLCS_Upper:
return Component.upper();
case FormatStyle::NLCS_Lower:
return Component.lower();
default:
// Covers FormatStyle::NLCS_Leave.
return Component.str();
}
}
/// Test if Suffix matches a C++ literal reserved by the library.
/// Matches against all suffixes reserved in the C++23 standard.
static bool matchesReservedSuffix(StringRef Suffix) {
static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
"d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y",
};
// This can be static_assert when we have access to constexpr is_sorted in
// C++ 20.
assert(llvm::is_sorted(SortedReservedSuffixes) &&
"Must be sorted as precondition for lower_bound().");
auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix);
if (entry == SortedReservedSuffixes.cend())
return false;
return *entry == Suffix;
}
static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
const char Separator = Style.isCpp() ? '\'' : '_';
const NumericLiteralInfo Info(NumericLiteral, Separator);
const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos;
const bool HasExponent = Info.ExponentLetterPos != StringRef::npos;
const bool HasSuffix = Info.SuffixPos != StringRef::npos;
std::string Formatted;
if (HasBaseLetter) {
Formatted +=
transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos),
Style.NumericLiteralCase.Prefix);
}
// Reformat this slice as HexDigit whether or not the digit has hexadecimal
// characters because binary/decimal/octal digits are unchanged.
Formatted += transformComponent(
NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0,
HasExponent ? Info.ExponentLetterPos
: HasSuffix ? Info.SuffixPos
: NumericLiteral.size()),
Style.NumericLiteralCase.HexDigit);
if (HasExponent) {
Formatted += transformComponent(
NumericLiteral.slice(Info.ExponentLetterPos,
HasSuffix ? Info.SuffixPos
: NumericLiteral.size()),
Style.NumericLiteralCase.ExponentLetter);
}
if (HasSuffix) {
StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') {
// In C++, it is idiomatic, but NOT standardized to define user-defined
// literals with a leading '_'. Omit user defined literals and standard
// reserved suffixes from transformation.
Formatted += Suffix.str();
} else {
Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix);
}
}
return Formatted;
}
std::pair<tooling::Replacements, unsigned>
NumericLiteralCaseFixer::process(const Environment &Env,
const FormatStyle &Style) {
if (!isNumericLiteralCaseFixerNeeded(Style))
return {};
const auto &SourceMgr = Env.getSourceManager();
AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
const auto ID = Env.getFileID();
const auto LangOpts = getFormattingLangOpts(Style);
Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
Lex.SetCommentRetentionState(true);
Token Tok;
tooling::Replacements Result;
for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
// Skip tokens that are too small to contain a formattable literal.
// Size=2 is the smallest possible literal that could contain formattable
// components, for example "1u".
auto Length = Tok.getLength();
if (Length < 2)
continue;
// Service clang-format off/on comments.
auto Location = Tok.getLocation();
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
if (Tok.is(tok::comment)) {
if (isClangFormatOff(Text))
Skip = true;
else if (isClangFormatOn(Text))
Skip = false;
continue;
}
if (Skip || Tok.isNot(tok::numeric_constant) ||
!AffectedRangeMgr.affectsCharSourceRange(
CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
continue;
}
const auto Formatted = format(Text, Style);
if (Formatted != Text) {
cantFail(Result.add(
tooling::Replacement(SourceMgr, Location, Length, Formatted)));
}
}
return {Result, 0};
}
} // namespace format
} // namespace clang