|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | /// | 
|  | /// \file | 
|  | /// Implementation of the FormatStringConverter class which is used to convert | 
|  | /// printf format strings to C++ std::formatter format strings. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "FormatStringConverter.h" | 
|  | #include "../utils/FixItHintUtils.h" | 
|  | #include "clang/AST/Expr.h" | 
|  | #include "clang/ASTMatchers/ASTMatchFinder.h" | 
|  | #include "clang/Basic/LangOptions.h" | 
|  | #include "clang/Lex/Lexer.h" | 
|  | #include "clang/Lex/Preprocessor.h" | 
|  | #include "clang/Tooling/FixIt.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  |  | 
|  | using namespace clang::ast_matchers; | 
|  | using namespace clang::analyze_printf; | 
|  |  | 
|  | namespace clang::tidy::utils { | 
|  | using clang::analyze_format_string::ConversionSpecifier; | 
|  |  | 
|  | /// Is the passed type the actual "char" type, whether that be signed or | 
|  | /// unsigned, rather than explicit signed char or unsigned char types. | 
|  | static bool isRealCharType(const clang::QualType &Ty) { | 
|  | using namespace clang; | 
|  | const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); | 
|  | if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType)) | 
|  | return (BT->getKind() == BuiltinType::Char_U || | 
|  | BT->getKind() == BuiltinType::Char_S); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /// If possible, return the text name of the signed type that corresponds to the | 
|  | /// passed integer type. If the passed type is already signed then its name is | 
|  | /// just returned. Only supports BuiltinTypes. | 
|  | static std::optional<std::string> | 
|  | getCorrespondingSignedTypeName(const clang::QualType &QT) { | 
|  | using namespace clang; | 
|  | const auto UQT = QT.getUnqualifiedType(); | 
|  | if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { | 
|  | switch (BT->getKind()) { | 
|  | case BuiltinType::UChar: | 
|  | case BuiltinType::Char_U: | 
|  | case BuiltinType::SChar: | 
|  | case BuiltinType::Char_S: | 
|  | return "signed char"; | 
|  | case BuiltinType::UShort: | 
|  | case BuiltinType::Short: | 
|  | return "short"; | 
|  | case BuiltinType::UInt: | 
|  | case BuiltinType::Int: | 
|  | return "int"; | 
|  | case BuiltinType::ULong: | 
|  | case BuiltinType::Long: | 
|  | return "long"; | 
|  | case BuiltinType::ULongLong: | 
|  | case BuiltinType::LongLong: | 
|  | return "long long"; | 
|  | default: | 
|  | llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" | 
|  | << QT.getAsString() << "'\n"; | 
|  | return std::nullopt; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only | 
|  | // if the argument type does. | 
|  | const std::string TypeName = UQT.getAsString(); | 
|  | StringRef SimplifiedTypeName{TypeName}; | 
|  | const bool InStd = SimplifiedTypeName.consume_front("std::"); | 
|  | const StringRef Prefix = InStd ? "std::" : ""; | 
|  |  | 
|  | if (SimplifiedTypeName.starts_with("uint") && | 
|  | SimplifiedTypeName.ends_with("_t")) | 
|  | return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); | 
|  |  | 
|  | if (SimplifiedTypeName == "size_t") | 
|  | return (Twine(Prefix) + "ssize_t").str(); | 
|  |  | 
|  | llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" | 
|  | << UQT.getAsString() << "'\n"; | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | /// If possible, return the text name of the unsigned type that corresponds to | 
|  | /// the passed integer type. If the passed type is already unsigned then its | 
|  | /// name is just returned. Only supports BuiltinTypes. | 
|  | static std::optional<std::string> | 
|  | getCorrespondingUnsignedTypeName(const clang::QualType &QT) { | 
|  | using namespace clang; | 
|  | const auto UQT = QT.getUnqualifiedType(); | 
|  | if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { | 
|  | switch (BT->getKind()) { | 
|  | case BuiltinType::SChar: | 
|  | case BuiltinType::Char_S: | 
|  | case BuiltinType::UChar: | 
|  | case BuiltinType::Char_U: | 
|  | return "unsigned char"; | 
|  | case BuiltinType::Short: | 
|  | case BuiltinType::UShort: | 
|  | return "unsigned short"; | 
|  | case BuiltinType::Int: | 
|  | case BuiltinType::UInt: | 
|  | return "unsigned int"; | 
|  | case BuiltinType::Long: | 
|  | case BuiltinType::ULong: | 
|  | return "unsigned long"; | 
|  | case BuiltinType::LongLong: | 
|  | case BuiltinType::ULongLong: | 
|  | return "unsigned long long"; | 
|  | default: | 
|  | llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" | 
|  | << UQT.getAsString() << "'\n"; | 
|  | return std::nullopt; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only | 
|  | // if the argument type does. | 
|  | const std::string TypeName = UQT.getAsString(); | 
|  | StringRef SimplifiedTypeName{TypeName}; | 
|  | const bool InStd = SimplifiedTypeName.consume_front("std::"); | 
|  | const StringRef Prefix = InStd ? "std::" : ""; | 
|  |  | 
|  | if (SimplifiedTypeName.starts_with("int") && | 
|  | SimplifiedTypeName.ends_with("_t")) | 
|  | return (Twine(Prefix) + "u" + SimplifiedTypeName).str(); | 
|  |  | 
|  | if (SimplifiedTypeName == "ssize_t") | 
|  | return (Twine(Prefix) + "size_t").str(); | 
|  | if (SimplifiedTypeName == "ptrdiff_t") | 
|  | return (Twine(Prefix) + "size_t").str(); | 
|  |  | 
|  | llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" | 
|  | << UQT.getAsString() << "'\n"; | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | static std::optional<std::string> | 
|  | castTypeForArgument(ConversionSpecifier::Kind ArgKind, | 
|  | const clang::QualType &QT) { | 
|  | if (ArgKind == ConversionSpecifier::Kind::uArg) | 
|  | return getCorrespondingUnsignedTypeName(QT); | 
|  | return getCorrespondingSignedTypeName(QT); | 
|  | } | 
|  |  | 
|  | static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, | 
|  | const clang::QualType &ArgType) { | 
|  | if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) { | 
|  | // Unadorned char never matches any expected signedness since it | 
|  | // could be signed or unsigned. | 
|  | const auto ArgTypeKind = BT->getKind(); | 
|  | if (ArgTypeKind == BuiltinType::Char_U || | 
|  | ArgTypeKind == BuiltinType::Char_S) | 
|  | return false; | 
|  | } | 
|  |  | 
|  | if (ArgKind == ConversionSpecifier::Kind::uArg) | 
|  | return ArgType->isUnsignedIntegerType(); | 
|  | return ArgType->isSignedIntegerType(); | 
|  | } | 
|  |  | 
|  | namespace { | 
|  | AST_MATCHER(clang::QualType, isRealChar) { | 
|  | return clang::tidy::utils::isRealCharType(Node); | 
|  | } | 
|  | } // namespace | 
|  |  | 
|  | static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { | 
|  | /// For printf-style functions, the signedness of the type printed is | 
|  | /// indicated by the corresponding type in the format string. | 
|  | /// std::print will determine the signedness from the type of the | 
|  | /// argument. This means that it is necessary to generate a cast in | 
|  | /// StrictMode to ensure that the exact behaviour is maintained. | 
|  | /// However, for templated functions like absl::PrintF and | 
|  | /// fmt::printf, the signedness of the type printed is also taken from | 
|  | /// the actual argument like std::print, so such casts are never | 
|  | /// necessary. printf-style functions are variadic, whereas templated | 
|  | /// ones aren't, so we can use that to distinguish between the two | 
|  | /// cases. | 
|  | if (StrictMode) { | 
|  | const FunctionDecl *FuncDecl = Call->getDirectCallee(); | 
|  | assert(FuncDecl); | 
|  | return FuncDecl->isVariadic(); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | FormatStringConverter::FormatStringConverter( | 
|  | ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset, | 
|  | const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM, | 
|  | Preprocessor &PP) | 
|  | : Context(ContextIn), Config(ConfigIn), | 
|  | CastMismatchedIntegerTypes( | 
|  | castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)), | 
|  | Args(Call->getArgs()), NumArgs(Call->getNumArgs()), | 
|  | ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { | 
|  | assert(ArgsOffset <= NumArgs); | 
|  | FormatExpr = llvm::dyn_cast<StringLiteral>( | 
|  | Args[FormatArgOffset]->IgnoreUnlessSpelledInSource()); | 
|  |  | 
|  | assert(FormatExpr && FormatExpr->isOrdinary()); | 
|  |  | 
|  | if (const std::optional<StringRef> MaybeMacroName = | 
|  | formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP); | 
|  | MaybeMacroName) { | 
|  | conversionNotPossible( | 
|  | ("format string contains unreplaceable macro '" + *MaybeMacroName + "'") | 
|  | .str()); | 
|  | return; | 
|  | } | 
|  |  | 
|  | PrintfFormatString = FormatExpr->getString(); | 
|  |  | 
|  | // Assume that the output will be approximately the same size as the input, | 
|  | // but perhaps with a few escapes expanded. | 
|  | const size_t EstimatedGrowth = 8; | 
|  | StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth); | 
|  | StandardFormatString.push_back('\"'); | 
|  |  | 
|  | const bool IsFreeBsdkPrintf = false; | 
|  |  | 
|  | using clang::analyze_format_string::ParsePrintfString; | 
|  | ParsePrintfString(*this, PrintfFormatString.data(), | 
|  | PrintfFormatString.data() + PrintfFormatString.size(), | 
|  | LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf); | 
|  | finalizeFormatText(); | 
|  | } | 
|  |  | 
|  | std::optional<StringRef> | 
|  | FormatStringConverter::formatStringContainsUnreplaceableMacro( | 
|  | const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM, | 
|  | Preprocessor &PP) { | 
|  | // If a macro invocation surrounds the entire call then we don't want that to | 
|  | // inhibit conversion. The whole format string will appear to come from that | 
|  | // macro, as will the function call. | 
|  | std::optional<StringRef> MaybeSurroundingMacroName; | 
|  | if (SourceLocation BeginCallLoc = Call->getBeginLoc(); | 
|  | BeginCallLoc.isMacroID()) | 
|  | MaybeSurroundingMacroName = | 
|  | Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts()); | 
|  |  | 
|  | for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end(); | 
|  | I != E; ++I) { | 
|  | const SourceLocation &TokenLoc = *I; | 
|  | if (TokenLoc.isMacroID()) { | 
|  | const StringRef MacroName = | 
|  | Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts()); | 
|  |  | 
|  | if (MaybeSurroundingMacroName != MacroName) { | 
|  | // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes | 
|  | // for types that change size so we must look for multiple prefixes. | 
|  | if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI")) | 
|  | return MacroName; | 
|  |  | 
|  | const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc); | 
|  | const OptionalFileEntryRef MaybeFileEntry = | 
|  | SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc)); | 
|  | if (!MaybeFileEntry) | 
|  | return MacroName; | 
|  |  | 
|  | HeaderSearch &HS = PP.getHeaderSearchInfo(); | 
|  | // Check if the file is a system header | 
|  | if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) || | 
|  | llvm::sys::path::filename(MaybeFileEntry->getName()) != | 
|  | "inttypes.h") | 
|  | return MacroName; | 
|  | } | 
|  | } | 
|  | } | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, | 
|  | std::string &FormatSpec) { | 
|  | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); | 
|  |  | 
|  | // We only care about alignment if a field width is specified | 
|  | if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { | 
|  | if (ArgKind == ConversionSpecifier::sArg) { | 
|  | // Strings are left-aligned by default with std::format, so we only | 
|  | // need to emit an alignment if this one needs to be right aligned. | 
|  | if (!FS.isLeftJustified()) | 
|  | FormatSpec.push_back('>'); | 
|  | } else { | 
|  | // Numbers are right-aligned by default with std::format, so we only | 
|  | // need to emit an alignment if this one needs to be left aligned. | 
|  | if (FS.isLeftJustified()) | 
|  | FormatSpec.push_back('<'); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitSign(const PrintfSpecifier &FS, | 
|  | std::string &FormatSpec) { | 
|  | const ConversionSpecifier Spec = FS.getConversionSpecifier(); | 
|  |  | 
|  | // Ignore on something that isn't numeric. For printf it's would be a | 
|  | // compile-time warning but ignored at runtime, but for std::format it | 
|  | // ought to be a compile-time error. | 
|  | if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { | 
|  | // + is preferred to ' ' | 
|  | if (FS.hasPlusPrefix()) | 
|  | FormatSpec.push_back('+'); | 
|  | else if (FS.hasSpacePrefix()) | 
|  | FormatSpec.push_back(' '); | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, | 
|  | std::string &FormatSpec) { | 
|  | if (FS.hasAlternativeForm()) { | 
|  | switch (FS.getConversionSpecifier().getKind()) { | 
|  | case ConversionSpecifier::Kind::aArg: | 
|  | case ConversionSpecifier::Kind::AArg: | 
|  | case ConversionSpecifier::Kind::eArg: | 
|  | case ConversionSpecifier::Kind::EArg: | 
|  | case ConversionSpecifier::Kind::fArg: | 
|  | case ConversionSpecifier::Kind::FArg: | 
|  | case ConversionSpecifier::Kind::gArg: | 
|  | case ConversionSpecifier::Kind::GArg: | 
|  | case ConversionSpecifier::Kind::xArg: | 
|  | case ConversionSpecifier::Kind::XArg: | 
|  | case ConversionSpecifier::Kind::oArg: | 
|  | FormatSpec.push_back('#'); | 
|  | break; | 
|  | default: | 
|  | // Alternative forms don't exist for other argument kinds | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, | 
|  | std::string &FormatSpec) { | 
|  | { | 
|  | const OptionalAmount FieldWidth = FS.getFieldWidth(); | 
|  | switch (FieldWidth.getHowSpecified()) { | 
|  | case OptionalAmount::NotSpecified: | 
|  | break; | 
|  | case OptionalAmount::Constant: | 
|  | FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount())); | 
|  | break; | 
|  | case OptionalAmount::Arg: | 
|  | FormatSpec.push_back('{'); | 
|  | if (FieldWidth.usesPositionalArg()) { | 
|  | // std::format argument identifiers are zero-based, whereas printf | 
|  | // ones are one based. | 
|  | assert(FieldWidth.getPositionalArgIndex() > 0U); | 
|  | FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1)); | 
|  | } | 
|  | FormatSpec.push_back('}'); | 
|  | break; | 
|  | case OptionalAmount::Invalid: | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, | 
|  | std::string &FormatSpec) { | 
|  | const OptionalAmount FieldPrecision = FS.getPrecision(); | 
|  | switch (FieldPrecision.getHowSpecified()) { | 
|  | case OptionalAmount::NotSpecified: | 
|  | break; | 
|  | case OptionalAmount::Constant: | 
|  | FormatSpec.push_back('.'); | 
|  | FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount())); | 
|  | break; | 
|  | case OptionalAmount::Arg: | 
|  | FormatSpec.push_back('.'); | 
|  | FormatSpec.push_back('{'); | 
|  | if (FieldPrecision.usesPositionalArg()) { | 
|  | // std::format argument identifiers are zero-based, whereas printf | 
|  | // ones are one based. | 
|  | assert(FieldPrecision.getPositionalArgIndex() > 0U); | 
|  | FormatSpec.append( | 
|  | llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1)); | 
|  | } | 
|  | FormatSpec.push_back('}'); | 
|  | break; | 
|  | case OptionalAmount::Invalid: | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { | 
|  | unsigned ArgCount = 0; | 
|  | const OptionalAmount FieldWidth = FS.getFieldWidth(); | 
|  | const OptionalAmount FieldPrecision = FS.getPrecision(); | 
|  |  | 
|  | if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && | 
|  | !FieldWidth.usesPositionalArg()) | 
|  | ++ArgCount; | 
|  | if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && | 
|  | !FieldPrecision.usesPositionalArg()) | 
|  | ++ArgCount; | 
|  |  | 
|  | if (ArgCount) | 
|  | ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount); | 
|  | } | 
|  |  | 
|  | void FormatStringConverter::emitStringArgument(unsigned ArgIndex, | 
|  | const Expr *Arg) { | 
|  | // If the argument is the result of a call to std::string::c_str() or | 
|  | // data() with a return type of char then we can remove that call and | 
|  | // pass the std::string directly. We don't want to do so if the return | 
|  | // type is not a char pointer (though it's unlikely that such code would | 
|  | // compile without warnings anyway.) See RedundantStringCStrCheck. | 
|  |  | 
|  | if (!StringCStrCallExprMatcher) { | 
|  | // Lazily create the matcher | 
|  | const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType( | 
|  | hasDeclaration(cxxRecordDecl(hasName("::std::basic_string")))))); | 
|  | const auto StringExpr = expr( | 
|  | anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl))))); | 
|  |  | 
|  | StringCStrCallExprMatcher = | 
|  | cxxMemberCallExpr( | 
|  | on(StringExpr.bind("arg")), callee(memberExpr().bind("member")), | 
|  | callee(cxxMethodDecl(hasAnyName("c_str", "data"), | 
|  | returns(pointerType(pointee(isRealChar())))))) | 
|  | .bind("call"); | 
|  | } | 
|  |  | 
|  | auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context); | 
|  | if (CStrMatches.size() == 1) | 
|  | ArgCStrRemovals.push_back(CStrMatches.front()); | 
|  | else if (Arg->getType()->isPointerType()) { | 
|  | const QualType Pointee = Arg->getType()->getPointeeType(); | 
|  | // printf is happy to print signed char and unsigned char strings, but | 
|  | // std::format only likes char strings. | 
|  | if (Pointee->isCharType() && !isRealCharType(Pointee)) | 
|  | ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>("); | 
|  | } | 
|  | } | 
|  |  | 
|  | bool FormatStringConverter::emitIntegerArgument( | 
|  | ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, | 
|  | std::string &FormatSpec) { | 
|  | const clang::QualType &ArgType = Arg->getType(); | 
|  | if (ArgType->isBooleanType()) { | 
|  | // std::format will print bool as either "true" or "false" by default, | 
|  | // but printf prints them as "0" or "1". Be compatible with printf by | 
|  | // requesting decimal output. | 
|  | FormatSpec.push_back('d'); | 
|  | } else if (ArgType->isEnumeralType()) { | 
|  | // std::format will try to find a specialization to print the enum | 
|  | // (and probably fail), whereas printf would have just expected it to | 
|  | // be passed as its underlying type. However, printf will have forced | 
|  | // the signedness based on the format string, so we need to do the | 
|  | // same. | 
|  | if (const auto *ED = ArgType->getAsEnumDecl()) { | 
|  | if (const std::optional<std::string> MaybeCastType = | 
|  | castTypeForArgument(ArgKind, ED->getIntegerType())) | 
|  | ArgFixes.emplace_back( | 
|  | ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); | 
|  | else | 
|  | return conversionNotPossible( | 
|  | (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type") | 
|  | .str()); | 
|  | } | 
|  | } else if (CastMismatchedIntegerTypes && | 
|  | !isMatchingSignedness(ArgKind, ArgType)) { | 
|  | // printf will happily print an unsigned type as signed if told to. | 
|  | // Even -Wformat doesn't warn for this. std::format will format as | 
|  | // unsigned unless we cast it. | 
|  | if (const std::optional<std::string> MaybeCastType = | 
|  | castTypeForArgument(ArgKind, ArgType)) | 
|  | ArgFixes.emplace_back( | 
|  | ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); | 
|  | else | 
|  | return conversionNotPossible( | 
|  | (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " + | 
|  | Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" | 
|  | : "signed") + | 
|  | " integer type to match format" | 
|  | " specifier and StrictMode is enabled") | 
|  | .str()); | 
|  | } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) { | 
|  | // Only specify integer if the argument is of a different type | 
|  | FormatSpec.push_back('d'); | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Append the corresponding standard format string type fragment to FormatSpec, | 
|  | /// and store any argument fixes for later application. | 
|  | /// @returns true on success, false on failure | 
|  | bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, | 
|  | std::string &FormatSpec) { | 
|  | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); | 
|  | switch (ArgKind) { | 
|  | case ConversionSpecifier::Kind::sArg: | 
|  | emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::cArg: | 
|  | // The type must be "c" to get a character unless the type is exactly | 
|  | // char (whether that be signed or unsigned for the target.) | 
|  | if (!isRealCharType(Arg->getType())) | 
|  | FormatSpec.push_back('c'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::dArg: | 
|  | case ConversionSpecifier::Kind::iArg: | 
|  | case ConversionSpecifier::Kind::uArg: | 
|  | if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset, | 
|  | FormatSpec)) | 
|  | return false; | 
|  | break; | 
|  | case ConversionSpecifier::Kind::pArg: { | 
|  | const clang::QualType &ArgType = Arg->getType(); | 
|  | // std::format knows how to format void pointers and nullptrs | 
|  | if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) | 
|  | ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset, | 
|  | "static_cast<const void *>("); | 
|  | break; | 
|  | } | 
|  | case ConversionSpecifier::Kind::xArg: | 
|  | FormatSpec.push_back('x'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::XArg: | 
|  | FormatSpec.push_back('X'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::oArg: | 
|  | FormatSpec.push_back('o'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::aArg: | 
|  | FormatSpec.push_back('a'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::AArg: | 
|  | FormatSpec.push_back('A'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::eArg: | 
|  | FormatSpec.push_back('e'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::EArg: | 
|  | FormatSpec.push_back('E'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::fArg: | 
|  | FormatSpec.push_back('f'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::FArg: | 
|  | FormatSpec.push_back('F'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::gArg: | 
|  | FormatSpec.push_back('g'); | 
|  | break; | 
|  | case ConversionSpecifier::Kind::GArg: | 
|  | FormatSpec.push_back('G'); | 
|  | break; | 
|  | default: | 
|  | // Something we don't understand | 
|  | return conversionNotPossible((Twine("argument ") + | 
|  | Twine(FS.getArgIndex() + ArgsOffset) + | 
|  | " has an unsupported format specifier") | 
|  | .str()); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Append the standard format string equivalent of the passed PrintfSpecifier | 
|  | /// to StandardFormatString and store any argument fixes for later application. | 
|  | /// @returns true on success, false on failure | 
|  | bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, | 
|  | const Expr *Arg, | 
|  | std::string &StandardFormatString) { | 
|  | // The specifier must have an associated argument | 
|  | assert(FS.consumesDataArgument()); | 
|  |  | 
|  | StandardFormatString.push_back('{'); | 
|  |  | 
|  | if (FS.usesPositionalArg()) { | 
|  | // std::format argument identifiers are zero-based, whereas printf ones | 
|  | // are one based. | 
|  | assert(FS.getPositionalArgIndex() > 0U); | 
|  | StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1)); | 
|  | } | 
|  |  | 
|  | // std::format format argument parts to potentially emit: | 
|  | // [[fill]align][sign]["#"]["0"][width]["."precision][type] | 
|  | std::string FormatSpec; | 
|  |  | 
|  | // printf doesn't support specifying the fill character - it's always a | 
|  | // space, so we never need to generate one. | 
|  |  | 
|  | emitAlignment(FS, FormatSpec); | 
|  | emitSign(FS, FormatSpec); | 
|  | emitAlternativeForm(FS, FormatSpec); | 
|  |  | 
|  | if (FS.hasLeadingZeros()) | 
|  | FormatSpec.push_back('0'); | 
|  |  | 
|  | emitFieldWidth(FS, FormatSpec); | 
|  | emitPrecision(FS, FormatSpec); | 
|  | maybeRotateArguments(FS); | 
|  |  | 
|  | if (!emitType(FS, Arg, FormatSpec)) | 
|  | return false; | 
|  |  | 
|  | if (!FormatSpec.empty()) { | 
|  | StandardFormatString.push_back(':'); | 
|  | StandardFormatString.append(FormatSpec); | 
|  | } | 
|  |  | 
|  | StandardFormatString.push_back('}'); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Called for each format specifier by ParsePrintfString. | 
|  | bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, | 
|  | const char *StartSpecifier, | 
|  | unsigned SpecifierLen, | 
|  | const TargetInfo &Target) { | 
|  |  | 
|  | const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); | 
|  | assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); | 
|  |  | 
|  | // Everything before the specifier needs copying verbatim | 
|  | assert(StartSpecifierPos >= PrintfFormatStringPos); | 
|  |  | 
|  | appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, | 
|  | StartSpecifierPos - PrintfFormatStringPos)); | 
|  |  | 
|  | const ConversionSpecifier::Kind ArgKind = | 
|  | FS.getConversionSpecifier().getKind(); | 
|  |  | 
|  | // Skip over specifier | 
|  | PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; | 
|  | assert(PrintfFormatStringPos <= PrintfFormatString.size()); | 
|  |  | 
|  | FormatStringNeededRewriting = true; | 
|  |  | 
|  | if (ArgKind == ConversionSpecifier::Kind::nArg) { | 
|  | // std::print doesn't do the equivalent of %n | 
|  | return conversionNotPossible("'%n' is not supported in format string"); | 
|  | } | 
|  |  | 
|  | if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { | 
|  | // std::print doesn't support %m. In theory we could insert a | 
|  | // strerror(errno) parameter (assuming that libc has a thread-safe | 
|  | // implementation, which glibc does), but that would require keeping track | 
|  | // of the input and output parameter indices for position arguments too. | 
|  | return conversionNotPossible("'%m' is not supported in format string"); | 
|  | } | 
|  |  | 
|  | if (ArgKind == ConversionSpecifier::PercentArg) { | 
|  | StandardFormatString.push_back('%'); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; | 
|  | if (ArgIndex >= NumArgs) { | 
|  | // Argument index out of range. Give up. | 
|  | return conversionNotPossible( | 
|  | (Twine("argument index ") + Twine(ArgIndex) + " is out of range") | 
|  | .str()); | 
|  | } | 
|  |  | 
|  | return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(), | 
|  | StandardFormatString); | 
|  | } | 
|  |  | 
|  | /// Called at the very end just before applying fixes to capture the last part | 
|  | /// of the format string. | 
|  | void FormatStringConverter::finalizeFormatText() { | 
|  | appendFormatText( | 
|  | StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, | 
|  | PrintfFormatString.size() - PrintfFormatStringPos)); | 
|  | PrintfFormatStringPos = PrintfFormatString.size(); | 
|  |  | 
|  | // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") | 
|  | // than to std::println("Hello\r"); | 
|  | // Use StringRef until C++20 std::string::ends_with() is available. | 
|  | const auto StandardFormatStringRef = StringRef(StandardFormatString); | 
|  | if (Config.AllowTrailingNewlineRemoval && | 
|  | StandardFormatStringRef.ends_with("\\n") && | 
|  | !StandardFormatStringRef.ends_with("\\\\n") && | 
|  | !StandardFormatStringRef.ends_with("\\r\\n")) { | 
|  | UsePrintNewlineFunction = true; | 
|  | FormatStringNeededRewriting = true; | 
|  | StandardFormatString.erase(StandardFormatString.end() - 2, | 
|  | StandardFormatString.end()); | 
|  | } | 
|  |  | 
|  | StandardFormatString.push_back('\"'); | 
|  | } | 
|  |  | 
|  | /// Append literal parts of the format text, reinstating escapes as required. | 
|  | void FormatStringConverter::appendFormatText(const StringRef Text) { | 
|  | for (const char Ch : Text) { | 
|  | if (Ch == '\a') | 
|  | StandardFormatString += "\\a"; | 
|  | else if (Ch == '\b') | 
|  | StandardFormatString += "\\b"; | 
|  | else if (Ch == '\f') | 
|  | StandardFormatString += "\\f"; | 
|  | else if (Ch == '\n') | 
|  | StandardFormatString += "\\n"; | 
|  | else if (Ch == '\r') | 
|  | StandardFormatString += "\\r"; | 
|  | else if (Ch == '\t') | 
|  | StandardFormatString += "\\t"; | 
|  | else if (Ch == '\v') | 
|  | StandardFormatString += "\\v"; | 
|  | else if (Ch == '\"') | 
|  | StandardFormatString += "\\\""; | 
|  | else if (Ch == '\\') | 
|  | StandardFormatString += "\\\\"; | 
|  | else if (Ch == '{') { | 
|  | StandardFormatString += "{{"; | 
|  | FormatStringNeededRewriting = true; | 
|  | } else if (Ch == '}') { | 
|  | StandardFormatString += "}}"; | 
|  | FormatStringNeededRewriting = true; | 
|  | } else if (Ch < 32) { | 
|  | StandardFormatString += "\\x"; | 
|  | StandardFormatString += llvm::hexdigit(Ch >> 4, true); | 
|  | StandardFormatString += llvm::hexdigit(Ch & 0xf, true); | 
|  | } else | 
|  | StandardFormatString += Ch; | 
|  | } | 
|  | } | 
|  |  | 
|  | static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, | 
|  | ASTContext &Context) { | 
|  | const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg"); | 
|  | const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member"); | 
|  | const bool Arrow = Member->isArrow(); | 
|  | return Arrow ? utils::fixit::formatDereference(*Arg, Context) | 
|  | : tooling::fixit::getText(*Arg, Context).str(); | 
|  | } | 
|  |  | 
|  | /// Called by the check when it is ready to apply the fixes. | 
|  | void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, | 
|  | SourceManager &SM) { | 
|  | if (FormatStringNeededRewriting) { | 
|  | Diag << FixItHint::CreateReplacement( | 
|  | CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(), | 
|  | FormatExpr->getEndLoc()), | 
|  | StandardFormatString); | 
|  | } | 
|  |  | 
|  | // ArgCount is one less than the number of arguments to be rotated. | 
|  | for (auto [ValueArgIndex, ArgCount] : ArgRotates) { | 
|  | assert(ValueArgIndex < NumArgs); | 
|  | assert(ValueArgIndex > ArgCount); | 
|  |  | 
|  | // First move the value argument to the right place. But if there's a | 
|  | // pending c_str() removal then we must do that at the same time. | 
|  | if (const auto CStrRemovalMatch = | 
|  | std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(), | 
|  | [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( | 
|  | const BoundNodes &Match) { | 
|  | // This c_str() removal corresponds to the argument | 
|  | // being moved if they start at the same location. | 
|  | const Expr *CStrArg = Match.getNodeAs<Expr>("arg"); | 
|  | return ArgStartPos == CStrArg->getBeginLoc(); | 
|  | }); | 
|  | CStrRemovalMatch != ArgCStrRemovals.end()) { | 
|  | const std::string ArgText = | 
|  | withoutCStrReplacement(*CStrRemovalMatch, *Context); | 
|  | assert(!ArgText.empty()); | 
|  |  | 
|  | Diag << FixItHint::CreateReplacement( | 
|  | Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); | 
|  |  | 
|  | // That c_str() removal is now dealt with, so we don't need to do it again | 
|  | ArgCStrRemovals.erase(CStrRemovalMatch); | 
|  | } else | 
|  | Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount], | 
|  | *Args[ValueArgIndex], *Context); | 
|  |  | 
|  | // Now shift down the field width and precision (if either are present) to | 
|  | // accommodate it. | 
|  | for (size_t Offset = 0; Offset < ArgCount; ++Offset) | 
|  | Diag << tooling::fixit::createReplacement( | 
|  | *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1], | 
|  | *Context); | 
|  |  | 
|  | // Now we need to modify the ArgFix index too so that we fix the right | 
|  | // argument. We don't need to care about the width and precision indices | 
|  | // since they never need fixing. | 
|  | for (auto &ArgFix : ArgFixes) { | 
|  | if (ArgFix.ArgIndex == ValueArgIndex) | 
|  | ArgFix.ArgIndex = ValueArgIndex - ArgCount; | 
|  | } | 
|  | } | 
|  |  | 
|  | for (const auto &[ArgIndex, Replacement] : ArgFixes) { | 
|  | SourceLocation AfterOtherSide = | 
|  | Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts) | 
|  | ->getLocation(); | 
|  |  | 
|  | Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(), | 
|  | Replacement, true) | 
|  | << FixItHint::CreateInsertion(AfterOtherSide, ")", true); | 
|  | } | 
|  |  | 
|  | for (const auto &Match : ArgCStrRemovals) { | 
|  | const auto *Call = Match.getNodeAs<CallExpr>("call"); | 
|  | const std::string ArgText = withoutCStrReplacement(Match, *Context); | 
|  | if (!ArgText.empty()) | 
|  | Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); | 
|  | } | 
|  | } | 
|  | } // namespace clang::tidy::utils |