blob: 79ae5ee98182b7aec5970c787590629c035e9a3e [file] [log] [blame]
//===--- ConfusableIdentifierCheck.cpp - clang-tidy -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ConfusableIdentifierCheck.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"
namespace {
// Preprocessed version of
// https://www.unicode.org/Public/security/latest/confusables.txt
//
// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
//
// skeleton() below refers to the array as `ConfusableEntries`, binary-searches
// it by `codepoint`, and reads `values` up to the first zero element (or up to
// the end of `values` when it holds no zero).
#include "Confusables.inc"
} // namespace
namespace clang::tidy::misc {
// Constructs the check. Both arguments are passed through unchanged to the
// ClangTidyCheck base class; no other state is initialised here.
ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context) {}
// Compiler-generated destructor, defined out of line in this source file.
ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default;
// Build a skeleton out of the Original identifier, inspired by the algorithm
// described in http://www.unicode.org/reports/tr39/#def-skeleton
//
// FIXME: TR39 mandates:
//
// For an input string X, define skeleton(X) to be the following transformation
// on the string:
//
// 1. Convert X to NFD format, as described in [UAX15].
// 2. Concatenate the prototypes for each character in X according to the
//    specified data, producing a string of exemplar characters.
// 3. Reapply NFD.
//
// We're skipping 1. and 3. for the sake of simplicity, but this can lead to
// false positives.
//
// Name is decoded as UTF-8 one code point at a time. Code points found in
// ConfusableEntries are replaced by the UTF-8 encoding of their `values`;
// all other code points are copied through byte-for-byte. If decoding or
// re-encoding fails, a message is written to errs() and the skeleton built so
// far is returned.
static llvm::SmallString<64U> skeleton(StringRef Name) {
  using namespace llvm;
  SmallString<64U> Skeleton;
  Skeleton.reserve(1U + Name.size());

  const char *Curr = Name.data();
  const char *End = Curr + Name.size();
  while (Curr < End) {
    // Decode the next code point; on success Curr is advanced past it, so
    // [Prev, Curr) is the original UTF-8 encoding of CodePoint.
    const char *Prev = Curr;
    UTF32 CodePoint = 0;
    ConversionResult Result = convertUTF8Sequence(
        reinterpret_cast<const UTF8 **>(&Curr),
        reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
    if (Result != conversionOK) {
      errs() << "Unicode conversion issue\n";
      break;
    }

    // Binary search of the sorted table for this code point.
    auto *Where = llvm::lower_bound(ConfusableEntries, CodePoint,
                                    [](decltype(ConfusableEntries[0]) X,
                                       UTF32 Y) { return X.codepoint < Y; });
    if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
      // Not a known confusable: keep the original bytes.
      Skeleton.append(Prev, Curr);
      continue;
    }

    // Size the scratch buffer for the longest replacement the table can hold:
    // every UTF-32 code point encodes to at most four UTF-8 bytes. A fixed
    // small buffer could be exhausted by a long replacement sequence, which
    // would abort the conversion and truncate the skeleton.
    constexpr unsigned MaxUTF8BytesPerCodePoint = 4;
    UTF8 Buffer[sizeof(Where->values) / sizeof(Where->values[0]) *
                MaxUTF8BytesPerCodePoint];
    UTF8 *BufferStart = std::begin(Buffer);
    UTF8 *IBuffer = BufferStart;
    // The replacement runs up to the first zero element of `values`, or to the
    // end of the array if there is none.
    const UTF32 *ValuesStart = std::begin(Where->values);
    const UTF32 *ValuesEnd = llvm::find(Where->values, '\0');
    if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
                           std::end(Buffer),
                           strictConversion) != conversionOK) {
      errs() << "Unicode conversion issue\n";
      break;
    }
    Skeleton.append(reinterpret_cast<const char *>(BufferStart),
                    reinterpret_cast<const char *>(IBuffer));
  }
  return Skeleton;
}
namespace {
// One declaration recorded against a context by addToContext().
struct Entry {
// The recorded declaration.
const NamedDecl *ND;
// The parent the declaration was registered with, i.e. the `Parent` argument
// given to addToEnclosingContexts() (not necessarily the context this entry
// is stored under).
const Decl *Parent;
// True when the entry was stored under a base class on behalf of a
// declaration found while walking out through a derived class.
bool FromDerivedClass;
};
} // namespace
// Map from a context to the declarations in that context with the current
// skeleton. At most one entry per distinct identifier is tracked. The
// context is usually a `DeclContext`, but can also be a template declaration
// that has no corresponding context, such as an alias template or variable
// template.
//
// A fresh map is built for each skeleton in onEndOfTranslationUnit().
using DeclsWithinContextMap =
llvm::DenseMap<const Decl *, llvm::SmallVector<Entry, 1>>;
// Records E among the declarations tracked for Context.
//
// Returns true if E was stored (appended, or substituted for the most recent
// entry) and false if it was dropped as a duplicate.
static bool addToContext(DeclsWithinContextMap &DeclsWithinContext,
                         const Decl *Context, Entry E) {
  auto &Tracked = DeclsWithinContext[Context];

  // Only the most recently tracked entry is compared against E.
  const bool SameIdentifierAsLast =
      !Tracked.empty() &&
      Tracked.back().ND->getIdentifier() == E.ND->getIdentifier();
  if (!SameIdentifierAsLast) {
    Tracked.push_back(E);
    return true;
  }

  // This identifier is already tracked in this context, so no second entry is
  // added. As a result, an outer name that is confusable with an inner name
  // is diagnosed only once, pointing at the first inner declaration.
  //
  // The one exception: an entry that is not from a derived class replaces one
  // that is, because it conflicts with more declarations.
  Entry &Last = Tracked.back();
  const bool Replace = Last.FromDerivedClass && !E.FromDerivedClass;
  if (Replace)
    Last = E;
  return Replace;
}
// Registers ND (whose recorded parent is Parent) with Parent and with every
// context that transitively encloses Parent, walking outwards until there is
// no further context or until addToContext() reports that nothing was stored.
static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext,
                                   const Decl *Parent, const NamedDecl *ND) {
  for (const Decl *Ctx = Parent; Ctx != nullptr;) {
    // All declarations of one namespace share the canonical declaration as
    // their map key.
    if (const auto *Namespace = dyn_cast<NamespaceDecl>(Ctx))
      Ctx = Namespace->getCanonicalDecl();

    // Stop as soon as this context declines the entry.
    const bool Stored =
        addToContext(DeclsWithinContext, Ctx, Entry{ND, Parent, false});
    if (!Stored)
      return;

    // For a class with a definition, also register the declaration with each
    // base class, marked as coming from a derived class.
    const auto *Record = dyn_cast<CXXRecordDecl>(Ctx);
    const CXXRecordDecl *Definition =
        Record ? Record->getDefinition() : nullptr;
    if (Definition) {
      auto RegisterWithBase = [&](const CXXRecordDecl *Base) {
        addToContext(DeclsWithinContext, Base, Entry{ND, Parent, true});
        return true; // Keep visiting the remaining bases.
      };
      Definition->forallBases(RegisterWithBase);
    }

    // Step out to the nearest non-transparent enclosing context, if any.
    const auto *EnclosingDC = Ctx->getDeclContext();
    if (!EnclosingDC)
      return;
    Ctx = cast_or_null<Decl>(EnclosingDC->getNonTransparentContext());
  }
}
// Match callback: records the matched declaration, together with any template
// or function parameters it owns, for analysis at the end of the translation
// unit.
void ConfusableIdentifierCheck::check(
    const ast_matchers::MatchFinder::MatchResult &Result) {
  const auto *Matched = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
  if (Matched == nullptr)
    return;

  // The declaration itself is filed under its nearest non-transparent
  // enclosing context.
  const auto *Enclosing =
      cast<Decl>(Matched->getDeclContext()->getNonTransparentContext());
  addDeclToCheck(Matched, Enclosing);

  // Template parameters are filed under the templated declaration of this
  // particular template declaration.
  if (const auto *Template = dyn_cast<TemplateDecl>(Matched)) {
    const Decl *Templated = Template->getTemplatedDecl();
    for (const NamedDecl *TemplateParam : *Template->getTemplateParameters())
      addDeclToCheck(TemplateParam, Templated);
  }

  // Function parameters are filed under this particular function declaration.
  if (const auto *Function = dyn_cast<FunctionDecl>(Matched)) {
    for (const NamedDecl *FunctionParam : Function->parameters())
      addDeclToCheck(FunctionParam, Matched);
  }
}
void ConfusableIdentifierCheck::addDeclToCheck(const NamedDecl *ND,
const Decl *Parent) {
if (!ND || !Parent)
return;
const IdentifierInfo *NDII = ND->getIdentifier();
if (!NDII)
return;
StringRef NDName = NDII->getName();
if (NDName.empty())
return;
NameToDecls[NDII].push_back({ND, Parent});
}
// Runs the actual analysis over everything recorded by addDeclToCheck():
// identifiers are grouped by skeleton, and within each group a declaration is
// diagnosed when it lives in a context that transitively contains another
// declaration with a different identifier. The recorded state is cleared
// afterwards.
void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
  // Bucket every recorded identifier by the skeleton of its spelling.
  llvm::StringMap<llvm::SmallVector<const IdentifierInfo *, 1>> SkeletonToNames;
  for (auto &[Name, RecordedDecls] : NameToDecls)
    SkeletonToNames[skeleton(Name->getName())].push_back(Name);

  for (auto &Bucket : SkeletonToNames) {
    const auto &Lookalikes = Bucket.getValue();
    // A skeleton needs at least two distinct identifiers to be interesting.
    if (Lookalikes.size() <= 1)
      continue;

    // For every context, collect the declarations (with this skeleton) that
    // it transitively contains.
    DeclsWithinContextMap DeclsWithinContext;
    for (const IdentifierInfo *Name : Lookalikes)
      for (auto [Declared, DeclaredParent] : NameToDecls[Name])
        addToEnclosingContexts(DeclsWithinContext, DeclaredParent, Declared);

    // Pair each declaration with the declarations found inside its own parent
    // context and report those spelled with a different identifier.
    //
    // NOTE(review): addToEnclosingContexts() files namespaces under their
    // canonical declaration, while the lookup below uses the parent exactly
    // as recorded -- confirm that reopened namespaces behave as intended.
    for (const IdentifierInfo *Name : Lookalikes) {
      for (auto [OuterND, OuterParent] : NameToDecls[Name]) {
        for (const Entry &Inner : DeclsWithinContext[OuterParent]) {
          // Identical identifiers are not confusable with each other.
          const bool SameIdentifier =
              OuterND->getIdentifier() == Inner.ND->getIdentifier();
          if (SameIdentifier)
            continue;

          // A derived-class name shadowing a private base-class member is
          // not reported.
          const bool ShadowsPrivateBaseMember =
              OuterND->getAccess() == AS_private && Inner.FromDerivedClass;
          if (ShadowsPrivateBaseMember)
            continue;

          // Within a single context only the later declaration is reported,
          // so skip the pairing in which the inner one comes first.
          if (OuterParent == Inner.Parent) {
            const auto &SM = Inner.ND->getASTContext().getSourceManager();
            if (SM.isBeforeInTranslationUnit(Inner.ND->getLocation(),
                                             OuterND->getLocation()))
              continue;
          }

          diag(Inner.ND->getLocation(), "%0 is confusable with %1")
              << Inner.ND << OuterND;
          diag(OuterND->getLocation(), "other declaration found here",
               DiagnosticIDs::Note);
        }
      }
    }
  }

  NameToDecls.clear();
}
// Registers a matcher for every named declaration except parameters, binding
// each match as "nameddecl".
void ConfusableIdentifierCheck::registerMatchers(
    ast_matchers::MatchFinder *Finder) {
  using namespace ast_matchers;

  // Parameters are excluded here and picked up in `check` via the declaration
  // that owns them: their `DeclContext` is sometimes the translation unit or
  // another outer context rather than their real parent.
  const auto IsParameter =
      anyOf(parmVarDecl(), templateTypeParmDecl(), nonTypeTemplateParmDecl(),
            templateTemplateParmDecl());

  Finder->addMatcher(namedDecl(unless(IsParameter)).bind("nameddecl"), this);
}
} // namespace clang::tidy::misc