// blob: c9d32a00fc15f3705c26af12bbaf0f20680adc33 [file] [log] [blame]
//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "IncludeCleaner.h"
#include "Config.h"
#include "Diagnostics.h"
#include "Headers.h"
#include "ParsedAST.h"
#include "Preamble.h"
#include "Protocol.h"
#include "SourceCode.h"
#include "URI.h"
#include "clang-include-cleaner/Analysis.h"
#include "clang-include-cleaner/IncludeSpeller.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "support/Logger.h"
#include "support/Path.h"
#include "support/Trace.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Inclusions/HeaderIncludes.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GenericUniformityImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include <cassert>
#include <iterator>
#include <optional>
#include <string>
#include <utility>
#include <vector>
namespace clang {
namespace clangd {
// File-local switch consulted when deciding whether an angled (<...>) include
// may be reported as unused. Off by default.
static bool AnalyzeStdlib{false};

// Turns analysis of standard library headers on (true) or off (false).
void setIncludeCleanerAnalyzesStdlib(bool B) {
  AnalyzeStdlib = B;
}
namespace {
// Computes the LSP range of an #include directive: it begins at the '#' found
// at HashOffset and extends to the end of that line, or to the end of Code if
// there is no line terminator. Escaped newlines (line continuations) are not
// taken into account.
clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
  const auto HashPos = offsetToPosition(Code, HashOffset);
  // Everything from '#' up to, but excluding, the first '\n' or '\r'.
  const llvm::StringRef Directive =
      Code.drop_front(HashOffset).take_until(
          [](char C) { return C == '\n' || C == '\r'; });
  clangd::Range Result;
  Result.start = HashPos;
  Result.end = HashPos;
  // The end stays on the same line, shifted by the directive's LSP length.
  Result.end.character += lspLength(Directive);
  return Result;
}
bool isFilteredByConfig(const Config &Cfg, llvm::StringRef HeaderPath) {
// Convert the path to Unix slashes and try to match against the filter.
llvm::SmallString<64> NormalizedPath(HeaderPath);
llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix);
for (auto &Filter : Cfg.Diagnostics.Includes.IgnoreHeader) {
if (Filter(NormalizedPath))
return true;
}
return false;
}
// Decides whether an include that nothing refers to is eligible for an
// "unused include" diagnostic.
//
// Returns false (i.e. never diagnose) when:
//  - the include is spelled with angle brackets, unless stdlib analysis is
//    enabled and the spelling names a recognized standard library header;
//  - PI says the include line should be kept;
//  - PI maps the included header to a public header whose spelling is a
//    suffix of the main file path;
//  - the header has no multiple-include guard;
//  - the resolved path matches one of the configured ignore filters.
// Returns true otherwise. PI may be null, in which case the pragma-based
// checks are skipped.
static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
                              const Config &Cfg,
                              const include_cleaner::PragmaIncludes *PI) {
  // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
  // System headers are likely to be standard library headers.
  // Until we have good support for umbrella headers, don't warn about them.
  if (Inc.Written.front() == '<') {
    if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written))
      return true;
    return false;
  }
  assert(Inc.HeaderID);
  auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
  auto FE = AST.getSourceManager().getFileManager().getFileRef(
      AST.getIncludeStructure().getRealPath(HID));
  assert(FE);
  if (PI) {
    // PragmaIncludes is queried with a 1-based line; HashLine is 0-based.
    if (PI->shouldKeep(Inc.HashLine + 1))
      return false;
    // Check if main file is the public interface for a private header. If so we
    // shouldn't diagnose it as unused.
    if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) {
      PHeader = PHeader.trim("<>\"");
      // Since most private -> public mappings happen in a verbatim way, we
      // check textually here. This might go wrong in presence of symlinks or
      // header mappings. But that's not different than rest of the places.
      if (AST.tuPath().ends_with(PHeader))
        return false;
    }
  }
  // Headers without include guards have side effects and are not
  // self-contained, skip them.
  if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
          &FE->getFileEntry())) {
    dlog("{0} doesn't have header guard and will not be considered unused",
         FE->getName());
    return false;
  }
  if (isFilteredByConfig(Cfg, Inc.Resolved)) {
    dlog("{0} header is filtered out by the configuration", FE->getName());
    return false;
  }
  return true;
}
// Produces the path used for config filtering of a symbol provider:
//  - physical headers: the result of tryGetRealPathName() on the file;
//  - standard and verbatim headers: their spelling with any surrounding
//    '<', '>' and '"' characters trimmed.
llvm::StringRef getResolvedPath(const include_cleaner::Header &SymProvider) {
  const llvm::StringRef Delimiters = "<>\"";
  switch (SymProvider.kind()) {
  case include_cleaner::Header::Verbatim:
    return SymProvider.verbatim().trim(Delimiters);
  case include_cleaner::Header::Standard:
    return SymProvider.standard().name().trim(Delimiters);
  case include_cleaner::Header::Physical:
    return SymProvider.physical()->tryGetRealPathName();
  }
  llvm_unreachable("Unknown header kind");
}
// Returns a printable name for Sym: the identifier for macros, and the
// qualified name for declarations.
std::string getSymbolName(const include_cleaner::Symbol &Sym) {
  switch (Sym.kind()) {
  case include_cleaner::Symbol::Macro:
    return Sym.macro().Name->getName().str();
  case include_cleaner::Symbol::Declaration:
    // The result is dereferenced unconditionally, so use cast<> (which asserts
    // on a type mismatch) rather than dyn_cast<> (which would silently yield
    // a null pointer and lead to a null dereference).
    return llvm::cast<NamedDecl>(&Sym.declaration())
        ->getQualifiedNameAsString();
  }
  llvm_unreachable("Unknown symbol kind");
}
// Turns the missing-include findings into diagnostics, each carrying a single
// fix that inserts the corresponding #include into the main file.
//
// Returns an empty vector unless Diagnostics.MissingIncludes is Strict and the
// "missing-includes" diagnostic is not suppressed. Findings whose first
// provider is filtered out by the config, or for which no insertion can be
// computed, are skipped.
//
// Code is the main file contents; it is used to infer the formatting style,
// to place the insertion and to convert offsets into LSP positions.
std::vector<Diag> generateMissingIncludeDiagnostics(
    ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
    llvm::StringRef Code) {
  std::vector<Diag> Result;
  const Config &Cfg = Config::current();
  if (Cfg.Diagnostics.MissingIncludes != Config::IncludesPolicy::Strict ||
      Cfg.Diagnostics.SuppressAll ||
      Cfg.Diagnostics.Suppress.contains("missing-includes")) {
    return Result;
  }

  const SourceManager &SM = AST.getSourceManager();
  const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());

  auto FileStyle = format::getStyle(
      format::DefaultFormatStyle, AST.tuPath(), format::DefaultFallbackStyle,
      Code, &SM.getFileManager().getVirtualFileSystem());
  if (!FileStyle) {
    // Include the error in the message; without a placeholder the reason for
    // the failure would be dropped from the log.
    elog("Couldn't infer style: {0}", FileStyle.takeError());
    FileStyle = format::getLLVMStyle();
  }

  tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
                                         FileStyle->IncludeStyle);
  for (const auto &SymbolWithMissingInclude : MissingIncludes) {
    // Only the first provider is considered for filtering and spelling.
    llvm::StringRef ResolvedPath =
        getResolvedPath(SymbolWithMissingInclude.Providers.front());
    if (isFilteredByConfig(Cfg, ResolvedPath)) {
      dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
           "config",
           ResolvedPath);
      continue;
    }

    std::string Spelling = include_cleaner::spellHeader(
        {SymbolWithMissingInclude.Providers.front(),
         AST.getPreprocessor().getHeaderSearchInfo(), MainFile});

    llvm::StringRef HeaderRef{Spelling};
    bool Angled = HeaderRef.starts_with("<");
    // We might suggest insertion of an existing include in edge cases, e.g.,
    // include is present in a PP-disabled region, or spelling of the header
    // turns out to be the same as one of the unresolved includes in the
    // main file.
    std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
        HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include);
    if (!Replacement.has_value())
      continue;

    Diag &D = Result.emplace_back();
    D.Message =
        llvm::formatv("No header providing \"{0}\" is directly included",
                      getSymbolName(SymbolWithMissingInclude.Symbol));
    D.Name = "missing-includes";
    D.Source = Diag::DiagSource::Clangd;
    D.File = AST.tuPath();
    D.InsideMainFile = true;
    // We avoid the "warning" severity here in favor of LSP's "information".
    //
    // Users treat most warnings on code being edited as high-priority.
    // They don't think of include cleanups the same way: they want to edit
    // lines with existing violations without fixing them.
    // Diagnostics at the same level tend to be visually indistinguishable,
    // and a few missing includes can cause many diagnostics.
    // Marking these as "information" leaves them visible, but less intrusive.
    //
    // (These concerns don't apply to unused #include warnings: these are fewer,
    // they appear on infrequently-edited lines with few other warnings, and
    // the 'Unnecessary' tag often results in a different rendering)
    //
    // clang's "note" severity usually has special semantics, being
    // translated into LSP RelatedInformation of a parent diagnostic.
    // But not here: these aren't processed by clangd's DiagnosticConsumer.
    D.Severity = DiagnosticsEngine::Note;
    D.Range = clangd::Range{
        offsetToPosition(Code,
                         SymbolWithMissingInclude.SymRefRange.beginOffset()),
        offsetToPosition(Code,
                         SymbolWithMissingInclude.SymRefRange.endOffset())};
    auto &F = D.Fixes.emplace_back();
    F.Message = "#include " + Spelling;
    TextEdit Edit = replacementToEdit(Code, *Replacement);
    F.Edits.emplace_back(std::move(Edit));
  }
  return Result;
}
// Creates one warning diagnostic per unused include, each with a single fix
// that removes the line holding the directive.
//
// Nothing is produced when Diagnostics.UnusedIncludes is None or when the
// "unused-includes" diagnostic is suppressed. FileName is recorded as the
// file of every diagnostic; Code is the main file contents used to compute
// the diagnostic range.
std::vector<Diag> generateUnusedIncludeDiagnostics(
    PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
    llvm::StringRef Code) {
  const Config &Cfg = Config::current();
  const bool Disabled =
      Cfg.Diagnostics.UnusedIncludes == Config::IncludesPolicy::None ||
      Cfg.Diagnostics.SuppressAll ||
      Cfg.Diagnostics.Suppress.contains("unused-includes");

  std::vector<Diag> Result;
  if (Disabled)
    return Result;

  for (const auto *Inc : UnusedIncludes) {
    // Drop the surrounding quotes/brackets and keep only the last path
    // component for the message.
    const std::string Unquoted =
        Inc->Written.substr(1, Inc->Written.size() - 2);
    const llvm::StringRef Basename =
        llvm::sys::path::filename(Unquoted, llvm::sys::path::Style::posix);

    Diag &D = Result.emplace_back();
    D.Message =
        llvm::formatv("included header {0} is not used directly", Basename);
    D.Name = "unused-includes";
    D.Source = Diag::DiagSource::Clangd;
    D.File = FileName;
    D.InsideMainFile = true;
    D.Severity = DiagnosticsEngine::Warning;
    D.Tags.push_back(Unnecessary);
    D.Range = getDiagnosticRange(Code, Inc->HashOffset);

    // FIXME(kirillbobyrev): Removing inclusion might break the code if the
    // used headers are only reachable transitively through this one. Suggest
    // including them directly instead.
    // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
    // (keep/export) remove the warning once we support IWYU pragmas.
    auto &F = D.Fixes.emplace_back();
    F.Message = "remove #include directive";
    // The edit's new text is left empty; its range runs from the directive's
    // line to the line after it.
    auto &Removal = F.Edits.emplace_back();
    Removal.range.start.line = Inc->HashLine;
    Removal.range.end.line = Inc->HashLine + 1;
  }
  return Result;
}
} // namespace
// Walks the spelled tokens of the main file and returns an explicit symbol
// reference for every token that locateMacroAt() resolves to a macro with a
// valid definition location.
std::vector<include_cleaner::SymbolReference>
collectMacroReferences(ParsedAST &AST) {
  const auto &SM = AST.getSourceManager();
  auto &PP = AST.getPreprocessor();
  // FIXME: !!this is a hacky way to collect macro references.
  std::vector<include_cleaner::SymbolReference> Macros;
  for (const syntax::Token &Tok :
       AST.getTokens().spelledTokens(SM.getMainFileID())) {
    auto Macro = locateMacroAt(Tok, PP);
    if (!Macro)
      continue;
    const auto DefLoc = Macro->Info->getDefinitionLoc();
    if (!DefLoc.isValid())
      continue;
    Macros.push_back(
        {Tok.location(),
         include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok.text(SM)),
                                DefLoc},
         include_cleaner::RefType::Explicit});
  }
  return Macros;
}
// Translates clangd's Inclusion records into include_cleaner's representation.
// Includes whose resolved path has no file entry in the file manager are
// logged and left out of the result.
include_cleaner::Includes
convertIncludes(const SourceManager &SM,
                const llvm::ArrayRef<Inclusion> Includes) {
  include_cleaner::Includes Converted;
  for (const Inclusion &Inc : Includes) {
    const llvm::StringRef Written(Inc.Written);

    include_cleaner::Include Entry;
    Entry.Spelled = Written.trim("\"<>");
    Entry.HashLocation = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
    // include_cleaner takes a 1-based line; HashLine is 0-based.
    Entry.Line = Inc.HashLine + 1;
    Entry.Angled = Written.starts_with("<");

    auto File = SM.getFileManager().getFile(Inc.Resolved);
    if (!File) {
      elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
           Inc.Resolved, File.getError().message());
      continue;
    }
    Entry.Resolved = *File;
    Converted.add(std::move(Entry));
  }
  return Converted;
}
// Returns pointers to the main-file includes that are considered unused.
//
// An include is skipped when it has no HeaderID or when its written spelling
// is in ReferencedPublicHeaders. Otherwise it is unused if its header ID is
// absent from ReferencedFiles and mayConsiderUnused() accepts it.
std::vector<const Inclusion *>
getUnused(ParsedAST &AST,
          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
          const llvm::StringSet<> &ReferencedPublicHeaders) {
  trace::Span Tracer("IncludeCleaner::getUnused");
  const Config &Cfg = Config::current();
  std::vector<const Inclusion *> Unused;
  for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
    if (!MFI.HeaderID || ReferencedPublicHeaders.contains(MFI.Written))
      continue;

    const auto IncludeID =
        static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
    const bool Used = ReferencedFiles.contains(IncludeID);
    if (!Used) {
      // Unreferenced, but possibly exempt from being diagnosed.
      if (!mayConsiderUnused(MFI, AST, Cfg, AST.getPragmaIncludes())) {
        dlog("{0} was not used, but is not eligible to be diagnosed as unused",
             MFI.Written);
        continue;
      }
      Unused.push_back(&MFI);
    }
    dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
  }
  return Unused;
}
// Runs the include-cleaner analysis over the main file of AST.
//
// Every symbol reference reported by include_cleaner::walkUsed (for the local
// top-level declarations and the macro references collected from the main
// file) is matched against the main-file includes:
//  - includes matching one of the reference's providers are recorded as used;
//  - an explicit reference that has providers but is satisfied by none of them
//    is recorded as a missing include.
// The missing-include findings are then sorted and deduplicated, and the
// unused includes are computed from the set of used header IDs.
//
// Returns the unused includes together with the missing-include findings.
IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
const auto &SM = AST.getSourceManager();
const auto &Includes = AST.getIncludeStructure();
include_cleaner::Includes ConvertedIncludes =
convertIncludes(SM, Includes.MainFileIncludes);
const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
auto *PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);
std::vector<include_cleaner::SymbolReference> Macros =
collectMacroReferences(AST);
std::vector<MissingIncludeDiagInfo> MissingIncludes;
// IDs of the headers whose include matched a provider of some reference.
llvm::DenseSet<IncludeStructure::HeaderID> Used;
trace::Span Tracer("include_cleaner::walkUsed");
include_cleaner::walkUsed(
AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
AST.getPragmaIncludes(), SM,
[&](const include_cleaner::SymbolReference &Ref,
llvm::ArrayRef<include_cleaner::Header> Providers) {
bool Satisfied = false;
for (const auto &H : Providers) {
// A symbol provided by the main file itself or by the preamble patch file
// needs no include.
if (H.kind() == include_cleaner::Header::Physical &&
(H.physical() == MainFile || H.physical() == PreamblePatch)) {
Satisfied = true;
continue;
}
// Mark every include that matches this provider as used.
for (auto *Inc : ConvertedIncludes.match(H)) {
Satisfied = true;
auto HeaderID = Includes.getID(Inc->Resolved);
assert(HeaderID.has_value() &&
"ConvertedIncludes only contains resolved includes.");
Used.insert(*HeaderID);
}
}
// Only explicit references that have at least one provider and are not
// already satisfied are reported as missing includes.
if (Satisfied || Providers.empty() ||
Ref.RT != include_cleaner::RefType::Explicit)
return;
// We actually always want to map usages to their spellings, but
// spelling locations can point into preamble section. Using these
// offsets could lead into crashes in presence of stale preambles. Hence
// we use "getFileLoc" instead to make sure it always points into main
// file.
// FIXME: Use presumed locations to map such usages back to patched
// locations safely.
auto Loc = SM.getFileLoc(Ref.RefLocation);
// File locations can be outside of the main file if macro is expanded
// through an #include.
while (SM.getFileID(Loc) != SM.getMainFileID())
Loc = SM.getIncludeLoc(SM.getFileID(Loc));
auto TouchingTokens =
syntax::spelledTokensTouching(Loc, AST.getTokens());
assert(!TouchingTokens.empty());
// Loc points to the start offset of the ref token, here we use the last
// element of the TouchingTokens, e.g. avoid getting the "::" for
// "ns::^abc".
MissingIncludeDiagInfo DiagInfo{
Ref.Target, TouchingTokens.back().range(SM), Providers};
MissingIncludes.push_back(std::move(DiagInfo));
});
// Put possibly equal diagnostics together for deduplication.
// The duplicates might be from macro arguments that get expanded multiple
// times.
llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS,
const MissingIncludeDiagInfo &RHS) {
// First sort by reference location.
if (LHS.SymRefRange != RHS.SymRefRange) {
// We can get away just by comparing the offsets as all the ranges are in
// main file.
return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
}
// For the same location, break ties using the symbol. Note that this won't
// be stable across runs.
using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
return MapInfo::getHashValue(LHS.Symbol) <
MapInfo::getHashValue(RHS.Symbol);
});
// Drop adjacent findings that compare equal.
MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end());
// No referenced public headers are supplied here; usage is decided from the
// Used header IDs only.
std::vector<const Inclusion *> UnusedIncludes =
getUnused(AST, Used, /*ReferencedPublicHeaders*/ {});
return {std::move(UnusedIncludes), std::move(MissingIncludes)};
}
// Builds one fix that bundles the removal edits of all unused-include
// diagnostics. Returns std::nullopt when UnusedIncludes is empty.
//
// Every diagnostic is expected to carry exactly one fix. Each bundled edit is
// tagged with its own change annotation that asks for user confirmation.
std::optional<Fix> removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
  if (UnusedIncludes.empty())
    return std::nullopt;

  Fix RemoveAll;
  RemoveAll.Message = "remove all unused includes";
  for (const auto &D : UnusedIncludes) {
    assert(D.Fixes.size() == 1 && "Expected exactly one fix.");
    for (const auto &Edit : D.Fixes.front().Edits)
      RemoveAll.Edits.push_back(Edit);
  }

  // TODO(hokein): emit a suitable text for the label.
  ChangeAnnotation Annotation = {/*label=*/"",
                                 /*needsConfirmation=*/true,
                                 /*description=*/""};
  static const ChangeAnnotationIdentifier RemoveAllUnusedID =
      "RemoveAllUnusedIncludes";
  // Annotation IDs are the common prefix followed by the edit's index.
  unsigned Index = 0;
  for (auto &Edit : RemoveAll.Edits) {
    ChangeAnnotationIdentifier ID = RemoveAllUnusedID + std::to_string(Index);
    ++Index;
    Edit.annotationId = ID;
    RemoveAll.Annotations.push_back({ID, Annotation});
  }
  return RemoveAll;
}
// Builds one fix that inserts every missing include. Returns std::nullopt
// when MissingIncludeDiags is empty.
//
// Every diagnostic is expected to carry exactly one fix. Edits that insert the
// same text are merged (the first one seen is kept), and each resulting edit
// is tagged with its own change annotation that asks for user confirmation.
std::optional<Fix>
addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
  if (MissingIncludeDiags.empty())
    return std::nullopt;

  Fix AddAllMissing;
  AddAllMissing.Message = "add all missing includes";

  // Deduplicates edits by their new text:
  // newText (#include "my_missing_header.h") -> TextEdit.
  llvm::StringMap<TextEdit> Edits;
  for (const auto &D : MissingIncludeDiags) {
    assert(D.Fixes.size() == 1 && "Expected exactly one fix.");
    for (const auto &Edit : D.Fixes.front().Edits)
      Edits.try_emplace(Edit.newText, Edit);
  }

  // FIXME(hokein): emit used symbol reference in the annotation.
  ChangeAnnotation Annotation = {/*label=*/"",
                                 /*needsConfirmation=*/true,
                                 /*description=*/""};
  static const ChangeAnnotationIdentifier AddAllMissingID =
      "AddAllMissingIncludes";
  // Annotation IDs are the common prefix followed by a running counter.
  unsigned Index = 0;
  for (auto &Entry : Edits) {
    ChangeAnnotationIdentifier ID = AddAllMissingID + std::to_string(Index);
    ++Index;
    Entry.getValue().annotationId = ID;
    AddAllMissing.Edits.push_back(std::move(Entry.getValue()));
    AddAllMissing.Annotations.push_back({ID, Annotation});
  }
  return AddAllMissing;
}
// Merges the two batch fixes into a single "fix all includes" fix: the edits
// and annotations of RemoveAllUnused come first, followed by those of
// AddAllMissing.
Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
  Fix Combined;
  Combined.Message = "fix all includes";
  for (const Fix *Part : {&RemoveAllUnused, &AddAllMissing}) {
    Combined.Edits.insert(Combined.Edits.end(), Part->Edits.begin(),
                          Part->Edits.end());
    Combined.Annotations.insert(Combined.Annotations.end(),
                                Part->Annotations.begin(),
                                Part->Annotations.end());
  }
  return Combined;
}
// Produces all include-cleaner diagnostics for Findings: the missing-include
// diagnostics first, followed by the unused-include ones.
//
// In addition to its own fix, every diagnostic is offered:
//  - the batch fix of its category (add all / remove all), but only when that
//    category has more than one diagnostic;
//  - the "fix all" fix, when both categories have at least one diagnostic.
std::vector<Diag> generateIncludeCleanerDiagnostic(
    ParsedAST &AST, const IncludeCleanerFindings &Findings,
    llvm::StringRef Code) {
  std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
      AST.tuPath(), Findings.UnusedIncludes, Code);
  std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);

  std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
      AST, Findings.MissingIncludes, Code);
  std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);

  std::optional<Fix> FixAll;
  if (RemoveAllUnused && AddAllMissing)
    FixAll = fixAll(*RemoveAllUnused, *AddAllMissing);

  // Appends the category fix (if applicable) and then the fix-all fix (if
  // any) to each diagnostic of one category.
  auto AttachBatchFixes = [&FixAll](std::vector<Diag> &Diags,
                                    const std::optional<Fix> &CategoryFix) {
    const bool OfferCategoryFix = Diags.size() > 1 && CategoryFix.has_value();
    for (auto &D : Diags) {
      if (OfferCategoryFix)
        D.Fixes.push_back(*CategoryFix);
      if (FixAll)
        D.Fixes.push_back(*FixAll);
    }
  };
  AttachBatchFixes(MissingIncludeDiags, AddAllMissing);
  AttachBatchFixes(UnusedIncludes, RemoveAllUnused);

  std::vector<Diag> Result = std::move(MissingIncludeDiags);
  Result.insert(Result.end(), std::make_move_iterator(UnusedIncludes.begin()),
                std::make_move_iterator(UnusedIncludes.end()));
  return Result;
}
// Entry point: computes the include-cleaner findings for AST (only if at
// least one of the missing/unused include policies is Strict) and converts
// them into diagnostics. Produces nothing when the ObjC language option is
// set.
std::vector<Diag> issueIncludeCleanerDiagnostics(ParsedAST &AST,
                                                 llvm::StringRef Code) {
  // Interaction is only polished for C/CPP.
  if (AST.getLangOpts().ObjC)
    return {};

  trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
  const Config &Cfg = Config::current();
  const bool WantsMissing =
      Cfg.Diagnostics.MissingIncludes == Config::IncludesPolicy::Strict;
  const bool WantsUnused =
      Cfg.Diagnostics.UnusedIncludes == Config::IncludesPolicy::Strict;

  IncludeCleanerFindings Findings;
  // Both kinds of diagnostics share the same analysis, so run it only once.
  if (WantsMissing || WantsUnused)
    Findings = computeIncludeCleanerFindings(AST);
  return generateIncludeCleanerDiagnostic(AST, Findings, Code);
}
// Returns the first header in Providers that matches at least one entry of
// Includes, or std::nullopt if none of the providers is matched.
std::optional<include_cleaner::Header>
firstMatchedProvider(const include_cleaner::Includes &Includes,
                     llvm::ArrayRef<include_cleaner::Header> Providers) {
  const auto *Match = llvm::find_if(
      Providers, [&Includes](const include_cleaner::Header &H) {
        return !Includes.match(H).empty();
      });
  if (Match == Providers.end())
    return std::nullopt;
  return *Match;
}
} // namespace clangd
} // namespace clang