blob: 7c84d9681ca84369aafeb041b8845dae8bf1e62e [file]
//===- PointerFlowExtractor.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SSAFAnalysesCommon.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TypeBase.h"
#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h"
#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h"
#include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h"
#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h"
#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h"
#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h"
#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h"
#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <optional>
namespace clang::ssaf {
extern PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges);
} // namespace clang::ssaf
namespace {
using namespace clang;
using namespace ssaf;
class PointerFlowMatcher {
public:
EdgeSet Results;
ASTContext &Ctx;
PointerFlowMatcher(ASTContext &Ctx,
std::function<EntityId(const EntityName &)> AddEntity)
: Ctx(Ctx), AddEntity(std::move(AddEntity)) {}
llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl);
llvm::Error matchesInitializerList(const ValueDecl *Base,
const Expr *InitExpr,
unsigned ArrayElementIndirectLevel = 0);
llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl);
llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl);
private:
std::function<EntityId(const EntityName &)> AddEntity;
Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N,
bool IsRet = false) const;
Expected<EntityPointerLevelSet> toEPL(const Expr *N) const;
llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS,
Expected<EntityPointerLevelSet> &&RHS);
template <typename ParmsProvider, typename ArgsProvider>
llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP,
ArgsProvider *AP) {
unsigned ArgIdx = ArgIdxStart;
for (unsigned ParmIdx = 0;
ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs();
++ArgIdx, ++ParmIdx) {
if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx);
PD && hasPtrOrArrType(PD)) {
if (auto Err = addEdges(toEPL(PD), toEPL(AP->getArg(ArgIdx))))
return Err;
}
}
return llvm::Error::success();
}
};
Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N,
bool IsRet) const {
auto Ret = createEntityPointerLevel(N, AddEntity, IsRet);
if (Ret)
return EntityPointerLevelSet{*Ret};
return Ret.takeError();
}
Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const {
return translateEntityPointerLevel(N, Ctx, AddEntity);
}
llvm::Error
PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS,
Expected<EntityPointerLevelSet> &&RHS) {
if (!LHS && !RHS)
return llvm::joinErrors(LHS.takeError(), RHS.takeError());
if (!LHS)
return LHS.takeError();
if (!RHS)
return RHS.takeError();
for (auto L : *LHS)
Results[L].insert(RHS->begin(), RHS->end());
return llvm::Error::success();
}
/// Match and extract pointer flow.
/// The extraction function 'XF' can be described by the following rules:
///
/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))"
/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ...
/// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return
/// entity of the enclosing
/// function
/// XF(ctor(a, ...) : x1(y1), ... {...})
/// := XF(Param_1 = a), ...,
/// XF(x1 = y1), ...,
/// ctor's body will be visited separately.
/// XF(T var = e) := XF(var = e)
/// XF(T var = init-list) := see \ref
/// PointerFlowMatcher::matchInitializerList
llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode,
const NamedDecl *RootDecl) {
if (const Stmt *S = DynNode.get<Stmt>())
return matchesStmt(S, RootDecl);
if (const Decl *D = DynNode.get<Decl>())
return matchesDecl(D, RootDecl);
return llvm::Error::success();
}
llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S,
const NamedDecl *RootDecl) {
// Match 'p = q' whenever it has pointer or array type:
if (const auto *BO = dyn_cast<BinaryOperator>(S);
BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) {
return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS()));
}
// Match arg-to-param passing (in CallExpr) for any pointer type argument:
if (const auto *CE = dyn_cast<CallExpr>(S)) {
const FunctionDecl *FD = CE->getDirectCallee();
if (!FD)
return llvm::Error::success();
unsigned ArgIdx = 0;
if (isa<CXXOperatorCallExpr>(CE))
if (auto *MD = dyn_cast<CXXMethodDecl>(FD);
MD && !MD->isExplicitObjectMemberFunction())
ArgIdx = 1;
return matchesArgsWithParams(ArgIdx, FD, CE);
}
// Match arg-to-param passing (in CXXConstructExpr) for any pointer type
// argument:
if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) {
return matchesArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), CCE);
}
if (const auto *RS = dyn_cast<ReturnStmt>(S)) {
const Expr *RetExpr = RS->getRetValue();
if (!RetExpr || !hasPtrOrArrType(RetExpr))
return llvm::Error::success();
return addEdges(toEPL(RootDecl, true), toEPL(RetExpr));
}
return llvm::Error::success();
}
llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D,
const NamedDecl *RootDecl) {
const Expr *InitExpr = nullptr;
if (const auto *VD = dyn_cast<ValueDecl>(D)) {
if (const auto *Var = dyn_cast<VarDecl>(VD))
InitExpr = Var->getInit();
if (const auto *Fd = dyn_cast<FieldDecl>(VD))
InitExpr = Fd->getInClassInitializer();
// Match initializer-list:
if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr))
return matchesInitializerList(VD, InitLst);
// Match initializers to variables/fields of a pointer type:
if (InitExpr && hasPtrOrArrType(VD))
return addEdges(toEPL(VD), toEPL(InitExpr));
}
// Match C++ constructor member-initializers:
if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) {
for (auto *E : CtorD->inits()) {
if (E->isDelegatingInitializer())
return matches(DynTypedNode::create(*E->getInit()), RootDecl);
if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) {
if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit())))
return Err;
}
}
}
return llvm::Error::success();
}
// Helper function for matchInitializerList that handles record:
llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher,
const RecordDecl *RecordTy,
const InitListExpr *ILE) {
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy))
if (CXXRD->getNumBases() != 0) {
// FIXME: support this:
return makeErrAtNode(
Matcher.Ctx, ILE,
"attempt to create pointer assignment edges between "
"CXXRecordDecls with base classes and initializer-lists");
}
// Handle union:
if (RecordTy->isUnion()) {
auto *InitField = ILE->getInitializedFieldInUnion();
if (!InitField)
return llvm::Error::success();
assert(!ILE->inits().empty());
return Matcher.matchesInitializerList(InitField, ILE->getInit(0));
}
// Handle struct/class:
ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm();
auto FieldIter = RecordTy->field_begin();
assert(RecordTy->getNumFields() >= ILE->getNumInits());
for (auto *Init : ILE->inits())
if (auto Err = Matcher.matchesInitializerList(*(FieldIter++), Init))
return Err;
return llvm::Error::success();
}
// Helper function for matchInitializerList that handles array:
llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher,
const ValueDecl *Array,
const InitListExpr *ILE,
unsigned ArrayIndirectLevel = 0) {
for (auto *E : ILE->inits())
if (auto Err =
Matcher.matchesInitializerList(Array, E, ArrayIndirectLevel + 1))
return Err;
return llvm::Error::success();
}
/// Match initializer lists of the form 'Var = {a, b, c, ...}':
///
/// If 'Var' is a struct/union:
/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a)
/// XF(Var.field_2 = b)
/// ...
/// If 'Var' is an array:
/// XF(Var = {a, b, c, ...}) := XF(*Var = a)
/// XF(*Var = b)
/// ...
///
/// The process is recursive: 'a', 'b', 'c', ... may themselves be
/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep
/// track of the pointer level the left-hand side.
llvm::Error
PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base,
const Expr *InitExpr,
unsigned ArrayElementIndirectLevel) {
const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr);
if (!ILE) {
if (!hasPtrOrArrType(InitExpr))
return llvm::Error::success();
auto BaseEPL = toEPL(Base);
if (!BaseEPL)
return BaseEPL.takeError();
// Apply ArrayElementIndirectLevel to BaseEPL
auto R = llvm::map_range(*BaseEPL, [&ArrayElementIndirectLevel](
const EntityPointerLevel &EPL) {
EntityPointerLevel Result = EPL;
for ([[maybe_unused]] auto Ignored : llvm::seq(ArrayElementIndirectLevel))
Result = incrementPointerLevel(Result);
return Result;
});
return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, toEPL(InitExpr));
}
// Note that `Base`'s type is NOT the real LHS type when
// ArrayElementIndirectLevel > 0:
QualType Type = InitExpr->getType();
if (auto *RD = Type->getAsRecordDecl())
return matchInitializerListForRecordDecl(*this, RD, ILE);
if (Type->isArrayType())
return matchInitializerListForArray(*this, Base, ILE,
ArrayElementIndirectLevel);
// Must be the case of using a initializer-list for a scalar:
return matchesInitializerList(Base, ILE->getInit(0));
}
class PointerFlowTUSummaryExtractor : public TUSummaryExtractor {
public:
PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder)
: TUSummaryExtractor(Builder) {}
EntityId addEntity(const EntityName &EN) {
return SummaryBuilder.addEntity(EN);
}
Expected<std::unique_ptr<PointerFlowEntitySummary>>
extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx) {
PointerFlowMatcher Matcher(
Ctx, [this](const EntityName &EN) { return addEntity(EN); });
auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) {
auto Err = Matcher.matches(Node, Contributor);
if (Err)
llvm::report_fatal_error(std::move(Err));
};
findMatchesIn(Contributor, MatchAction);
return std::make_unique<PointerFlowEntitySummary>(
buildPointerFlowEntitySummary(std::move(Matcher.Results)));
}
void HandleTranslationUnit(ASTContext &Ctx) override {
std::vector<const NamedDecl *> Contributors;
findContributors(Ctx, Contributors);
for (auto *CD : Contributors) {
auto EntitySummary = extractEntitySummary(CD, Ctx);
if (!EntitySummary)
llvm::reportFatalInternalError(EntitySummary.takeError());
assert(*EntitySummary);
if ((*EntitySummary)->empty())
continue;
auto ContributorName = getEntityName(CD);
if (!ContributorName)
llvm::reportFatalInternalError(makeEntityNameErr(Ctx, CD));
[[maybe_unused]] auto [Ignored, InsertionSucceeded] = SummaryBuilder.addSummary(
addEntity(*ContributorName), std::move(*EntitySummary));
assert(InsertionSucceeded && "duplicated contributor extraction");
}
}
};
} // namespace
// NOLINTNEXTLINE(misc-use-internal-linkage)
volatile int PointerFlowTUSummaryExtractorAnchorSource = 0;
static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor>
RegisterExtractor(PointerFlowEntitySummary::Name,
"The TUSummaryExtractor for pointer flow");