//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines type-erased base types and functions for building dataflow
// analyses that run over Control-Flow Graphs (CFGs).
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>
#include "clang/AST/DeclCXX.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
namespace clang {
namespace dataflow {
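/// Maps a statement to the environment of the basic block that contains it, as
/// recorded in `BlockToState`. `getEnvironment` assumes that the containing
/// block has already been evaluated.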
class StmtToEnvMapImpl : public StmtToEnvMap {
public:
StmtToEnvMapImpl(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
BlockToState)
: CFCtx(CFCtx), BlockToState(BlockToState) {}
const Environment *getEnvironment(const Stmt &S) const override {
auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
assert(BlockIt != CFCtx.getStmtToBlock().end());
const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
assert(State);
return &State.value().Env;
}
private:
const ControlFlowContext &CFCtx;
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
};
/// Returns the index of `Block` in the successors of `Pred`.
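/// For example, for an `if` terminator the successor that starts the "then"
/// branch has index 0 and the successor that starts the "else" branch has
/// index 1; `TerminatorVisitor` relies on this ordering when deciding whether
/// to negate the terminator condition.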
static int blockIndexInPredecessor(const CFGBlock &Pred,
const CFGBlock &Block) {
auto BlockPos = llvm::find_if(
Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
return Succ && Succ->getBlockID() == Block.getBlockID();
});
return BlockPos - Pred.succ_begin();
}

/// Extends the flow condition of an environment based on a terminator
/// statement.
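/// For example, given a predecessor block that ends in `if (Cond)`, the value
/// of `Cond` is added to the flow condition of the environment that flows into
/// the "then" successor, and its negation is added to the environment that
/// flows into the "else" successor.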
class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> {
public:
TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
int BlockSuccIdx, TransferOptions TransferOpts)
: StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx),
TransferOpts(TransferOpts) {}
void VisitIfStmt(const IfStmt *S) {
auto *Cond = S->getCond();
assert(Cond != nullptr);
extendFlowCondition(*Cond);
}
void VisitWhileStmt(const WhileStmt *S) {
auto *Cond = S->getCond();
assert(Cond != nullptr);
extendFlowCondition(*Cond);
}
void VisitDoStmt(const DoStmt *S) {
auto *Cond = S->getCond();
assert(Cond != nullptr);
extendFlowCondition(*Cond);
}
void VisitForStmt(const ForStmt *S) {
auto *Cond = S->getCond();
if (Cond != nullptr)
extendFlowCondition(*Cond);
}
void VisitBinaryOperator(const BinaryOperator *S) {
assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
auto *LHS = S->getLHS();
assert(LHS != nullptr);
extendFlowCondition(*LHS);
}
void VisitConditionalOperator(const ConditionalOperator *S) {
auto *Cond = S->getCond();
assert(Cond != nullptr);
extendFlowCondition(*Cond);
}
private:
void extendFlowCondition(const Expr &Cond) {
// The terminator sub-expression might not be evaluated.
if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
transfer(StmtToEnv, Cond, Env, TransferOpts);
// FIXME: The flow condition must be an r-value, so `SkipPast::None` should
// suffice.
auto *Val =
cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
// Value merging depends on flow conditions from different environments
// being mutually exclusive -- that is, they cannot both be true in their
// entirety (even if they may share some clauses). So, we need *some* value
// for the condition expression, even if just an atom.
if (Val == nullptr) {
// FIXME: Consider introducing a helper for this get-or-create pattern.
auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
if (Loc == nullptr) {
Loc = &Env.createStorageLocation(Cond);
Env.setStorageLocation(Cond, *Loc);
}
Val = &Env.makeAtomicBoolValue();
Env.setValue(*Loc, *Val);
}
// The condition must be inverted for the successor that encompasses the
// "else" branch, if one exists.
if (BlockSuccIdx == 1)
Val = &Env.makeNot(*Val);
Env.addToFlowCondition(*Val);
}
const StmtToEnvMap &StmtToEnv;
Environment &Env;
int BlockSuccIdx;
TransferOptions TransferOpts;
};

/// Computes the input state for a given basic block by joining the output
/// states of its predecessors.
///
/// Requirements:
///
/// All predecessors of `Block` except those with loop back edges must have
/// already been transferred. States in `BlockStates` that are set to
/// `llvm::None` represent basic blocks that have not been evaluated yet.
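///
/// For example, if `Block` has two evaluated predecessors `P1` and `P2`, the
/// result is the join of their output states, where each predecessor's
/// terminator (if any) is first applied to its copy of the state to extend the
/// flow condition for the edge leading into `Block`.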
static TypeErasedDataflowAnalysisState computeBlockInputState(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) {
llvm::DenseSet<const CFGBlock *> Preds;
Preds.insert(Block.pred_begin(), Block.pred_end());
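// The block's input state is the join of the output states of these
// predecessors; the set may be pruned below for the temporary-destructor
// special case.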
if (Block.getTerminator().isTemporaryDtorsBranch()) {
// This handles a special case where the code that produced the CFG includes
// a conditional operator with a branch that constructs a temporary and
// calls a destructor annotated as noreturn. The CFG models this as follows:
//
// B1 (contains the condition of the conditional operator) - succs: B2, B3
// B2 (contains code that does not call a noreturn destructor) - succs: B4
// B3 (contains code that calls a noreturn destructor) - succs: B4
// B4 (has temporary destructor terminator) - succs: B5, B6
// B5 (noreturn block that is associated with the noreturn destructor call)
// B6 (contains code that follows the conditional operator statement)
//
// The first successor (B5 above) of a basic block with a temporary
// destructor terminator (B4 above) is the block that evaluates the
// destructor. If that block has a noreturn element then the predecessor
// block that constructed the temporary object (B3 above) is effectively a
// noreturn block and its state should not be used as input for the state
// of the block that has a temporary destructor terminator (B4 above). This
// holds regardless of which branch of the ternary operator calls the
// noreturn destructor. However, it doesn't cases where a nested ternary
// operator includes a branch that contains a noreturn destructor call.
//
// See `NoreturnDestructorTest` for concrete examples.
if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt());
assert(StmtBlock != CFCtx.getStmtToBlock().end());
Preds.erase(StmtBlock->getSecond());
}
}
llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState;
bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer();
for (const CFGBlock *Pred : Preds) {
// Skip if `Pred` is unreachable or control flow cannot get past it.
if (!Pred || Pred->hasNoReturnElement())
continue;
// Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
// loop back edge to `Block`.
const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
BlockStates[Pred->getBlockID()];
if (!MaybePredState)
continue;
TypeErasedDataflowAnalysisState PredState = MaybePredState.value();
if (ApplyBuiltinTransfer) {
if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates);
TerminatorVisitor(StmtToEnv, PredState.Env,
blockIndexInPredecessor(*Pred, Block),
Analysis.builtinTransferOptions())
.Visit(PredTerminatorStmt);
}
}
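// Join `PredState` into the accumulated input state, or use it to seed the
// accumulator if this is the first evaluated predecessor.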
if (MaybeState) {
Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
MaybeState->Env.join(PredState.Env, Analysis);
} else {
MaybeState = std::move(PredState);
}
}
if (!MaybeState) {
// FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
// to enable building analyses like computation of dominators that
// initialize the state of each basic block differently.
MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv);
}
return *MaybeState;
}

/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt` after it
/// is evaluated.
static void transferCFGStmt(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
TypeErasedDataflowAnalysisState &State,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
const Stmt *S = CfgStmt.getStmt();
assert(S != nullptr);
if (Analysis.applyBuiltinTransfer())
transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env,
Analysis.builtinTransferOptions());
Analysis.transferTypeErased(S, State.Lattice, State.Env);
if (HandleTransferredStmt != nullptr)
HandleTransferredStmt(CfgStmt, State);
}
/// Transfers `State` by evaluating `CfgInit`.
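/// For example, given a constructor like `S::S(int I) : Member(I) {}` (names
/// here are purely illustrative), the value of the initializing expression is
/// copied into the storage location of `Member` within the `this` pointee; if
/// `Member` has reference type, a `ReferenceValue` pointing at the
/// initializer's storage location is stored instead.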
static void transferCFGInitializer(const CFGInitializer &CfgInit,
TypeErasedDataflowAnalysisState &State) {
const auto &ThisLoc = *cast<AggregateStorageLocation>(
State.Env.getThisPointeeStorageLocation());
const CXXCtorInitializer *Initializer = CfgInit.getInitializer();
assert(Initializer != nullptr);
const FieldDecl *Member = Initializer->getMember();
if (Member == nullptr)
// Not a field initializer.
return;
auto *InitStmt = Initializer->getInit();
assert(InitStmt != nullptr);
auto *InitStmtLoc =
State.Env.getStorageLocation(*InitStmt, SkipPast::Reference);
if (InitStmtLoc == nullptr)
return;
auto *InitStmtVal = State.Env.getValue(*InitStmtLoc);
if (InitStmtVal == nullptr)
return;
if (Member->getType()->isReferenceType()) {
auto &MemberLoc = ThisLoc.getChild(*Member);
State.Env.setValue(MemberLoc,
State.Env.takeOwnership(
std::make_unique<ReferenceValue>(*InitStmtLoc)));
} else {
auto &MemberLoc = ThisLoc.getChild(*Member);
State.Env.setValue(MemberLoc, *InitStmtVal);
}
}
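// Transfers the block's input state (the join of its predecessors' output
// states, see `computeBlockInputState`) through each `CFGElement` of `Block`
// in order, invoking `HandleTransferredStmt` (if provided) after each
// statement is evaluated.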
TypeErasedDataflowAnalysisState transferBlock(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
TypeErasedDataflowAnalysisState State =
computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis);
for (const CFGElement &Element : Block) {
switch (Element.getKind()) {
case CFGElement::Statement:
transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
State, HandleTransferredStmt);
break;
case CFGElement::Initializer:
if (Analysis.applyBuiltinTransfer())
transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
break;
default:
// FIXME: Evaluate other kinds of `CFGElement`.
break;
}
}
return State;
}
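// Runs the analysis to a fixpoint: blocks are dequeued from a forward
// worklist, transferred, and their successors re-enqueued whenever the
// computed block state changes. The number of iterations is capped below to
// guard against non-convergence.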
llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
const Environment &InitEnv,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
PostVisitStmt) {
PostOrderCFGView POV(&CFCtx.getCFG());
ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
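// One state slot per basic block, indexed by block ID; `llvm::None` marks
// blocks that have not been evaluated yet.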
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates(
CFCtx.getCFG().size(), llvm::None);
// The entry basic block doesn't contain statements so it can be skipped.
const CFGBlock &Entry = CFCtx.getCFG().getEntry();
BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
InitEnv};
Worklist.enqueueSuccessors(&Entry);
// Bugs in lattices and transfer functions can prevent the analysis from
// converging. To limit the damage (infinite loops) that these bugs can cause,
// limit the number of iterations.
// FIXME: Consider making the maximum number of iterations configurable.
// FIXME: Consider restricting the number of backedges followed, rather than
// iterations.
// FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
// of iterations, number of functions that time out, etc.
static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
const uint32_t RelativeMaxIterations =
MaxAverageVisitsPerBlock * BlockStates.size();
const uint32_t MaxIterations =
std::min(RelativeMaxIterations, AbsoluteMaxIterations);
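// For example, a CFG with 100 blocks is allowed at most
// std::min(4 * 100, 1 << 16) = 400 block visits in the loop below.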
uint32_t Iterations = 0;
while (const CFGBlock *Block = Worklist.dequeue()) {
if (++Iterations > MaxIterations) {
return llvm::createStringError(std::errc::timed_out,
"maximum number of iterations reached");
}
const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
BlockStates[Block->getBlockID()];
TypeErasedDataflowAnalysisState NewBlockState =
transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis);
if (OldBlockState &&
Analysis.isEqualTypeErased(OldBlockState.value().Lattice,
NewBlockState.Lattice) &&
OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
// The state of `Block` didn't change after transfer so there's no need to
// revisit its successors.
continue;
}
BlockStates[Block->getBlockID()] = std::move(NewBlockState);
// Do not add unreachable successor blocks to `Worklist`.
if (Block->hasNoReturnElement())
continue;
Worklist.enqueueSuccessors(Block);
}
// FIXME: Consider evaluating unreachable basic blocks (those that have a
// state set to `llvm::None` at this point) to also analyze dead code.
if (PostVisitStmt) {
for (const CFGBlock *Block : CFCtx.getCFG()) {
// Skip blocks that were not evaluated.
if (!BlockStates[Block->getBlockID()])
continue;
transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis,
PostVisitStmt);
}
}
return BlockStates;
}
} // namespace dataflow
} // namespace clang