[analyzer] Improve messaging in security.VAList (#157846)
Previously the checker `security.VAList` only tracked the set of the
inintialized `va_list` objects; this commit replaces this with a mapping
that can distinguish the "uninitialized" `va_list` objects from the
"already released" ones. Moreover, a new "unknown" state is introduced
to replace the slightly hacky solutions that checked the `Symbolic`
nature of the region.
In addition to sligthly improving the messages, this commit also
prepares the ground for a follow-up change that would introduce an
"indeterminate" state (which needs `va_end` but cannot be otherwise
used) to model the requirements of SEI CERT rule MSC39-C, which states:
> The va_list may be passed as an argument to another function, but
> calling va_arg() within that function causes the va_list to have an
> indeterminate value in the calling function. As a result, attempting
> to read variable arguments without reinitializing the va_list can have
> unexpected behavior.
diff --git a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
index fe36e3b..79fd0bd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
@@ -18,11 +18,36 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
using namespace clang;
using namespace ento;
+using llvm::formatv;
-REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *)
+namespace {
+enum class VAListState {
+ Uninitialized,
+ Unknown,
+ Initialized,
+ Released,
+};
+
+constexpr llvm::StringLiteral StateNames[] = {
+ "uninitialized", "unknown", "initialized", "already released"};
+} // end anonymous namespace
+
+static StringRef describeState(const VAListState S) {
+ return StateNames[static_cast<int>(S)];
+}
+
+REGISTER_MAP_WITH_PROGRAMSTATE(VAListStateMap, const MemRegion *, VAListState)
+
+static VAListState getVAListState(ProgramStateRef State, const MemRegion *Reg) {
+ if (const VAListState *Res = State->get<VAListStateMap>(Reg))
+ return *Res;
+ return Reg->getSymbolicBase() ? VAListState::Unknown
+ : VAListState::Uninitialized;
+}
namespace {
typedef SmallVector<const MemRegion *, 2> RegionVector;
@@ -48,7 +73,7 @@
private:
const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr,
- bool &IsSymbolic, CheckerContext &C) const;
+ CheckerContext &C) const;
const ExplodedNode *getStartCallSite(const ExplodedNode *N,
const MemRegion *Reg) const;
@@ -57,8 +82,8 @@
void reportLeaked(const RegionVector &Leaked, StringRef Msg1, StringRef Msg2,
CheckerContext &C, ExplodedNode *N) const;
- void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
- bool IsCopy) const;
+ void checkVAListStartCall(const CallEvent &Call, CheckerContext &C) const;
+ void checkVAListCopyCall(const CallEvent &Call, CheckerContext &C) const;
void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const;
class VAListBugVisitor : public BugReporterVisitor {
@@ -118,41 +143,35 @@
void VAListChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
if (VaStart.matches(Call))
- checkVAListStartCall(Call, C, false);
+ checkVAListStartCall(Call, C);
else if (VaCopy.matches(Call))
- checkVAListStartCall(Call, C, true);
+ checkVAListCopyCall(Call, C);
else if (VaEnd.matches(Call))
checkVAListEndCall(Call, C);
else {
for (auto FuncInfo : VAListAccepters) {
if (!FuncInfo.Func.matches(Call))
continue;
- bool Symbolic;
const MemRegion *VAList =
getVAListAsRegion(Call.getArgSVal(FuncInfo.ParamIndex),
- Call.getArgExpr(FuncInfo.ParamIndex), Symbolic, C);
+ Call.getArgExpr(FuncInfo.ParamIndex), C);
if (!VAList)
return;
+ VAListState S = getVAListState(C.getState(), VAList);
- if (C.getState()->contains<InitializedVALists>(VAList))
+ if (S == VAListState::Initialized || S == VAListState::Unknown)
return;
- // We did not see va_start call, but the source of the region is unknown.
- // Be conservative and assume the best.
- if (Symbolic)
- return;
-
- SmallString<80> Errmsg("Function '");
- Errmsg += FuncInfo.Func.getFunctionName();
- Errmsg += "' is called with an uninitialized va_list argument";
- reportUninitializedAccess(VAList, Errmsg.c_str(), C);
+ std::string ErrMsg =
+ formatv("Function '{0}' is called with an {1} va_list argument",
+ FuncInfo.Func.getFunctionName(), describeState(S));
+ reportUninitializedAccess(VAList, ErrMsg, C);
break;
}
}
}
const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E,
- bool &IsSymbolic,
CheckerContext &C) const {
const MemRegion *Reg = SV.getAsRegion();
if (!Reg)
@@ -168,7 +187,6 @@
if (isa<ParmVarDecl>(DeclReg->getDecl()))
Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion();
}
- IsSymbolic = Reg && Reg->getBaseRegion()->getAs<SymbolicRegion>();
// Some VarRegion based VA lists reach here as ElementRegions.
const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg);
return (EReg && VAListModelledAsArray) ? EReg->getSuperRegion() : Reg;
@@ -178,52 +196,53 @@
CheckerContext &C) const {
ProgramStateRef State = C.getState();
const Expr *ArgExpr = VAA->getSubExpr();
- SVal VAListSVal = C.getSVal(ArgExpr);
- bool Symbolic;
- const MemRegion *VAList = getVAListAsRegion(VAListSVal, ArgExpr, Symbolic, C);
+ const MemRegion *VAList = getVAListAsRegion(C.getSVal(ArgExpr), ArgExpr, C);
if (!VAList)
return;
- if (Symbolic)
+ VAListState S = getVAListState(C.getState(), VAList);
+ if (S == VAListState::Initialized || S == VAListState::Unknown)
return;
- if (!State->contains<InitializedVALists>(VAList))
- reportUninitializedAccess(
- VAList, "va_arg() is called on an uninitialized va_list", C);
+
+ std::string ErrMsg =
+ formatv("va_arg() is called on an {0} va_list", describeState(S));
+ reportUninitializedAccess(VAList, ErrMsg, C);
}
void VAListChecker::checkDeadSymbols(SymbolReaper &SR,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
- InitializedVAListsTy Tracked = State->get<InitializedVALists>();
+ VAListStateMapTy Tracked = State->get<VAListStateMap>();
RegionVector Leaked;
- for (const MemRegion *Reg : Tracked) {
+ for (const auto &[Reg, S] : Tracked) {
if (SR.isLiveRegion(Reg))
continue;
- Leaked.push_back(Reg);
- State = State->remove<InitializedVALists>(Reg);
+ if (S == VAListState::Initialized)
+ Leaked.push_back(Reg);
+ State = State->remove<VAListStateMap>(Reg);
}
- if (ExplodedNode *N = C.addTransition(State))
+ if (ExplodedNode *N = C.addTransition(State)) {
reportLeaked(Leaked, "Initialized va_list", " is leaked", C, N);
+ }
}
// This function traverses the exploded graph backwards and finds the node where
-// the va_list is initialized. That node is used for uniquing the bug paths.
-// It is not likely that there are several different va_lists that belongs to
-// different stack frames, so that case is not yet handled.
+// the va_list becomes initialized. That node is used for uniquing the bug
+// paths. It is not likely that there are several different va_lists that
+// belongs to different stack frames, so that case is not yet handled.
const ExplodedNode *
VAListChecker::getStartCallSite(const ExplodedNode *N,
const MemRegion *Reg) const {
const LocationContext *LeakContext = N->getLocationContext();
const ExplodedNode *StartCallNode = N;
- bool FoundInitializedState = false;
+ bool SeenInitializedState = false;
while (N) {
- ProgramStateRef State = N->getState();
- if (!State->contains<InitializedVALists>(Reg)) {
- if (FoundInitializedState)
- break;
- } else {
- FoundInitializedState = true;
+ VAListState S = getVAListState(N->getState(), Reg);
+ if (S == VAListState::Initialized) {
+ SeenInitializedState = true;
+ } else if (SeenInitializedState) {
+ break;
}
const LocationContext *NContext = N->getLocationContext();
if (NContext == LeakContext || NContext->isParentOf(LeakContext))
@@ -274,71 +293,84 @@
}
void VAListChecker::checkVAListStartCall(const CallEvent &Call,
- CheckerContext &C, bool IsCopy) const {
- bool Symbolic;
- const MemRegion *VAList =
- getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
- if (!VAList)
+ CheckerContext &C) const {
+ const MemRegion *Arg =
+ getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+ if (!Arg)
return;
ProgramStateRef State = C.getState();
+ VAListState ArgState = getVAListState(State, Arg);
- if (IsCopy) {
- const MemRegion *Arg2 =
- getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C);
- if (Arg2) {
- if (VAList == Arg2) {
- RegionVector Leaked{VAList};
- if (ExplodedNode *N = C.addTransition(State))
- reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
- return;
- }
- if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) {
- if (State->contains<InitializedVALists>(VAList)) {
- State = State->remove<InitializedVALists>(VAList);
- RegionVector Leaked{VAList};
- if (ExplodedNode *N = C.addTransition(State))
- reportLeaked(Leaked, "Initialized va_list",
- " is overwritten by an uninitialized one", C, N);
- } else {
- reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", C);
- }
- return;
- }
- }
- }
- if (State->contains<InitializedVALists>(VAList)) {
- RegionVector Leaked{VAList};
+ if (ArgState == VAListState::Initialized) {
+ RegionVector Leaked{Arg};
if (ExplodedNode *N = C.addTransition(State))
reportLeaked(Leaked, "Initialized va_list", " is initialized again", C,
N);
return;
}
- State = State->add<InitializedVALists>(VAList);
+ State = State->set<VAListStateMap>(Arg, VAListState::Initialized);
+ C.addTransition(State);
+}
+
+void VAListChecker::checkVAListCopyCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ const MemRegion *Arg1 =
+ getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+ const MemRegion *Arg2 =
+ getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), C);
+ if (!Arg1 || !Arg2)
+ return;
+
+ ProgramStateRef State = C.getState();
+ if (Arg1 == Arg2) {
+ RegionVector Leaked{Arg1};
+ if (ExplodedNode *N = C.addTransition(State))
+ reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
+ return;
+ }
+ VAListState State1 = getVAListState(State, Arg1);
+ VAListState State2 = getVAListState(State, Arg2);
+ // Update the ProgramState by copying the state of Arg2 to Arg1.
+ State = State->set<VAListStateMap>(Arg1, State2);
+ if (State1 == VAListState::Initialized) {
+ RegionVector Leaked{Arg1};
+ std::string Msg2 =
+ formatv(" is overwritten by {0} {1} one",
+ (State2 == VAListState::Initialized) ? "another" : "an",
+ describeState(State2));
+ if (ExplodedNode *N = C.addTransition(State))
+ reportLeaked(Leaked, "Initialized va_list", Msg2, C, N);
+ return;
+ }
+ if (State2 != VAListState::Initialized && State2 != VAListState::Unknown) {
+ std::string Msg = formatv("{0} va_list is copied", describeState(State2));
+ Msg[0] = toupper(Msg[0]);
+ reportUninitializedAccess(Arg2, Msg, C);
+ return;
+ }
C.addTransition(State);
}
void VAListChecker::checkVAListEndCall(const CallEvent &Call,
CheckerContext &C) const {
- bool Symbolic;
- const MemRegion *VAList =
- getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
- if (!VAList)
+ const MemRegion *Arg =
+ getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+ if (!Arg)
return;
- // We did not see va_start call, but the source of the region is unknown.
- // Be conservative and assume the best.
- if (Symbolic)
- return;
+ ProgramStateRef State = C.getState();
+ VAListState ArgState = getVAListState(State, Arg);
- if (!C.getState()->contains<InitializedVALists>(VAList)) {
- reportUninitializedAccess(
- VAList, "va_end() is called on an uninitialized va_list", C);
+ if (ArgState != VAListState::Unknown &&
+ ArgState != VAListState::Initialized) {
+ std::string Msg = formatv("va_end() is called on an {0} va_list",
+ describeState(ArgState));
+ reportUninitializedAccess(Arg, Msg, C);
return;
}
- ProgramStateRef State = C.getState();
- State = State->remove<InitializedVALists>(VAList);
+ State = State->set<VAListStateMap>(Arg, VAListState::Released);
C.addTransition(State);
}
@@ -351,13 +383,26 @@
if (!S)
return nullptr;
+ VAListState After = getVAListState(State, Reg);
+ VAListState Before = getVAListState(StatePrev, Reg);
+ if (Before == After)
+ return nullptr;
+
StringRef Msg;
- if (State->contains<InitializedVALists>(Reg) &&
- !StatePrev->contains<InitializedVALists>(Reg))
+ switch (After) {
+ case VAListState::Uninitialized:
+ Msg = "Copied uninitialized contents into the va_list";
+ break;
+ case VAListState::Unknown:
+ Msg = "Copied unknown contents into the va_list";
+ break;
+ case VAListState::Initialized:
Msg = "Initialized va_list";
- else if (!State->contains<InitializedVALists>(Reg) &&
- StatePrev->contains<InitializedVALists>(Reg))
+ break;
+ case VAListState::Released:
Msg = "Ended va_list";
+ break;
+ }
if (Msg.empty())
return nullptr;
diff --git a/clang/test/Analysis/valist-uninitialized-no-undef.c b/clang/test/Analysis/valist-uninitialized-no-undef.c
index b8add29..da007e6 100644
--- a/clang/test/Analysis/valist-uninitialized-no-undef.c
+++ b/clang/test/Analysis/valist-uninitialized-no-undef.c
@@ -37,7 +37,7 @@
va_list va;
va_start(va, buffer); // expected-note{{Initialized va_list}}
va_end(va); // expected-note{{Ended va_list}}
- vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an uninitialized va_list argument}}
- // expected-note@-1{{Function 'vsprintf' is called with an uninitialized va_list argument}}
+ vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an already released va_list argument}}
+ // expected-note@-1{{Function 'vsprintf' is called with an already released va_list argument}}
}
diff --git a/clang/test/Analysis/valist-uninitialized.c b/clang/test/Analysis/valist-uninitialized.c
index 689fc95..f28f928 100644
--- a/clang/test/Analysis/valist-uninitialized.c
+++ b/clang/test/Analysis/valist-uninitialized.c
@@ -23,8 +23,8 @@
va_list va;
va_start(va, fst); // expected-note{{Initialized va_list}}
va_end(va); // expected-note{{Ended va_list}}
- return va_arg(va, int); // expected-warning{{va_arg() is called on an uninitialized va_list}}
- // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+ return va_arg(va, int); // expected-warning{{va_arg() is called on an already released va_list}}
+ // expected-note@-1{{va_arg() is called on an already released va_list}}
}
void f3(int fst, ...) {
@@ -60,8 +60,8 @@
va_list *y = &x;
va_start(*y,fst); // expected-note{{Initialized va_list}}
va_end(x); // expected-note{{Ended va_list}}
- (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an uninitialized va_list}}
- // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+ (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an already released va_list}}
+ // expected-note@-1{{va_arg() is called on an already released va_list}}
}
void reinitOk(int *fst, ...) {
@@ -82,8 +82,8 @@
va_start(va, fst); // expected-note{{Initialized va_list}}
(void)va_arg(va, int);
va_end(va); // expected-note{{Ended va_list}}
- (void)va_arg(va, int); //expected-warning{{va_arg() is called on an uninitialized va_list}}
- // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+ (void)va_arg(va, int); //expected-warning{{va_arg() is called on an already released va_list}}
+ // expected-note@-1{{va_arg() is called on an already released va_list}}
}
void copyUnint(int fst, ...) {
@@ -102,8 +102,8 @@
va_list va;
va_start(va, fst); // expected-note{{Initialized va_list}}
va_end(va); // expected-note{{Ended va_list}}
- va_end(va); // expected-warning{{va_end() is called on an uninitialized va_list}}
- // expected-note@-1{{va_end() is called on an uninitialized va_list}}
+ va_end(va); // expected-warning{{va_end() is called on an already released va_list}}
+ // expected-note@-1{{va_end() is called on an already released va_list}}
}
void is_sink(int fst, ...) {
@@ -151,3 +151,16 @@
va_copy(dst, arg);
va_end(dst);
}
+
+void all_state_changes(va_list unknown, int fst, ...) {
+ va_list va, va2;
+ va_start(va, fst); // expected-note{{Initialized va_list}}
+ va_copy(va, unknown); // expected-note{{Copied unknown contents into the va_list}}
+ va_end(va); // expected-note{{Ended va_list}}
+ va_start(va, fst); // expected-note{{Initialized va_list}}
+ va_copy(va, va2); // expected-note{{Copied uninitialized contents into the va_list}}
+ va_start(va, fst); // expected-note{{Initialized va_list}}
+ va_end(va); // expected-note{{Ended va_list}}
+ va_end(va); // expected-warning{{va_end() is called on an already released va_list}}
+ // expected-note@-1{{va_end() is called on an already released va_list}}
+}
diff --git a/clang/test/Analysis/valist-unterminated.c b/clang/test/Analysis/valist-unterminated.c
index cc89268..93e20740 100644
--- a/clang/test/Analysis/valist-unterminated.c
+++ b/clang/test/Analysis/valist-unterminated.c
@@ -97,12 +97,28 @@
// expected-note@-1{{Initialized va_list 'va' is overwritten by an uninitialized one}}
}
+void copyOverwriteUnknown(va_list other, int fst, ...) {
+ va_list va;
+ va_start(va, fst); // expected-note{{Initialized va_list}}
+ va_copy(va, other); // expected-warning{{Initialized va_list 'va' is overwritten by an unknown one}}
+ // expected-note@-1{{Initialized va_list 'va' is overwritten by an unknown one}}
+}
+
+void copyOverwriteReleased(int fst, ...) {
+ va_list va, va2;
+ va_start(va2, fst);
+ va_end(va2);
+ va_start(va, fst); // expected-note{{Initialized va_list}}
+ va_copy(va, va2); // expected-warning{{Initialized va_list 'va' is overwritten by an already released one}}
+ // expected-note@-1{{Initialized va_list 'va' is overwritten by an already released one}}
+}
+
void recopy(int fst, ...) {
va_list va, va2;
va_start(va, fst);
va_copy(va2, va); // expected-note{{Initialized va_list}}
- va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is initialized again}}
- // expected-note@-1{{Initialized va_list 'va2' is initialized again}}
+ va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is overwritten by another initialized one}}
+ // expected-note@-1{{Initialized va_list 'va2' is overwritten by another initialized one}}
va_end(va);
va_end(va2);
}