| //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This checker improves modeling of a few simple library functions. |
| // |
| // This checker provides a specification format - `Summary' - and |
| // contains descriptions of some library functions in this format. Each |
| // specification contains a list of branches for splitting the program state |
| // upon call, and range constraints on argument and return-value symbols that |
| // are satisfied on each branch. This spec can be expanded to include more |
| // items, like external effects of the function. |
| // |
| // The main difference between this approach and the body farms technique is |
| // in more explicit control over how many branches are produced. For example, |
| // consider standard C function `ispunct(int x)', which returns a non-zero value |
| // iff `x' is a punctuation character, that is, when `x' is in range |
| // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. |
| // `Summary' provides only two branches for this function. However, |
| // any attempt to describe this range with if-statements in the body farm |
| // would result in many more branches. Because each branch needs to be analyzed |
| // independently, this significantly reduces performance. Additionally, |
| // once we consider a branch on which `x' is in range, say, ['!', '/'], |
| // we assume that such branch is an important separate path through the program, |
| // which may lead to false positives because considering this particular path |
| // was not consciously intended, and therefore it might have been unreachable. |
| // |
| // This checker uses eval::Call for modeling pure functions (functions without |
| // side effets), for which their `Summary' is a precise model. This avoids |
| // unnecessary invalidation passes. Conflicts with other checkers are unlikely |
| // because if the function has no other effects, other checkers would probably |
| // never want to improve upon the modeling done by this checker. |
| // |
| // Non-pure functions, for which only partial improvement over the default |
| // behavior is expected, are modeled via check::PostCall, non-intrusively. |
| // |
| // The following standard C functions are currently supported: |
| // |
| // fgetc getline isdigit isupper |
| // fread isalnum isgraph isxdigit |
| // fwrite isalpha islower read |
| // getc isascii isprint write |
| // getchar isblank ispunct |
| // getdelim iscntrl isspace |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
| #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
| #include "clang/StaticAnalyzer/Core/Checker.h" |
| #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
| #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
| #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
| #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" |
| |
| using namespace clang; |
| using namespace clang::ento; |
| |
| namespace { |
| class StdLibraryFunctionsChecker |
| : public Checker<check::PreCall, check::PostCall, eval::Call> { |
| /// Below is a series of typedefs necessary to define function specs. |
| /// We avoid nesting types here because each additional qualifier |
| /// would need to be repeated in every function spec. |
| struct Summary; |
| |
| /// Specify how much the analyzer engine should entrust modeling this function |
| /// to us. If he doesn't, he performs additional invalidations. |
| enum InvalidationKind { NoEvalCall, EvalCallAsPure }; |
| |
| // The universal integral type to use in value range descriptions. |
| // Unsigned to make sure overflows are well-defined. |
| typedef uint64_t RangeInt; |
| |
| /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is |
| /// a non-negative integer, which less than 5 and not equal to 2. For |
| /// `ComparesToArgument', holds information about how exactly to compare to |
| /// the argument. |
| typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; |
| |
| /// A reference to an argument or return value by its number. |
| /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but |
| /// obviously uint32_t should be enough for all practical purposes. |
| typedef uint32_t ArgNo; |
| static const ArgNo Ret; |
| |
| class ValueConstraint; |
| |
| // Pointer to the ValueConstraint. We need a copyable, polymorphic and |
| // default initialize able type (vector needs that). A raw pointer was good, |
| // however, we cannot default initialize that. unique_ptr makes the Summary |
| // class non-copyable, therefore not an option. Releasing the copyability |
| // requirement would render the initialization of the Summary map infeasible. |
| using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; |
| |
| /// Polymorphic base class that represents a constraint on a given argument |
| /// (or return value) of a function. Derived classes implement different kind |
| /// of constraints, e.g range constraints or correlation between two |
| /// arguments. |
| class ValueConstraint { |
| public: |
| ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} |
| virtual ~ValueConstraint() {} |
| /// Apply the effects of the constraint on the given program state. If null |
| /// is returned then the constraint is not feasible. |
| virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const = 0; |
| virtual ValueConstraintPtr negate() const { |
| llvm_unreachable("Not implemented"); |
| }; |
| ArgNo getArgNo() const { return ArgN; } |
| |
| protected: |
| ArgNo ArgN; // Argument to which we apply the constraint. |
| }; |
| |
| /// Given a range, should the argument stay inside or outside this range? |
| enum RangeKind { OutOfRange, WithinRange }; |
| |
| /// Encapsulates a single range on a single symbol within a branch. |
| class RangeConstraint : public ValueConstraint { |
| RangeKind Kind; // Kind of range definition. |
| IntRangeVector Args; // Polymorphic arguments. |
| |
| public: |
| RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) |
| : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} |
| |
| const IntRangeVector &getRanges() const { |
| return Args; |
| } |
| |
| private: |
| ProgramStateRef applyAsOutOfRange(ProgramStateRef State, |
| const CallEvent &Call, |
| const Summary &Summary) const; |
| ProgramStateRef applyAsWithinRange(ProgramStateRef State, |
| const CallEvent &Call, |
| const Summary &Summary) const; |
| public: |
| ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const override { |
| switch (Kind) { |
| case OutOfRange: |
| return applyAsOutOfRange(State, Call, Summary); |
| case WithinRange: |
| return applyAsWithinRange(State, Call, Summary); |
| } |
| llvm_unreachable("Unknown range kind!"); |
| } |
| |
| ValueConstraintPtr negate() const override { |
| RangeConstraint Tmp(*this); |
| switch (Kind) { |
| case OutOfRange: |
| Tmp.Kind = WithinRange; |
| break; |
| case WithinRange: |
| Tmp.Kind = OutOfRange; |
| break; |
| } |
| return std::make_shared<RangeConstraint>(Tmp); |
| } |
| }; |
| |
| class ComparisonConstraint : public ValueConstraint { |
| BinaryOperator::Opcode Opcode; |
| ArgNo OtherArgN; |
| |
| public: |
| ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, |
| ArgNo OtherArgN) |
| : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} |
| ArgNo getOtherArgNo() const { return OtherArgN; } |
| BinaryOperator::Opcode getOpcode() const { return Opcode; } |
| ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const override; |
| }; |
| |
| class NotNullConstraint : public ValueConstraint { |
| using ValueConstraint::ValueConstraint; |
| // This variable has a role when we negate the constraint. |
| bool CannotBeNull = true; |
| |
| public: |
| ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const override { |
| SVal V = getArgSVal(Call, getArgNo()); |
| if (V.isUndef()) |
| return State; |
| |
| DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>(); |
| if (!L.getAs<Loc>()) |
| return State; |
| |
| return State->assume(L, CannotBeNull); |
| } |
| |
| ValueConstraintPtr negate() const override { |
| NotNullConstraint Tmp(*this); |
| Tmp.CannotBeNull = !this->CannotBeNull; |
| return std::make_shared<NotNullConstraint>(Tmp); |
| } |
| }; |
| |
| /// The complete list of constraints that defines a single branch. |
| typedef std::vector<ValueConstraintPtr> ConstraintSet; |
| |
| using ArgTypes = std::vector<QualType>; |
| using Cases = std::vector<ConstraintSet>; |
| |
| /// Includes information about |
| /// * function prototype (which is necessary to |
| /// ensure we're modeling the right function and casting values properly), |
| /// * approach to invalidation, |
| /// * a list of branches - a list of list of ranges - |
| /// A branch represents a path in the exploded graph of a function (which |
| /// is a tree). So, a branch is a series of assumptions. In other words, |
| /// branches represent split states and additional assumptions on top of |
| /// the splitting assumption. |
| /// For example, consider the branches in `isalpha(x)` |
| /// Branch 1) |
| /// x is in range ['A', 'Z'] or in ['a', 'z'] |
| /// then the return value is not 0. (I.e. out-of-range [0, 0]) |
| /// Branch 2) |
| /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] |
| /// then the return value is 0. |
| /// * a list of argument constraints, that must be true on every branch. |
| /// If these constraints are not satisfied that means a fatal error |
| /// usually resulting in undefined behaviour. |
| struct Summary { |
| const ArgTypes ArgTys; |
| const QualType RetTy; |
| const InvalidationKind InvalidationKd; |
| Cases CaseConstraints; |
| ConstraintSet ArgConstraints; |
| |
| Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) |
| : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} |
| |
| Summary &Case(ConstraintSet&& CS) { |
| CaseConstraints.push_back(std::move(CS)); |
| return *this; |
| } |
| Summary &ArgConstraint(ValueConstraintPtr VC) { |
| ArgConstraints.push_back(VC); |
| return *this; |
| } |
| |
| private: |
| static void assertTypeSuitableForSummary(QualType T) { |
| assert(!T->isVoidType() && |
| "We should have had no significant void types in the spec"); |
| assert(T.isCanonical() && |
| "We should only have canonical types in the spec"); |
| } |
| |
| public: |
| QualType getArgType(ArgNo ArgN) const { |
| QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; |
| assertTypeSuitableForSummary(T); |
| return T; |
| } |
| |
| /// Try our best to figure out if the call expression is the call of |
| /// *the* library function to which this specification applies. |
| bool matchesCall(const CallExpr *CE) const; |
| }; |
| |
| // The same function (as in, function identifier) may have different |
| // summaries assigned to it, with different argument and return value types. |
| // We call these "variants" of the function. This can be useful for handling |
| // C++ function overloads, and also it can be used when the same function |
| // may have different definitions on different platforms. |
| typedef std::vector<Summary> Summaries; |
| |
| // The map of all functions supported by the checker. It is initialized |
| // lazily, and it doesn't change after initialization. |
| mutable llvm::StringMap<Summaries> FunctionSummaryMap; |
| |
| mutable std::unique_ptr<BugType> BT_InvalidArg; |
| |
| // Auxiliary functions to support ArgNo within all structures |
| // in a unified manner. |
| static QualType getArgType(const Summary &Summary, ArgNo ArgN) { |
| return Summary.getArgType(ArgN); |
| } |
| static QualType getArgType(const CallEvent &Call, ArgNo ArgN) { |
| return ArgN == Ret ? Call.getResultType().getCanonicalType() |
| : Call.getArgExpr(ArgN)->getType().getCanonicalType(); |
| } |
| static QualType getArgType(const CallExpr *CE, ArgNo ArgN) { |
| return ArgN == Ret ? CE->getType().getCanonicalType() |
| : CE->getArg(ArgN)->getType().getCanonicalType(); |
| } |
| static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { |
| return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); |
| } |
| |
| public: |
| void checkPreCall(const CallEvent &Call, CheckerContext &C) const; |
| void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
| bool evalCall(const CallEvent &Call, CheckerContext &C) const; |
| |
| enum CheckKind { |
| CK_StdCLibraryFunctionArgsChecker, |
| CK_StdCLibraryFunctionsTesterChecker, |
| CK_NumCheckKinds |
| }; |
| DefaultBool ChecksEnabled[CK_NumCheckKinds]; |
| CheckerNameRef CheckNames[CK_NumCheckKinds]; |
| |
| private: |
| Optional<Summary> findFunctionSummary(const FunctionDecl *FD, |
| const CallExpr *CE, |
| CheckerContext &C) const; |
| Optional<Summary> findFunctionSummary(const CallEvent &Call, |
| CheckerContext &C) const; |
| |
| void initFunctionSummaries(CheckerContext &C) const; |
| |
| void reportBug(const CallEvent &Call, ExplodedNode *N, |
| CheckerContext &C) const { |
| if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) |
| return; |
| // TODO Add detailed diagnostic. |
| StringRef Msg = "Function argument constraint is not satisfied"; |
| if (!BT_InvalidArg) |
| BT_InvalidArg = std::make_unique<BugType>( |
| CheckNames[CK_StdCLibraryFunctionArgsChecker], |
| "Unsatisfied argument constraints", categories::LogicError); |
| auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); |
| bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); |
| C.emitReport(std::move(R)); |
| } |
| }; |
| |
| const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = |
| std::numeric_limits<ArgNo>::max(); |
| |
| } // end of anonymous namespace |
| |
| ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( |
| ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const { |
| |
| ProgramStateManager &Mgr = State->getStateManager(); |
| SValBuilder &SVB = Mgr.getSValBuilder(); |
| BasicValueFactory &BVF = SVB.getBasicValueFactory(); |
| ConstraintManager &CM = Mgr.getConstraintManager(); |
| QualType T = getArgType(Summary, getArgNo()); |
| SVal V = getArgSVal(Call, getArgNo()); |
| |
| if (auto N = V.getAs<NonLoc>()) { |
| const IntRangeVector &R = getRanges(); |
| size_t E = R.size(); |
| for (size_t I = 0; I != E; ++I) { |
| const llvm::APSInt &Min = BVF.getValue(R[I].first, T); |
| const llvm::APSInt &Max = BVF.getValue(R[I].second, T); |
| assert(Min <= Max); |
| State = CM.assumeInclusiveRange(State, *N, Min, Max, false); |
| if (!State) |
| break; |
| } |
| } |
| |
| return State; |
| } |
| |
| ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( |
| ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const { |
| |
| ProgramStateManager &Mgr = State->getStateManager(); |
| SValBuilder &SVB = Mgr.getSValBuilder(); |
| BasicValueFactory &BVF = SVB.getBasicValueFactory(); |
| ConstraintManager &CM = Mgr.getConstraintManager(); |
| QualType T = getArgType(Summary, getArgNo()); |
| SVal V = getArgSVal(Call, getArgNo()); |
| |
| // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". |
| // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, |
| // and then cut away all holes in R one by one. |
| // |
| // E.g. consider a range list R as [A, B] and [C, D] |
| // -------+--------+------------------+------------+-----------> |
| // A B C D |
| // Then we assume that the value is not in [-inf, A - 1], |
| // then not in [D + 1, +inf], then not in [B + 1, C - 1] |
| if (auto N = V.getAs<NonLoc>()) { |
| const IntRangeVector &R = getRanges(); |
| size_t E = R.size(); |
| |
| const llvm::APSInt &MinusInf = BVF.getMinValue(T); |
| const llvm::APSInt &PlusInf = BVF.getMaxValue(T); |
| |
| const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); |
| if (Left != PlusInf) { |
| assert(MinusInf <= Left); |
| State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); |
| if (!State) |
| return nullptr; |
| } |
| |
| const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); |
| if (Right != MinusInf) { |
| assert(Right <= PlusInf); |
| State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); |
| if (!State) |
| return nullptr; |
| } |
| |
| for (size_t I = 1; I != E; ++I) { |
| const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); |
| const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); |
| if (Min <= Max) { |
| State = CM.assumeInclusiveRange(State, *N, Min, Max, false); |
| if (!State) |
| return nullptr; |
| } |
| } |
| } |
| |
| return State; |
| } |
| |
| ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( |
| ProgramStateRef State, const CallEvent &Call, |
| const Summary &Summary) const { |
| |
| ProgramStateManager &Mgr = State->getStateManager(); |
| SValBuilder &SVB = Mgr.getSValBuilder(); |
| QualType CondT = SVB.getConditionType(); |
| QualType T = getArgType(Summary, getArgNo()); |
| SVal V = getArgSVal(Call, getArgNo()); |
| |
| BinaryOperator::Opcode Op = getOpcode(); |
| ArgNo OtherArg = getOtherArgNo(); |
| SVal OtherV = getArgSVal(Call, OtherArg); |
| QualType OtherT = getArgType(Call, OtherArg); |
| // Note: we avoid integral promotion for comparison. |
| OtherV = SVB.evalCast(OtherV, T, OtherT); |
| if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) |
| .getAs<DefinedOrUnknownSVal>()) |
| State = State->assume(*CompV, true); |
| return State; |
| } |
| |
| void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, |
| CheckerContext &C) const { |
| Optional<Summary> FoundSummary = findFunctionSummary(Call, C); |
| if (!FoundSummary) |
| return; |
| |
| const Summary &Summary = *FoundSummary; |
| ProgramStateRef State = C.getState(); |
| |
| ProgramStateRef NewState = State; |
| for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { |
| ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary); |
| ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary); |
| // The argument constraint is not satisfied. |
| if (FailureSt && !SuccessSt) { |
| if (ExplodedNode *N = C.generateErrorNode(NewState)) |
| reportBug(Call, N, C); |
| break; |
| } else { |
| // We will apply the constraint even if we cannot reason about the |
| // argument. This means both SuccessSt and FailureSt can be true. If we |
| // weren't applying the constraint that would mean that symbolic |
| // execution continues on a code whose behaviour is undefined. |
| assert(SuccessSt); |
| NewState = SuccessSt; |
| } |
| } |
| if (NewState && NewState != State) |
| C.addTransition(NewState); |
| } |
| |
| void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, |
| CheckerContext &C) const { |
| Optional<Summary> FoundSummary = findFunctionSummary(Call, C); |
| if (!FoundSummary) |
| return; |
| |
| // Now apply the constraints. |
| const Summary &Summary = *FoundSummary; |
| ProgramStateRef State = C.getState(); |
| |
| // Apply case/branch specifications. |
| for (const auto &VRS : Summary.CaseConstraints) { |
| ProgramStateRef NewState = State; |
| for (const auto &VR: VRS) { |
| NewState = VR->apply(NewState, Call, Summary); |
| if (!NewState) |
| break; |
| } |
| |
| if (NewState && NewState != State) |
| C.addTransition(NewState); |
| } |
| } |
| |
| bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, |
| CheckerContext &C) const { |
| Optional<Summary> FoundSummary = findFunctionSummary(Call, C); |
| if (!FoundSummary) |
| return false; |
| |
| const Summary &Summary = *FoundSummary; |
| switch (Summary.InvalidationKd) { |
| case EvalCallAsPure: { |
| ProgramStateRef State = C.getState(); |
| const LocationContext *LC = C.getLocationContext(); |
| const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); |
| SVal V = C.getSValBuilder().conjureSymbolVal( |
| CE, LC, CE->getType().getCanonicalType(), C.blockCount()); |
| State = State->BindExpr(CE, LC, V); |
| C.addTransition(State); |
| return true; |
| } |
| case NoEvalCall: |
| // Summary tells us to avoid performing eval::Call. The function is possibly |
| // evaluated by another checker, or evaluated conservatively. |
| return false; |
| } |
| llvm_unreachable("Unknown invalidation kind!"); |
| } |
| |
| bool StdLibraryFunctionsChecker::Summary::matchesCall( |
| const CallExpr *CE) const { |
| // Check number of arguments: |
| if (CE->getNumArgs() != ArgTys.size()) |
| return false; |
| |
| // Check return type if relevant: |
| if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType()) |
| return false; |
| |
| // Check argument types when relevant: |
| for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { |
| QualType FormalT = ArgTys[I]; |
| // Null type marks irrelevant arguments. |
| if (FormalT.isNull()) |
| continue; |
| |
| assertTypeSuitableForSummary(FormalT); |
| |
| QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I); |
| assert(ActualT.isCanonical()); |
| if (ActualT != FormalT) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| Optional<StdLibraryFunctionsChecker::Summary> |
| StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, |
| const CallExpr *CE, |
| CheckerContext &C) const { |
| // Note: we cannot always obtain FD from CE |
| // (eg. virtual call, or call by pointer). |
| assert(CE); |
| |
| if (!FD) |
| return None; |
| |
| initFunctionSummaries(C); |
| |
| IdentifierInfo *II = FD->getIdentifier(); |
| if (!II) |
| return None; |
| StringRef Name = II->getName(); |
| if (Name.empty() || !C.isCLibraryFunction(FD, Name)) |
| return None; |
| |
| auto FSMI = FunctionSummaryMap.find(Name); |
| if (FSMI == FunctionSummaryMap.end()) |
| return None; |
| |
| // Verify that function signature matches the spec in advance. |
| // Otherwise we might be modeling the wrong function. |
| // Strict checking is important because we will be conducting |
| // very integral-type-sensitive operations on arguments and |
| // return values. |
| const Summaries &SpecVariants = FSMI->second; |
| for (const Summary &Spec : SpecVariants) |
| if (Spec.matchesCall(CE)) |
| return Spec; |
| |
| return None; |
| } |
| |
| Optional<StdLibraryFunctionsChecker::Summary> |
| StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, |
| CheckerContext &C) const { |
| const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); |
| if (!FD) |
| return None; |
| const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); |
| if (!CE) |
| return None; |
| return findFunctionSummary(FD, CE, C); |
| } |
| |
| void StdLibraryFunctionsChecker::initFunctionSummaries( |
| CheckerContext &C) const { |
| if (!FunctionSummaryMap.empty()) |
| return; |
| |
| SValBuilder &SVB = C.getSValBuilder(); |
| BasicValueFactory &BVF = SVB.getBasicValueFactory(); |
| const ASTContext &ACtx = BVF.getContext(); |
| |
| // These types are useful for writing specifications quickly, |
| // New specifications should probably introduce more types. |
| // Some types are hard to obtain from the AST, eg. "ssize_t". |
| // In such cases it should be possible to provide multiple variants |
| // of function summary for common cases (eg. ssize_t could be int or long |
| // or long long, so three summary variants would be enough). |
| // Of course, function variants are also useful for C++ overloads. |
| const QualType |
| Irrelevant{}; // A placeholder, whenever we do not care about the type. |
| const QualType IntTy = ACtx.IntTy; |
| const QualType LongTy = ACtx.LongTy; |
| const QualType LongLongTy = ACtx.LongLongTy; |
| const QualType SizeTy = ACtx.getSizeType(); |
| const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T |
| const QualType ConstVoidPtrTy = |
| ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T |
| |
| const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); |
| const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); |
| const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); |
| |
| // Set UCharRangeMax to min of int or uchar maximum value. |
| // The C standard states that the arguments of functions like isalpha must |
| // be representable as an unsigned char. Their type is 'int', so the max |
| // value of the argument should be min(UCharMax, IntMax). This just happen |
| // to be true for commonly used and well tested instruction set |
| // architectures, but not for others. |
| const RangeInt UCharRangeMax = |
| std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); |
| |
| // The platform dependent value of EOF. |
| // Try our best to parse this from the Preprocessor, otherwise fallback to -1. |
| const auto EOFv = [&C]() -> RangeInt { |
| if (const llvm::Optional<int> OptInt = |
| tryExpandAsInteger("EOF", C.getPreprocessor())) |
| return *OptInt; |
| return -1; |
| }(); |
| |
| // We are finally ready to define specifications for all supported functions. |
| // |
| // The signature needs to have the correct number of arguments. |
| // However, we insert `Irrelevant' when the type is insignificant. |
| // |
| // Argument ranges should always cover all variants. If return value |
| // is completely unknown, omit it from the respective range set. |
| // |
| // All types in the spec need to be canonical. |
| // |
| // Every item in the list of range sets represents a particular |
| // execution path the analyzer would need to explore once |
| // the call is modeled - a new program state is constructed |
| // for every range set, and each range line in the range set |
| // corresponds to a specific constraint within this state. |
| // |
| // Upon comparing to another argument, the other argument is casted |
| // to the current argument's type. This avoids proper promotion but |
| // seems useful. For example, read() receives size_t argument, |
| // and its return value, which is of type ssize_t, cannot be greater |
| // than this argument. If we made a promotion, and the size argument |
| // is equal to, say, 10, then we'd impose a range of [0, 10] on the |
| // return value, however the correct range is [-1, 10]. |
| // |
| // Please update the list of functions in the header after editing! |
| // |
| |
| // Below are helpers functions to create the summaries. |
| auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, |
| IntRangeVector Ranges) { |
| return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); |
| }; |
| struct { |
| auto operator()(RangeKind Kind, IntRangeVector Ranges) { |
| return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); |
| } |
| auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { |
| return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); |
| } |
| } ReturnValueCondition; |
| auto Range = [](RangeInt b, RangeInt e) { |
| return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; |
| }; |
| auto SingleValue = [](RangeInt v) { |
| return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; |
| }; |
| auto LessThanOrEq = BO_LE; |
| auto NotNull = [&](ArgNo ArgN) { |
| return std::make_shared<NotNullConstraint>(ArgN); |
| }; |
| |
| using RetType = QualType; |
| // Templates for summaries that are reused by many functions. |
| auto Getc = [&]() { |
| return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) |
| .Case({ReturnValueCondition(WithinRange, |
| {{EOFv, EOFv}, {0, UCharRangeMax}})}); |
| }; |
| auto Read = [&](RetType R, RangeInt Max) { |
| return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, |
| NoEvalCall) |
| .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), |
| ReturnValueCondition(WithinRange, Range(-1, Max))}); |
| }; |
| auto Fread = [&]() { |
| return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant}, |
| RetType{SizeTy}, NoEvalCall) |
| .Case({ |
| ReturnValueCondition(LessThanOrEq, ArgNo(2)), |
| }) |
| .ArgConstraint(NotNull(ArgNo(0))); |
| }; |
| auto Fwrite = [&]() { |
| return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant}, |
| RetType{SizeTy}, NoEvalCall) |
| .Case({ |
| ReturnValueCondition(LessThanOrEq, ArgNo(2)), |
| }) |
| .ArgConstraint(NotNull(ArgNo(0))); |
| }; |
| auto Getline = [&](RetType R, RangeInt Max) { |
| return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, |
| NoEvalCall) |
| .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); |
| }; |
| |
| FunctionSummaryMap = { |
| // The isascii() family of functions. |
| // The behavior is undefined if the value of the argument is not |
| // representable as unsigned char or is not equal to EOF. See e.g. C99 |
| // 7.4.1.2 The isalpha function (p: 181-182). |
| { |
| "isalnum", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| // Boils down to isupper() or islower() or isdigit(). |
| .Case( |
| {ArgumentCondition(0U, WithinRange, |
| {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| // The locale-specific range. |
| // No post-condition. We are completely unaware of |
| // locale-specific return values. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{128, UCharRangeMax}})}) |
| .Case({ArgumentCondition(0U, OutOfRange, |
| {{'0', '9'}, |
| {'A', 'Z'}, |
| {'a', 'z'}, |
| {128, UCharRangeMax}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))}) |
| .ArgConstraint(ArgumentCondition( |
| 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))}, |
| }, |
| { |
| "isalpha", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{'A', 'Z'}, {'a', 'z'}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| // The locale-specific range. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{128, UCharRangeMax}})}) |
| .Case({ArgumentCondition( |
| 0U, OutOfRange, |
| {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isascii", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isblank", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{'\t', '\t'}, {' ', ' '}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition(0U, OutOfRange, |
| {{'\t', '\t'}, {' ', ' '}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "iscntrl", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{0, 32}, {127, 127}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case( |
| {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isdigit", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isgraph", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "islower", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| // Is certainly lowercase. |
| .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| // Is ascii but not lowercase. |
| .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), |
| ArgumentCondition(0U, OutOfRange, Range('a', 'z')), |
| ReturnValueCondition(WithinRange, SingleValue(0))}) |
| // The locale-specific range. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{128, UCharRangeMax}})}) |
| // Is not an unsigned char. |
| .Case({ArgumentCondition(0U, OutOfRange, |
| Range(0, UCharRangeMax)), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isprint", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "ispunct", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case({ArgumentCondition( |
| 0U, WithinRange, |
| {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case({ArgumentCondition( |
| 0U, OutOfRange, |
| {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isspace", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| // Space, '\f', '\n', '\r', '\t', '\v'. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{9, 13}, {' ', ' '}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| // The locale-specific range. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{128, UCharRangeMax}})}) |
| .Case({ArgumentCondition( |
| 0U, OutOfRange, |
| {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isupper", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| // Is certainly uppercase. |
| .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| // The locale-specific range. |
| .Case({ArgumentCondition(0U, WithinRange, |
| {{128, UCharRangeMax}})}) |
| // Other. |
| .Case({ArgumentCondition(0U, OutOfRange, |
| {{'A', 'Z'}, {128, UCharRangeMax}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| { |
| "isxdigit", |
| Summaries{ |
| Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .Case( |
| {ArgumentCondition(0U, WithinRange, |
| {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), |
| ReturnValueCondition(OutOfRange, SingleValue(0))}) |
| .Case( |
| {ArgumentCondition(0U, OutOfRange, |
| {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), |
| ReturnValueCondition(WithinRange, SingleValue(0))})}, |
| }, |
| |
| // The getc() family of functions that returns either a char or an EOF. |
| {"getc", Summaries{Getc()}}, |
| {"fgetc", Summaries{Getc()}}, |
| {"getchar", |
| Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) |
| .Case({ReturnValueCondition( |
| WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}}, |
| |
| // read()-like functions that never return more than buffer size. |
| // We are not sure how ssize_t is defined on every platform, so we |
| // provide three variants that should cover common cases. |
| {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), |
| Read(LongLongTy, LongLongMax)}}, |
| {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), |
| Read(LongLongTy, LongLongMax)}}, |
| {"fread", Summaries{Fread()}}, |
| {"fwrite", Summaries{Fwrite()}}, |
| // getline()-like functions either fail or read at least the delimiter. |
| {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), |
| Getline(LongLongTy, LongLongMax)}}, |
| {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), |
| Getline(LongLongTy, LongLongMax)}}, |
| }; |
| |
| // Functions for testing. |
| if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { |
| llvm::StringMap<Summaries> TestFunctionSummaryMap = { |
| {"__two_constrained_args", |
| Summaries{ |
| Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .ArgConstraint( |
| ArgumentCondition(0U, WithinRange, SingleValue(1))) |
| .ArgConstraint( |
| ArgumentCondition(1U, WithinRange, SingleValue(1)))}}, |
| {"__arg_constrained_twice", |
| Summaries{Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) |
| .ArgConstraint( |
| ArgumentCondition(0U, OutOfRange, SingleValue(1))) |
| .ArgConstraint( |
| ArgumentCondition(0U, OutOfRange, SingleValue(2)))}}, |
| }; |
| for (auto &E : TestFunctionSummaryMap) { |
| auto InsertRes = |
| FunctionSummaryMap.insert({std::string(E.getKey()), E.getValue()}); |
| assert(InsertRes.second && |
| "Test functions must not clash with modeled functions"); |
| (void)InsertRes; |
| } |
| } |
| } |
| |
| void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { |
| mgr.registerChecker<StdLibraryFunctionsChecker>(); |
| } |
| |
| bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) { |
| return true; |
| } |
| |
| #define REGISTER_CHECKER(name) \ |
| void ento::register##name(CheckerManager &mgr) { \ |
| StdLibraryFunctionsChecker *checker = \ |
| mgr.getChecker<StdLibraryFunctionsChecker>(); \ |
| checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ |
| checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ |
| mgr.getCurrentCheckerName(); \ |
| } \ |
| \ |
| bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } |
| |
| REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) |
| REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker) |