| //===-------- String.cpp - Secure C standard string library calls ---------===// |
| // |
| // The SAFECode Compiler |
| // |
| // This file was developed by the LLVM research group and is distributed under |
| // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass finds all calls to functions in the C standard string library and |
| // transforms them to a more secure form. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // |
| // To add a new function to the CStdLib checks, the following modifications are |
| // necessary: |
| // |
| // In SAFECode: |
| // |
| // - Add the pool_* prototype of the function to |
| // runtime/include/CStdLibSupport.h. |
| // |
| // - Implement the pool_* version of the function in the relevant file in |
| // runtime/DebugRuntime. |
| // |
| // - Add debug instrumentation information to |
| // lib/DebugInstrumentation/DebugInstrumentation.cpp. |
| // |
| // - Update the StringTransform pass to transform calls of the library |
| // function into its pool_* version in lib/CStdLib/String.cpp. |
| // |
| // In poolalloc: |
| // |
| // - Add an entry for the pool_* version of the function containing the |
| // number of initial pool arguments to the structure in |
| // include/dsa/CStdLib.h. |
| // |
| // - Add an entry to lib/DSA/StdLibPass.cpp for the pool_* version of the |
| // function to allow DSA to recognize it. |
| // |
| |
| |
| #include "safecode/CStdLib.h" |
| #include "safecode/Config/config.h" |
| |
| #include <cstdarg> |
| #include <string> |
| |
| using std::string; |
| |
| using namespace llvm; |
| |
| namespace llvm |
| { |
| |
| // Identifier variable for the pass |
| char StringTransform::ID = 0; |
| |
| // Statistics counters |
| |
| STATISTIC(st_xform_memccpy, "Total memccpy() calls transformed"); |
| STATISTIC(st_xform_memchr, "Total memchr() calls transformed"); |
| STATISTIC(st_xform_memcmp, "Total memcmp() calls transformed"); |
| STATISTIC(st_xform_memcpy, "Total memcpy() calls transformed"); |
| STATISTIC(st_xform_memmove, "Total memmove() calls transformed"); |
| STATISTIC(st_xform_memset, "Total memset() calls transformed"); |
| STATISTIC(st_xform_strcat, "Total strcat() calls transformed"); |
| STATISTIC(st_xform_strchr, "Total strchr() calls transformed"); |
| STATISTIC(st_xform_strcmp, "Total strcmp() calls transformed"); |
| STATISTIC(st_xform_strcoll, "Total strcoll() calls transformed"); |
| STATISTIC(st_xform_strcpy, "Total strcpy() calls transformed"); |
| STATISTIC(st_xform_strcspn, "Total strcspn() calls transformed"); |
| // strerror_r |
| STATISTIC(st_xform_strlen, "Total strlen() calls transformed"); |
| STATISTIC(st_xform_strncat, "Total strncat() calls transformed"); |
| STATISTIC(st_xform_strncmp, "Total strncmp() calls transformed"); |
| STATISTIC(st_xform_strncpy, "Total strncpy() calls transformed"); |
| STATISTIC(st_xform_strpbrk, "Total strpbrk() calls transformed"); |
| STATISTIC(st_xform_strrchr, "Total strrchr() calls transformed"); |
| STATISTIC(st_xform_strspn, "Total strspn() calls transformed"); |
| STATISTIC(st_xform_strstr, "Total strstr() calls transformed"); |
| STATISTIC(st_xform_strxfrm, "Total strxfrm() calls transformed"); |
| // strtok, strtok_r, strxfrm |
| |
| #ifdef HAVE_MEMPCPY |
| STATISTIC(st_xform_mempcpy, "Total mempcpy() calls transformed"); |
| #endif |
| #ifdef HAVE_STRCASESTR |
| STATISTIC(st_xform_strcasestr, "Total strcasestr() calls transformed"); |
| #endif |
| #ifdef HAVE_STPCPY |
| STATISTIC(st_xform_stpcpy, "Total stpcpy() calls transformed"); |
| #endif |
| #ifdef HAVE_STRNLEN |
| STATISTIC(st_xform_strnlen, "Total strnlen() calls transformed"); |
| #endif |
| |
| STATISTIC(st_xform_bcmp, "Total bcmp() calls transformed"); |
| STATISTIC(st_xform_bcopy, "Total bcopy() calls transformed"); |
| STATISTIC(st_xform_bzero, "Total bzero() calls transformed"); |
| STATISTIC(st_xform_index, "Total index() calls transformed"); |
| STATISTIC(st_xform_rindex, "Total rindex() calls transformed"); |
| STATISTIC(st_xform_strcasecmp, "Total strcasecmp() calls transformed"); |
| STATISTIC(st_xform_strncasecmp, "Total strncasecmp() calls transformed"); |
| |
| // |
| // Functions that aren't handled (yet...): |
| // - stpncpy and __stpncpy_chk |
| // |
| |
| static RegisterPass<StringTransform> |
| ST("string_transform", "Secure C standard string library calls"); |
| |
| /** |
| * Entry point for the LLVM pass that transforms C standard string library calls |
| * |
| * @param M Module to scan |
| * @return Whether we modified the module |
| */ |
| bool |
| StringTransform::runOnModule(Module &M) |
| { |
| // Flags whether we modified the module. |
| bool chgd = false; |
| |
| tdata = &getAnalysis<TargetData>(); |
| |
| // Create needed pointer types (char * == i8 * == VoidPtrTy). |
| Type *VoidPtrTy = IntegerType::getInt8PtrTy(M.getContext()); |
| // Determine the type of size_t for functions that return this result. |
| Type *SizeTTy = tdata->getIntPtrType(M.getContext()); |
| // Create other return types (int, void). |
| Type *Int32Ty = IntegerType::getInt32Ty(M.getContext()); |
| Type *VoidTy = Type::getVoidTy(M.getContext()); |
| |
| // Functions from <string.h> |
| chgd |= transform(M, "memccpy", 4, 2, VoidPtrTy, st_xform_memccpy); |
| chgd |= transform(M, "memchr", 3, 1, VoidPtrTy, st_xform_memchr); |
| chgd |= transform(M, "memcmp", 3, 2, Int32Ty, st_xform_memcmp); |
| chgd |= transform(M, "memcpy", 3, 2, Int32Ty, st_xform_memcpy); |
| chgd |= transform(M, "memmove", 3, 2, VoidPtrTy, st_xform_memmove); |
| chgd |= transform(M, "memset", 2, 1, VoidPtrTy, st_xform_memset); |
| chgd |= transform(M, "strcat", 2, 2, VoidPtrTy, st_xform_strcat); |
| chgd |= transform(M, "strchr", 2, 1, VoidPtrTy, st_xform_strchr); |
| chgd |= transform(M, "strcmp", 2, 2, Int32Ty, st_xform_strcmp); |
| chgd |= transform(M, "strcoll", 2, 2, Int32Ty, st_xform_strcoll); |
| chgd |= transform(M, "strcpy", 2, 2, VoidPtrTy, st_xform_strcpy); |
| chgd |= transform(M, "strcspn", 2, 2, SizeTTy, st_xform_strcspn); |
| // chgd |= handle_strerror_r(M); |
| chgd |= transform(M, "strlen", 1, 1, SizeTTy, st_xform_strlen); |
| chgd |= transform(M, "strncat", 3, 2, VoidPtrTy, st_xform_strncat); |
| chgd |= transform(M, "strncmp", 3, 2, Int32Ty, st_xform_strncmp); |
| chgd |= transform(M, "strncpy", 3, 2, VoidPtrTy, st_xform_strncpy); |
| chgd |= transform(M, "strpbrk", 2, 2, VoidPtrTy, st_xform_strpbrk); |
| chgd |= transform(M, "strrchr", 2, 1, VoidPtrTy, st_xform_strrchr); |
| chgd |= transform(M, "strspn", 2, 2, SizeTTy, st_xform_strspn); |
| chgd |= transform(M, "strstr", 2, 2, VoidPtrTy, st_xform_strstr); |
| chgd |= transform(M, "strxfrm", 3, 2, SizeTTy, st_xform_strxfrm); |
| // Extensions to <string.h> |
| #ifdef HAVE_MEMPCPY |
| chgd |= transform(M, "mempcpy", 3, 2, VoidPtrTy, st_xform_mempcpy); |
| #endif |
| #ifdef HAVE_STRCASESTR |
| chgd |= transform(M, "strcasestr", 2, 2, VoidPtrTy, st_xform_strcasestr); |
| #endif |
| #ifdef HAVE_STPCPY |
| chgd |= transform(M, "stpcpy", 2, 2, VoidPtrTy, st_xform_stpcpy); |
| #endif |
| #ifdef HAVE_STRNLEN |
| chgd |= transform(M, "strnlen", 2, 1, SizeTTy, st_xform_strnlen); |
| #endif |
| // Functions from <strings.h> |
| chgd |= transform(M, "bcmp", 3, 2, Int32Ty, st_xform_bcmp); |
| chgd |= transform(M, "bcopy", 3, 2, VoidTy, st_xform_bcopy); |
| chgd |= transform(M, "bzero", 2, 1, VoidTy, st_xform_bzero); |
| chgd |= transform(M, "index", 2, 1, VoidPtrTy, st_xform_index); |
| chgd |= transform(M, "rindex", 2, 1, VoidPtrTy, st_xform_rindex); |
| chgd |= transform(M, "strcasecmp", 2, 2, Int32Ty, st_xform_strcasecmp); |
| chgd |= transform(M, "strncasecmp", 3, 2, Int32Ty, st_xform_strncasecmp); |
| // Darwin-specific secure extensions |
| SourceFunction MemcpyChk = { "__memcpy_chk", VoidPtrTy, 4 }; |
| SourceFunction MemmoveChk = { "__memmove_chk", VoidPtrTy, 4 }; |
| SourceFunction MemsetChk = { "__memset_chk", VoidPtrTy, 4 }; |
| SourceFunction StrcpyChk = { "__strcpy_chk", VoidPtrTy, 3 }; |
| SourceFunction StrcatChk = { "__strcat_chk", VoidPtrTy, 3 }; |
| SourceFunction StrncatChk = { "__strncat_chk", VoidPtrTy, 4 }; |
| SourceFunction StrncpyChk = { "__strncpy_chk", VoidPtrTy, 4 }; |
| DestFunction PoolMemcpy = { "pool_memcpy", 3, 2 }; |
| DestFunction PoolMemmove = { "pool_memmove", 3, 2 }; |
| DestFunction PoolMemset = { "pool_memset", 3, 1 }; |
| DestFunction PoolStrcpy = { "pool_strcpy", 2, 2 }; |
| DestFunction PoolStrcat = { "pool_strcat", 2, 2 }; |
| DestFunction PoolStrncat = { "pool_strncat", 3, 2 }; |
| DestFunction PoolStrncpy = { "pool_strncpy", 3, 2 }; |
| chgd |= vtransform(M, MemcpyChk, PoolMemcpy, st_xform_memcpy, 1, 2, 3); |
| chgd |= vtransform(M, MemmoveChk, PoolMemmove, st_xform_memmove, 1, 2, 3); |
| chgd |= vtransform(M, MemsetChk, PoolMemset, st_xform_memset, 1, 2, 3); |
| chgd |= vtransform(M, StrcpyChk, PoolStrcpy, st_xform_strcpy, 1, 2); |
| chgd |= vtransform(M, StrcatChk, PoolStrcat, st_xform_strcat, 1, 2); |
| chgd |= vtransform(M, StrncatChk, PoolStrncat, st_xform_strncat, 1, 2, 3); |
| chgd |= vtransform(M, StrncpyChk, PoolStrncpy, st_xform_strncpy, 1, 2, 3); |
| #ifdef HAVE_STPCPY |
| SourceFunction StpcpyChk = { "__stpcpy_chk", VoidPtrTy, 3 }; |
| DestFunction PoolStpcpy = { "pool_stpcpy", 2, 2 }; |
| chgd |= vtransform(M, StpcpyChk, PoolStpcpy, st_xform_stpcpy, 1, 2); |
| #endif |
| |
| return chgd; |
| } |
| |
| // |
| // Simple wrapper to gtransform() for when |
| // 1) the transformed function is named "pool_" + original name. |
| // 2) the order and number of arguments is preserved from the original to the |
| // transformed function. |
| // |
| // Parameters: |
| // M - the module to scan |
| // argc - the expected number of arguments to the original function |
| // pool_argc - the number of initial pool parameters to add to the transformed |
| // function |
| // ReturnTy - the expected return type of the original function |
| // statistic - a reference to a relevant Statistic for the number of |
| // transformation |
| // |
| // Returns: |
| // This function returns true if the module was modified and false otherwise. |
| // |
| bool |
| StringTransform::transform(Module &M, |
| const StringRef FunctionName, |
| const unsigned argc, |
| const unsigned pool_argc, |
| Type *ReturnTy, |
| Statistic &statistic) |
| { |
| SourceFunction src = { FunctionName.data(), ReturnTy, argc }; |
| string dst_name = "pool_" + FunctionName.str(); |
| DestFunction dst = { dst_name.c_str(), argc, pool_argc }; |
| vector<unsigned> args; |
| for (unsigned i = 1; i <= argc; i++) |
| args.push_back(i); |
| return gtransform(M, src, dst, statistic, args); |
| } |
| |
| // |
| // vtransform() - wrapper to gtransform() that takes variable arguments |
| // instead of a vector as the final parameter |
| // |
| bool |
| StringTransform::vtransform(Module &M, |
| const SourceFunction &from, |
| const DestFunction &to, |
| Statistic &stat, |
| ...) |
| { |
| vector<unsigned> args; |
| va_list ap; |
| va_start(ap, stat); |
| // Read the positions to append as vararg parameters. |
| for (unsigned i = 1; i <= to.source_argc; i++) { |
| unsigned position = va_arg(ap, unsigned); |
| args.push_back(position); |
| } |
| va_end(ap); |
| return gtransform(M, from, to, stat, args); |
| } |
| |
| // |
| // Secures C standard string library calls by transforming them into |
| // their corresponding runtime wrapper functions. |
| // |
| // The 'from' parameter describes a function to transform. It is struct of |
| // the form |
| // struct { const char *name; Type *return_type; unsigned argc }; |
| // where |
| // - 'name' is the name of the function to transform |
| // - 'return_type' is its expected return type |
| // - 'argc' is its expected number of arguments. |
| // |
| // The 'to' parameter describes the function to transform into. It is a struct |
| // of the form |
| // struct { const char *name, unsigned source_argc, unsigned pool_argc }; |
| // where |
| // - 'name' is the name of the resulting function |
| // - 'source_argc' is the number of parameters the function takes from the |
| // original function |
| // - 'pool_argc' is the number of initial pool parameters to add. |
| // |
| // The 'append_order' vector describes how to move the parameters of the |
| // original function into the transformed function call. |
| // |
| // @param M Module from runOnModule() to scan for functions to |
| // transform. |
| // @param from SourceFunction structure reference described above |
| // @param to DestFunction structure reference described above. |
| // @param stat A reference to the relevant transform statistic. |
| // @param append_order A vector describing the order to add the arguments from |
| // the source function into the destination function. |
| // @return Returns true if any calls were transformed, and |
| // false if no changes were made. |
| // |
| bool |
| StringTransform::gtransform(Module &M, |
| const SourceFunction &from, |
| const DestFunction &to, |
| Statistic &stat, |
| const vector<unsigned> &append_order) |
| { |
| // Get the source function if it exists in the module. |
| Function *src = M.getFunction(from.name); |
| if (!src) |
| return false; |
| // Make sure the source function behaves as described, otherwise skip it. |
| FunctionType *F_type = src->getFunctionType(); |
| if (F_type->getReturnType() != from.return_type || F_type->isVarArg() || |
| F_type->getNumParams() != from.argc) |
| return false; |
| // Make sure the append_order vector has the expected number of elements. |
| assert(append_order.size() == to.source_argc && |
| "Unexpected number of parameter positions in vector!"); |
| // Check that each pool parameter has a corresponding original parameter. |
| assert(to.pool_argc <= to.source_argc && "More pools than arguments?"); |
| // Check if the pool completeness information can be fit into a single 8 bit |
| // quantity. |
| assert(to.pool_argc <= 8 && "Only up to 8 pool parameters supported!"); |
| std::vector<Instruction *> toModify; |
| std::vector<Instruction *>::iterator modifyIter, modifyEnd; |
| // Scan through the module for uses of the function to transform. |
| for (Value::use_iterator UI = src->use_begin(), UE = src->use_end(); |
| UI != UE; |
| ++UI) { |
| CallSite CS(*UI); |
| // Ensure the use is an instruction and that the instruction calls the |
| // source function (as opposed to passing it as a parameter or other |
| // possible uses). |
| if (!CS || CS.getCalledValue() != src) |
| continue; |
| toModify.push_back(CS.getInstruction()); |
| } |
| // Return early if we've found nothing to modify. |
| if (toModify.empty()) |
| return false; |
| // The pool handle type is a void pointer (i8 *). |
| PointerType *VoidPtrTy = IntegerType::getInt8PtrTy(M.getContext()); |
| Type *Int8Ty = IntegerType::getInt8Ty(M.getContext()); |
| // Build the type of the parameters to the transformed function. This function |
| // has to.pool_argc initial arguments of type i8 *. |
| std::vector<Type *> ParamTy(to.pool_argc, VoidPtrTy); |
| // After the initial pool arguments, parameters from the original function go |
| // into the type. |
| for (unsigned i = 0; i < to.source_argc; i++) { |
| unsigned position = append_order[i]; |
| assert(0 < position && position <= from.argc && "Parameter out of bounds!"); |
| Type *ParamType = F_type->getParamType(position - 1); |
| if (i < to.pool_argc) |
| assert(isa<PointerType>(ParamType) && "Pointer type expected for parameter!"); |
| ParamTy.push_back(ParamType); |
| } |
| // The completeness bitvector goes at the end. |
| ParamTy.push_back(Int8Ty); |
| // Build the type of the transformed function. |
| FunctionType *FT = FunctionType::get(F_type->getReturnType(), ParamTy, false); |
| Function *PoolFInModule = M.getFunction(to.name); |
| // Make sure that the function declarations don't conflict. |
| if (PoolFInModule) |
| assert((PoolFInModule->getFunctionType() == FT || |
| PoolFInModule->hasLocalLinkage()) && |
| "Replacement function declared with wrong type!"); |
| // Build the actual transformed function. |
| Constant *PoolF = M.getOrInsertFunction(to.name, FT); |
| // This is a placeholder value for the pool handles (to be "filled in" later |
| // by poolalloc). |
| Value *PH = ConstantPointerNull::get(VoidPtrTy); |
| // Transform every valid use of the function that was found. |
| for (modifyIter = toModify.begin(), modifyEnd = toModify.end(); |
| modifyIter != modifyEnd; |
| ++modifyIter) { |
| Instruction *I = *modifyIter; |
| // Construct vector of parameters to transformed function call. Also insert |
| // NULL pool handles. |
| std::vector<Value *> Params(to.pool_argc, PH); |
| // Insert the original parameters. |
| for (unsigned i = 0; i < to.source_argc; i++) { |
| Value *f = I->getOperand(append_order[i] - 1); |
| Params.push_back(f); |
| } |
| // Add the DSA completeness bitvector. Set it to 0 (= incomplete). |
| Params.push_back(ConstantInt::get(Int8Ty, 0)); |
| // Create the call instruction for the transformed function and insert it |
| // before the current instruction. |
| CallInst *C = CallInst::Create(PoolF, Params, "", I); |
| // Transfer debugging metadata if it exists from the old call into the new |
| // one. |
| if (MDNode *DebugNode = I->getMetadata("dbg")) |
| C->setMetadata("dbg", DebugNode); |
| // Replace all uses of the function with its transformed equivalent. |
| I->replaceAllUsesWith(C); |
| I->eraseFromParent(); |
| // Record the transformation. |
| ++stat; |
| } |
| // Reaching here means some call has been modified; |
| return true; |
| } |
| |
| } |