blob: 24e5dc0fb22f5e18116c73a941a0ba25ae78a8d6 [file] [log] [blame]
//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Scanf/printf implementation for use in *Sanitizer interceptors.
// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
// with a few common GNU extensions.
//
//===----------------------------------------------------------------------===//
#include <stdarg.h>
// Stores internal_atoll(p) in *out (narrowed to int) and returns a pointer to
// the first character after the leading run of ASCII digits at p. If p does
// not start with a digit, p itself is returned.
static const char *parse_number(const char *p, int *out) {
  *out = internal_atoll(p);
  const char *cursor = p;
  for (; *cursor >= '0' && *cursor <= '9'; ++cursor) {
  }
  return cursor;
}
// Tries to parse a positional argument index of the form "n$" at p.
// On success stores n in *out and returns a pointer just past the '$'.
// Otherwise leaves *out alone and returns p unchanged, so that the digits can
// be re-parsed later as the field width.
static const char *maybe_parse_param_index(const char *p, int *out) {
  if (*p < '0' || *p > '9')
    return p;

  int index;
  const char *afterDigits = parse_number(p, &index);
  CHECK(afterDigits);
  if (*afterDigits != '$')
    return p;

  *out = index;
  return afterDigits + 1;
}
// Returns true if c is one of the characters listed in the NUL-terminated
// set s.
//
// The terminating '\0' is never treated as a member of the set. strchr()-style
// searches consider the terminator part of the string, so searching for '\0'
// would "succeed" for every set; callers in this file test *p against a set
// and then advance p, which would step past the end of the format string.
// NOTE(review): assumes internal_strchr follows strchr() semantics here.
static bool char_is_one_of(char c, const char *s) {
  if (c == '\0')
    return false;
  return !!internal_strchr(s, c);
}
// Parses an optional length modifier at p into ll and returns a pointer to
// the first character after it (or p if there is none).
// Single-character modifiers (j, z, t, L, q) fill only ll[0]; "h"/"l" fill
// ll[0], and the doubled forms "hh"/"ll" additionally fill ll[1].
// Slots that are not written keep whatever value the caller put there.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  const char first = *p;

  if (char_is_one_of(first, "jztLq")) {
    ll[0] = first;
    return p + 1;
  }

  if (first != 'h' && first != 'l')
    return p;

  // 'h' and 'l' may each be doubled.
  ll[0] = first;
  ++p;
  if (*p == first) {
    ll[1] = first;
    ++p;
  }
  return p;
}
// Tells whether c is one of the integer conversion specifiers
// (d, i, o, u, x, X, n).
static bool format_is_integer_conv(char c) {
  static const char kIntegerConvs[] = "diouxXn";
  return char_is_one_of(c, kIntegerConvs);
}
// Tells whether c is one of the floating point conversion specifiers
// (a, A, e, E, f, F, g, G).
static bool format_is_float_conv(char c) {
  static const char kFloatConvs[] = "aAeEfFgG";
  return char_is_one_of(c, kFloatConvs);
}
// Size in bytes of one output character for the string-like conversions
// (c, C, s, S, [), or 0 when the specifier/length-modifier combination is
// not a valid string-like conversion.
static int format_get_char_size(char convSpecifier,
                                const char lengthModifier[2]) {
  // %C and %S are wide regardless of the length modifier.
  if (char_is_one_of(convSpecifier, "CS"))
    return sizeof(wchar_t);

  if (!char_is_one_of(convSpecifier, "cs["))
    return 0;

  const char first = lengthModifier[0];
  if (first == '\0')
    return sizeof(char);
  // A single 'l' selects the wide variant; anything else is invalid.
  if (first == 'l' && lengthModifier[1] == '\0')
    return sizeof(wchar_t);
  return 0;
}
// Non-positive sentinel values returned by the *_get_value_size() helpers in
// place of a byte count.
enum FormatStoreSize {
  // The conversion specifier is invalid.
  FSS_INVALID = 0,
  // The size is not known up front; it is strlen() of the destination buffer.
  FSS_STRLEN = -1,
  // The size is not known up front; it is wcslen() of the destination buffer.
  FSS_WCSLEN = -2
};
// Computes the in-memory size of the value a directive refers to.
// Returns a positive byte count for integer, floating point and pointer
// conversions, or FSS_INVALID when the specifier or its length modifier is
// not recognized. promote_float selects printf behavior (a plain float
// conversion is passed as double) over scanf behavior (it is stored as float).
static int format_get_value_size(char convSpecifier,
                                 const char lengthModifier[2],
                                 bool promote_float) {
  const char mod = lengthModifier[0];

  if (format_is_integer_conv(convSpecifier)) {
    if (mod == 0)
      return sizeof(int);
    if (mod == 'h')
      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
    if (mod == 'l')
      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
    if (mod == 'q' || mod == 'L')
      return sizeof(long long);
    if (mod == 'j')
      return sizeof(INTMAX_T);
    if (mod == 'z')
      return sizeof(SIZE_T);
    if (mod == 't')
      return sizeof(PTRDIFF_T);
    return FSS_INVALID;
  }

  if (format_is_float_conv(convSpecifier)) {
    if (mod == 'L' || mod == 'q')
      return sizeof(long double);
    if (mod == 'l')
      return lengthModifier[1] == 'l' ? sizeof(long double) : sizeof(double);
    if (mod == 0) {
      // Printf promotes floats to doubles but scanf does not.
      return promote_float ? sizeof(double) : sizeof(float);
    }
    return FSS_INVALID;
  }

  // %p takes no length modifier.
  if (convSpecifier == 'p' && mod == 0)
    return sizeof(void *);

  return FSS_INVALID;
}
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
// The parser zeroes the whole struct and then sets argIdx to -1 before
// parsing, so any field the directive does not mention stays 0/false.
struct ScanfDirective {
int argIdx; // argument index, or -1 if not specified ("%n$")
int fieldWidth; // maximum field width, or 0 if none was given
const char *begin; // points at the '%' that starts the directive
const char *end; // points just past the last character of the directive
bool suppressed; // suppress assignment ("*")
bool allocate; // allocate space ("m")
char lengthModifier[2]; // e.g. {'l', 'l'} for "ll"; unused slots are 0
char convSpecifier; // conversion character, e.g. 'd', 's' or '['
bool maybeGnuMalloc; // "%a" followed by s, S or [...]: may be the old GNU
// allocating extension rather than POSIX %a
};
// Parse scanf format string. If a valid directive is encountered, it is
// returned in dir. This function returns the pointer to the first
// unprocessed character, or 0 in case of error.
// In case of the end-of-string, a pointer to the closing \0 is returned and
// dir->convSpecifier is left at 0.
//
// Errors (nullptr) include: a format string that ends in the middle of a
// directive, an explicit field width of 0, an unterminated "[...]" set, and
// an ambiguous "%a[...]" whose set contains '%'.
//
// allowGnuMalloc enables detection of the old GNU "%as" / "%aS" / "%a[...]"
// allocating extension; such directives are flagged in dir->maybeGnuMalloc.
static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
                                    ScanfDirective *dir) {
  internal_memset(dir, 0, sizeof(*dir));
  dir->argIdx = -1;

  while (*p) {
    if (*p != '%') {
      ++p;
      continue;
    }
    dir->begin = p;
    ++p;
    // %%
    if (*p == '%') {
      ++p;
      continue;
    }
    if (*p == '\0') {
      return nullptr;
    }
    // %n$
    p = maybe_parse_param_index(p, &dir->argIdx);
    CHECK(p);
    // *
    if (*p == '*') {
      dir->suppressed = true;
      ++p;
    }
    // Field width
    if (*p >= '0' && *p <= '9') {
      p = parse_number(p, &dir->fieldWidth);
      CHECK(p);
      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
        return nullptr;
    }
    // m
    if (*p == 'm') {
      dir->allocate = true;
      ++p;
    }
    // The format string may end in the middle of a directive (e.g. "%5" or
    // "%*"). Stop here rather than let the code below advance p beyond the
    // terminating '\0'.
    if (*p == '\0')
      return nullptr;
    // Length modifier.
    p = maybe_parse_length_modifier(p, dir->lengthModifier);
    // Same check for a directive that ends right after the length modifier
    // (e.g. "%l"): consuming the '\0' as the conversion specifier would leave
    // p one past the end of the string, and the caller dereferences p.
    if (*p == '\0')
      return nullptr;
    // Conversion specifier.
    dir->convSpecifier = *p++;
    // Consume %[...] expression.
    if (dir->convSpecifier == '[') {
      if (*p == '^')
        ++p;
      // A ']' right after '[' or '[^' is a literal member of the set.
      if (*p == ']')
        ++p;
      while (*p && *p != ']')
        ++p;
      if (*p == 0)
        return nullptr;  // unexpected end of string
      // Consume the closing ']'.
      ++p;
    }
    // This is unfortunately ambiguous between old GNU extension
    // of %as, %aS and %a[...] and newer POSIX %a followed by
    // letters s, S or [.
    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
        !dir->lengthModifier[0]) {
      if (*p == 's' || *p == 'S') {
        dir->maybeGnuMalloc = true;
        ++p;
      } else if (*p == '[') {
        // Watch for %a[h-j%d], if % appears in the
        // [...] range, then we need to give up, we don't know
        // if scanf will parse it as POSIX %a [h-j %d ] or
        // GNU allocation of string with range dh-j plus %.
        const char *q = p + 1;
        if (*q == '^')
          ++q;
        if (*q == ']')
          ++q;
        while (*q && *q != ']' && *q != '%')
          ++q;
        if (*q == 0 || *q == '%')
          return nullptr;
        p = q + 1;  // Consume the closing ']'.
        dir->maybeGnuMalloc = true;
      }
    }
    dir->end = p;
    break;
  }
  return p;
}
// Returns the number of bytes scanf stores through the argument of the given
// directive, or one of the FormatStoreSize sentinels when the size is invalid
// or can only be determined from the stored string itself.
static int scanf_get_value_size(ScanfDirective *dir) {
  const char conv = dir->convSpecifier;
  const bool stringLike = char_is_one_of(conv, "cCsS[");

  // With "m" the argument is a char ** that receives the allocated buffer.
  if (dir->allocate) {
    if (!stringLike)
      return FSS_INVALID;
    return sizeof(char *);
  }

  if (dir->maybeGnuMalloc) {
    if (conv != 'a' || dir->lengthModifier[0])
      return FSS_INVALID;
    // This is ambiguous, so check the smaller size of char * (if it is
    // a GNU extension of %as, %aS or %a[...]) and float (if it is
    // POSIX %a followed by s, S or [ letters).
    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
  }

  if (!stringLike)
    return format_get_value_size(conv, dir->lengthModifier, false);

  // %s, %S and %[ append a terminator; %c and %C do not.
  const bool needsTerminator = char_is_one_of(conv, "sS[");
  unsigned charSize = format_get_char_size(conv, dir->lengthModifier);
  if (charSize == 0)
    return FSS_INVALID;

  // An explicit width bounds the store.
  if (dir->fieldWidth != 0)
    return (dir->fieldWidth + needsTerminator) * charSize;

  if (needsTerminator)
    return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
  return charSize;
}
// Common part of *scanf interceptors.
// Process format string and va_list, and report all store ranges.
// Stops when "consuming" n_inputs input items.
//
// ctx is passed through unchanged to the COMMON_INTERCEPTOR_* range macros.
// n_inputs must be > 0 (CHECKed); allowGnuMalloc is forwarded to
// scanf_parse_next(). One pointer is pulled from aq for every directive that
// is not assignment-suppressed.
//
// Processing also stops early on a parse error, on a positional ("%n$")
// directive (unsupported), and on a directive whose size cannot be
// determined (a warning is reported in that case).
static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
const char *format, va_list aq) {
CHECK_GT(n_inputs, 0);
const char *p = format;
// The format string itself, including its terminator, is read.
COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
while (*p) {
ScanfDirective dir;
p = scanf_parse_next(p, allowGnuMalloc, &dir);
if (!p)
break;
if (dir.convSpecifier == 0) {
// This can only happen at the end of the format string.
CHECK_EQ(*p, 0);
break;
}
// Here the directive is valid. Do what it says.
if (dir.argIdx != -1) {
// Unsupported.
break;
}
// Suppressed directives ("%*d") consume no argument.
if (dir.suppressed)
continue;
int size = scanf_get_value_size(&dir);
if (size == FSS_INVALID) {
Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
break;
}
void *argp = va_arg(aq, void *);
// %n stores a value but does not count towards n_inputs.
if (dir.convSpecifier != 'n')
--n_inputs;
if (n_inputs < 0)
break;
if (size == FSS_STRLEN) {
// Unbounded narrow string: measure what is in the destination buffer,
// plus the terminator.
size = internal_strlen((const char *)argp) + 1;
} else if (size == FSS_WCSLEN) {
// FIXME: actually use wcslen() to calculate it.
size = 0;
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
// For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
if (dir.allocate) {
// argp is treated as a char **; a null stored pointer is skipped.
if (char *buf = *(char **)argp) {
if (dir.convSpecifier == 'c')
size = 1;
else if (dir.convSpecifier == 'C')
size = sizeof(wchar_t);
else if (dir.convSpecifier == 'S')
size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t);
else // 's' or '['
size = internal_strlen(buf) + 1;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
}
}
}
}
#if SANITIZER_INTERCEPT_PRINTF
// One parsed printf conversion directive, as filled in by
// printf_parse_next(). The parser zeroes the whole struct and then sets both
// index fields to -1 before parsing.
struct PrintfDirective {
int fieldWidth; // explicit numeric field width, or 0 if none was given
int fieldPrecision; // explicit numeric precision, or 0 if none was given
int argIdx; // width argument index, or -1 if not specified ("%*n$")
// NOTE(review): the parser visible in this file only ever
// sets this to -1; a leading "n$" is stored in precisionIdx.
int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
const char *begin; // points at the '%' that starts the directive
const char *end; // points just past the conversion specifier
bool starredWidth; // width is given as '*' (taken from the argument list)
bool starredPrecision; // precision is given as '*'
char lengthModifier[2]; // e.g. {'l', 'l'} for "ll"; unused slots are 0
char convSpecifier; // conversion character, e.g. 'd' or 's'
};
// Parses a number into *out if p starts with a digit and returns the position
// after it; otherwise returns p and leaves *out untouched.
static const char *maybe_parse_number(const char *p, int *out) {
  const bool startsWithDigit = *p >= '0' && *p <= '9';
  return startsWithDigit ? parse_number(p, out) : p;
}
// Parses either a '*' or an optional number at p.
// *star is always written: true if a '*' was consumed, false otherwise.
// *out is written only when a number was actually parsed.
// Returns the position after whatever was consumed.
static const char *maybe_parse_number_or_star(const char *p, int *out,
                                              bool *star) {
  const bool isStar = (*p == '*');
  *star = isStar;
  if (isStar)
    return p + 1;
  return maybe_parse_number(p, out);
}
// Parse printf format string. Same contract as scanf_parse_next: if a
// directive is encountered it is returned in dir, and the function returns a
// pointer to the first unprocessed character, or 0 in case of error.
// At the end of the string a pointer to the closing \0 is returned and
// dir->convSpecifier is left at 0.
// A format string that ends in the middle of a directive is an error.
static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
  internal_memset(dir, 0, sizeof(*dir));
  dir->argIdx = -1;
  dir->precisionIdx = -1;

  while (*p) {
    if (*p != '%') {
      ++p;
      continue;
    }
    dir->begin = p;
    ++p;
    // %%
    if (*p == '%') {
      ++p;
      continue;
    }
    if (*p == '\0') {
      return nullptr;
    }
    // %n$
    // NOTE(review): the leading argument index is stored in precisionIdx, not
    // argIdx. Kept as is; confirm whether argIdx was intended.
    p = maybe_parse_param_index(p, &dir->precisionIdx);
    CHECK(p);
    // Flags. Stop explicitly at the terminator so the scan can never run
    // beyond the end of the string (e.g. for "%1$").
    while (*p != '\0' && char_is_one_of(*p, "'-+ #0")) {
      ++p;
    }
    // Field width
    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
                                   &dir->starredWidth);
    if (!p)
      return nullptr;
    // Precision
    if (*p == '.') {
      ++p;
      // Actual precision is optional (surprise!)
      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
                                     &dir->starredPrecision);
      if (!p)
        return nullptr;
      // m$
      if (dir->starredPrecision) {
        p = maybe_parse_param_index(p, &dir->precisionIdx);
        CHECK(p);
      }
    }
    // The format string may end in the middle of a directive (e.g. "%5" or
    // "%.3"). Stop here rather than let the code below advance p beyond the
    // terminating '\0'.
    if (*p == '\0')
      return nullptr;
    // Length modifier.
    p = maybe_parse_length_modifier(p, dir->lengthModifier);
    // Same check for a directive that ends right after the length modifier
    // (e.g. "%l"): consuming the '\0' as the conversion specifier would leave
    // p one past the end of the string, and the caller dereferences p.
    if (*p == '\0')
      return nullptr;
    // Conversion specifier.
    dir->convSpecifier = *p++;
    dir->end = p;
    break;
  }
  return p;
}
// Returns the size in bytes of the argument a printf directive consumes, or
// one of the FormatStoreSize sentinels: FSS_STRLEN / FSS_WCSLEN for narrow /
// wide string arguments and FSS_INVALID for unrecognized directives.
static int printf_get_value_size(PrintfDirective *dir) {
  const char conv = dir->convSpecifier;

  // Everything that is not character- or string-like is a plain scalar.
  if (!char_is_one_of(conv, "cCsS"))
    return format_get_value_size(conv, dir->lengthModifier, true);

  unsigned charSize = format_get_char_size(conv, dir->lengthModifier);
  if (charSize == 0)
    return FSS_INVALID;

  // %c / %C: a single character.
  if (!char_is_one_of(conv, "sS"))
    return charSize;

  return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
}
// Consumes one by-value argument of the given size from the va_list that aq
// points to. Floating point conversions are fetched as double (size 8) or
// long double (size 12 or 16); everything else is fetched as u32 (size 1, 2
// or 4) or u64 (size 8).
// For any other size a warning is reported and the macro executes `return;`,
// i.e. it leaves the *enclosing function*, so it may only be used inside
// functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \
  do { \
    if (!format_is_float_conv(convSpecifier)) { \
      switch (size) { \
        case 1: \
        case 2: \
        case 4: \
          va_arg(*(aq), u32); \
          break; \
        case 8: \
          va_arg(*(aq), u64); \
          break; \
        default: \
          Report("WARNING: unexpected arg size" \
                 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
          return; \
      } \
    } else { \
      switch (size) { \
        case 8: \
          va_arg(*(aq), double); \
          break; \
        case 12: \
        case 16: \
          va_arg(*(aq), long double); \
          break; \
        default: \
          Report("WARNING: unexpected floating-point arg size" \
                 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
          return; \
      } \
    } \
  } while (0)
// Common part of *printf interceptors.
// Process format string and va_list, and report all load ranges.
//
// ctx is passed through unchanged to the COMMON_INTERCEPTOR_* range macros.
// Arguments are pulled from aq in directive order. Processing stops at the
// first parse error, positional ("n$") directive (unsupported) or directive
// with an unknown size. Note that SKIP_SCALAR_ARG returns from this function
// when it meets an argument size it does not handle.
static void printf_common(void *ctx, const char *format, va_list aq) {
// The format string itself, including its terminator, is read.
COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
const char *p = format;
while (*p) {
PrintfDirective dir;
p = printf_parse_next(p, &dir);
if (!p)
break;
if (dir.convSpecifier == 0) {
// This can only happen at the end of the format string.
CHECK_EQ(*p, 0);
break;
}
// Here the directive is valid. Do what it says.
if (dir.argIdx != -1 || dir.precisionIdx != -1) {
// Unsupported.
break;
}
if (dir.starredWidth) {
// Dynamic width
SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
}
if (dir.starredPrecision) {
// Dynamic precision
SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
}
// %m does not require an argument: strlen(errno).
if (dir.convSpecifier == 'm')
continue;
int size = printf_get_value_size(&dir);
if (size == FSS_INVALID) {
// Function-local static: the warning is emitted at most once.
static int ReportedOnce;
if (!ReportedOnce++)
Report(
"%s: WARNING: unexpected format specifier in printf "
"interceptor: %.*s (reported once per process)\n",
SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
break;
}
if (dir.convSpecifier == 'n') {
// %n is the only printf directive that stores through its argument.
void *argp = va_arg(aq, void *);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
continue;
} else if (size == FSS_STRLEN) {
// Narrow string argument; a null pointer is skipped.
if (void *argp = va_arg(aq, void *)) {
uptr len;
if (dir.starredPrecision) {
// FIXME: properly support starred precision for strings.
len = 0;
} else if (dir.fieldPrecision > 0) {
// Won't read more than "precision" symbols.
len = internal_strnlen((const char *)argp, dir.fieldPrecision);
// The terminator is read too if it lies within the precision.
if (len < (uptr)dir.fieldPrecision)
len++;
} else {
// Whole string will be accessed.
// NOTE(review): fieldPrecision is 0 both when no precision was given
// and for an explicit "." / ".0", so those cases also end up here.
len = internal_strlen((const char *)argp) + 1;
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
}
} else if (size == FSS_WCSLEN) {
if (void *argp = va_arg(aq, void *)) {
// FIXME: Properly support wide-character strings (via wcsrtombs).
COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
}
} else {
// Skip non-pointer args
SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
}
}
}
#endif // SANITIZER_INTERCEPT_PRINTF