| //===- ScanfSupport.cpp - Secure scanf() replacement ------------===// |
| // |
| // The SAFECode Compiler Project |
| // |
| // This file was developed by the LLVM research group and is distributed under |
| // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a secure runtime replacement for scanf() and similar |
| // functions. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // |
| // This code is derived from MINIX's doscan.c; original license follows: |
| // |
| // /cvsup/minix/src/lib/stdio/doscan.c,v 1.1.1.1 2005/04/21 14:56:35 beng Exp $ |
| // |
| // Copyright (c) 1987,1997,2001 Prentice Hall |
| // All rights reserved. |
| // |
| // Redistribution and use of the MINIX operating system in source and |
| // binary forms, with or without modification, are permitted provided |
| // that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following |
| // disclaimer in the documentation and/or other materials provided |
| // with the distribution. |
| // |
| // * Neither the name of Prentice Hall nor the names of the software |
| // authors or contributors may be used to endorse or promote |
| // products derived from this software without specific prior |
| // written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND |
| // CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
| // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
| // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| // |
| |
| #include "safecode/Config/config.h" |
| #include "FormatStrings.h" |
| |
| #include <ctype.h> |
| #include <inttypes.h> |
| #include <limits.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <stdarg.h> |
| #include <wchar.h> |
| |
| using std::map; |
| |
| #ifdef __x86_64__ |
| #define set_pointer(flags) (flags |= FL_LONG) |
| #else |
| #define set_pointer(flags) // nothing |
| #endif |
| |
| // Maximum allowable size for an input number. |
| #define NUMLEN 512 |
| #define NR_CHARS 256 |
| |
| // |
| // Flags describing how to process the input |
| // |
| #define FL_CHAR 0x0001 // hh length modifier |
| #define FL_SHORT 0x0002 // h length modifier |
| #define FL_LLONG 0x0004 // ll length modifier |
| #define FL_LONG 0x0008 // l length modifier |
| #define FL_LONGDOUBLE 0x0010 // L length modifier |
| #define FL_INTMAX 0x0020 // j length modifier |
| #define FL_SIZET 0x0040 // z length modifier |
| #define FL_PTRDIFF 0x0080 // t length modifier |
| #define FL_NOASSIGN 0x0100 // do not assign (* flag) |
| #define FL_WIDTHSPEC 0x0200 // field width specified |
| |
| // |
| // _getc() |
| // |
| // Get the next character from the input. |
| // Returns EOF on reading error or end of file/string. |
| // |
| static inline int |
| _getc(input_parameter *i) |
| { |
| // |
| // Get the next character from the string. |
| // |
| if (i->input_kind == input_parameter::INPUT_FROM_STRING) |
| { |
| const char *string = i->input.string.string; |
| size_t &pos = i->input.string.pos; |
| if (string[pos] == '\0') |
| return EOF; |
| else |
| return string[pos++]; |
| } |
| // |
| // Get the next character from the stream. |
| // |
| else // i->InputKind == input_parameter::INPUT_FROM_STREAM |
| { |
| FILE *file = i->input.stream.stream; |
| char &lastch = i->input.stream.lastch; |
| int ch; |
| // |
| // Call fgetc_unlocked() for performance because the given input stream |
| // should already be locked by the thread. |
| // |
| // Use fgetc() on platforms without fgetc_unlocked(). |
| // |
| #ifndef HAVE_FGETC_UNLOCKED |
| #define fgetc_unlocked fgetc |
| #endif |
| if ((ch = fgetc_unlocked(file)) == EOF) |
| return EOF; |
| else |
| { |
| // Save the character we got in case it is pushed back via _ungetc(). |
| lastch = ch; |
| return ch; |
| } |
| } |
| } |
| |
| // |
| // _ungetc() |
| // |
| // 'Push back' the last character that was read from the input source. |
| // This function assumes at least one character has been read via _getc(). |
| // This function should be called at most once between calls to _getc(), |
| // so that at most one character is pushed back at any given time. |
| // |
| static inline void |
| _ungetc(input_parameter *i) |
| { |
| if (i->input_kind == input_parameter::INPUT_FROM_STRING) |
| // |
| // 'Push back' the string by just decrementing the position. |
| // |
| i->input.string.pos--; |
| else if (i->input_kind == input_parameter::INPUT_FROM_STREAM) |
| { |
| const char lastch = i->input.stream.lastch; |
| // |
| // Use ungetc() to push the last character back into the stream. |
| // See the note over internal_scanf() about the portability of this |
| // operation. |
| // |
| ungetc(lastch, i->input.stream.stream); |
| } |
| } |
| |
| // |
| // input_failure() |
| // |
| // Check if the parameter has had an input failure. |
| // This is defined as EOF or a read error, according to the standard. |
| // For strings an input error is the same as the end of the string. |
| // |
| // Returns: True if the parameter is said to have an input failure, false |
| // otherwise. |
| // |
| static inline bool |
| input_failure(input_parameter *i) |
| { |
| if (i->input_kind == input_parameter::INPUT_FROM_STRING) |
| return i->input.string.string[i->input.string.pos] == 0; |
| else // i->InputKind == input_parameter::INPUT_FROM_STREAM |
| { |
| FILE *f = i->input.stream.stream; |
| return ferror(f) || feof(f); |
| } |
| } |
| |
| // |
| // o_collect() |
| // |
| // Collect a number of characters which constitite an ordinal number. |
| // When the type is 'i', the base can be 8, 10, or 16, depending on the |
| // first 1 or 2 characters. This means that the base must be adjusted |
| // according to the format of the number. At the end of the function, base |
| // is then set to 0, so strtol() will get the right argument. |
| // |
| // Inputs: |
| // |
| // c - the first character to read as an input item |
| // stream - the input_parameter object which contains the rest of the |
| // characters to be read as input items |
| // inp_buf - the buffer into which the number should be written |
| // type - the type of the specifier associated with this conversion, one of |
| // 'i', 'p', 'x', 'X', 'd', 'o', or 'b' |
| // width - the maximum field width |
| // basep - A pointer to an integer. This value is written into with the |
| // numerical base of the value in the buffer, suitable for a call to |
| // strtol() or other function, as determined by this function. |
| // |
| // This function returns NULL if the input buffer was not filled with a valid |
| // integer that could be converted; and otherwise if the input buffer contains |
| // the digits of a valid integer, the function returns the last nonnul position |
| // of the buffer that was written. |
| // |
| // On success, the buffer is suited for a call to strtol() or other integer |
| // conversion function, with the base given in *basep. |
| // |
| static char * |
| o_collect(int c, |
| input_parameter *stream, |
| char *inp_buf, |
| char type, |
| unsigned int width, |
| int *basep) |
| { |
| char *bufp = inp_buf; |
| int base = 0; |
| |
| switch (type) |
| { |
| case 'i': // i means octal, decimal or hexadecimal |
| case 'p': |
| case 'x': |
| case 'X': base = 16; break; |
| case 'd': |
| case 'u': base = 10; break; |
| case 'o': base = 8; break; |
| case 'b': base = 2; break; |
| } |
| // |
| // Process any initial +/- sign. |
| // |
| if (c == '-' || c == '+') |
| { |
| *bufp++ = c; |
| if (--width) |
| c = _getc(stream); |
| else |
| return 0; // An initial [+-] is not a valid number. |
| } |
| // |
| // Determine whether an initial '0' means to process the number in |
| // hexadecimal or octal, if we are given a choice between the two. |
| // |
| if (width && c == '0' && base == 16) |
| { |
| *bufp++ = c; |
| if (--width) |
| c = _getc(stream); |
| if (c != 'x' && c != 'X') |
| { |
| if (type == 'i') base = 8; |
| } |
| else if (width) |
| { |
| *bufp++ = c; |
| if (--width) |
| c = _getc(stream); |
| } |
| else |
| return 0; // Don't accept only an initial [+-]?0[xX] as a valid number. |
| } |
| else if (type == 'i') |
| base = 10; |
| // |
| // Read as many digits as we can. |
| // |
| while (width) |
| { |
| if ( ((base == 10) && isdigit(c) ) |
| || ((base == 16) && isxdigit(c) ) |
| || ((base == 8) && isdigit(c) && (c < '8')) |
| || ((base == 2) && isdigit(c) && (c < '2'))) |
| { |
| *bufp++ = c; |
| if (--width) |
| c = _getc(stream); |
| } |
| else |
| break; |
| } |
| // |
| // Push back any extra read characters that aren't part of an integer. |
| // |
| if (width && c != EOF) |
| _ungetc(stream); |
| if (type == 'i') |
| base = 0; |
| *basep = base; |
| *bufp = '\0'; |
| return bufp == inp_buf ? 0 : bufp - 1; |
| } |
| |
| #ifdef FLOATING_POINT |
| |
| #include "ScanfTables.h" |
| |
| // |
| // f_collect() |
| // |
| // Read the longest valid floating point number prefix from the input buffer. |
| // Upon encountering an error, the function returns and leaves the character |
| // to have caused the error in the input stream. |
| // |
| // Inputs: |
| // c - the first character to read |
| // stream - the parameter from which to get the rest of the characters |
| // inp_buf - the buffer into which to write the number |
| // width - the maximum number of characters to read |
| // |
| // Returns: |
| // On error, the function returns NULL. On success, it returns a pointer to the |
| // last non-nul position in the input buffer. |
| // |
| char * |
| f_collect(int c, input_parameter *stream, char *inp_buf, unsigned int width) |
| { |
| int state = 1; // The start state from the scanner |
| char *buf = inp_buf; |
| int ch; |
| int accept; |
| |
| ch = c; |
| // |
| // This loop matches the input with the transition table. |
| // |
| while (width && state > 0) |
| { |
| // |
| // Handle an 8 bit character or EOF character by breaking immediately. |
| // |
| if (ch == EOF || ch & 0x80) // ch == EOF || ch > 127 |
| break; |
| state = yy_nxt[state][ch]; |
| // |
| // Advance to the next state and save the current character, if valid. |
| // |
| if (state > 0) |
| *buf++ = ch; |
| // |
| // Get the next character. |
| // |
| if (--width) |
| ch = _getc(stream); |
| } |
| // |
| // Push back the next character if it was a valid character not part of the |
| // input sequence. |
| // |
| if (width > 0 && ch != EOF) |
| _ungetc(stream); |
| // |
| // Get information about the next action of the scanner. |
| // |
| accept = yy_accept[state < 0 ? -state : state]; |
| // |
| // A value of 0 for accept indicates failure/that the scanner should revert |
| // to the previous accepting state. Since this is not possible without |
| // pushing back more than one character, we should just fail. |
| // |
| // A value of DEFAULT_RULE indicates to the scanner to echo the output |
| // since it is unmatched. This also indicates failure. |
| // |
| #define DEFAULT_RULE 5 |
| if (accept == 0 || accept == DEFAULT_RULE) |
| return 0; |
| else if (buf == inp_buf) |
| return 0; |
| else |
| { |
| *buf = '\0'; |
| return buf - 1; |
| } |
| } |
| #endif // FLOATING_POINT |
| |
| // |
| // eat_whitespace() |
| // |
| // Read all initial whitespace from the input stream. |
| // |
| // Inputs: |
| // stream - the input stream |
| // count - a reference to a counter of the number of characters read |
| // |
| // Returns: |
| // This function returns the first non whitespace character encountered in the |
| // input stream (which could be EOF). |
| // |
| static inline int |
| eat_whitespace(input_parameter *stream, int &count) |
| { |
| int ch; |
| do |
| { |
| ch = _getc(stream); |
| count++; |
| } while (isspace(ch)); |
| return ch; |
| } |
| |
| // |
| // This is a bitvector which specifies a set of characters to match. |
| // |
| typedef struct |
| { |
| enum { FUNCTION, TABLE } kind; |
| union |
| { |
| uint64_t t[4]; |
| int (*f)(int); |
| } s; |
| } scanset_t; |
| |
| const scanset_t all_chars = |
| { |
| scanset_t::TABLE, |
| { { 0xffffffffffffffffull, |
| 0xffffffffffffffffull, |
| 0xffffffffffffffffull, |
| 0xffffffffffffffffull } } |
| }; |
| |
| // Clear the scanset. |
| #define clear_scanset(set) \ |
| for (int i = 0; i < 4; i++) (set)->s.t[i] = 0ul |
| // Add a character to the scanset. |
| #define add_to_scanset(set, c) \ |
| (set)->s.t[((unsigned char) c) >> 6] |= ((uint64_t)1) << (c & 0x3f) |
| // Take the complement of the scanset. |
| #define invert_scanset(set) \ |
| for (int i = 0; i < 4; i++) (set)->s.t[i] = ~(set)->s.t[i] |
| // Query if a character is in the scanset. |
| #define is_in_scanset(set, c) \ |
| ( \ |
| (set)->kind == scanset_t::TABLE ? \ |
| (set)->s.t[((unsigned char) c) >> 6] & (((uint64_t) 1) << (c & 0x3f)) : \ |
| (set)->s.f(c) \ |
| ) |
| |
| // |
| // read_scanset() |
| // |
| // Read the %[...]-style directive embedded in the given format string, and |
| // construct the resuling scanset into *scanset. |
| // |
| // Inputs: |
| // format - the format string, assumed to be pointing to the next character |
| // after the sequence "%[" |
| // scanset - the scanset to fill with the characters in the sequence |
| // |
| // Returns: |
| // The function returns a pointer to the first subsequent character in the |
| // format string that is considered not part of the scanset. This character |
| // will be ']' in a valid directive, or '\0' if the directive is somehow |
| // malformed. |
| // |
| static inline const char * |
| read_scanset(const char *format, scanset_t *scanset) |
| { |
| int reverse; |
| const char *start = format; |
| // |
| // Determine if we take the complement of the scanset. |
| // |
| if (*++format == '^') |
| { |
| reverse = 1; |
| format++; |
| } |
| else |
| reverse = 0; |
| // |
| // Clear the scanset first. |
| // |
| clear_scanset(scanset); |
| // |
| // ']' appearing as the first character in the set does not close the |
| // directive, but rather adds ']' to the scanset. |
| // |
| if (*format == ']') |
| { |
| add_to_scanset(scanset, ']'); |
| format++; |
| } |
| // |
| // Parse the rest of the directive. |
| // |
| while (*format != '\0' && *format != ']') |
| { |
| // |
| // Add a character range to the scanset... |
| // |
| if (*format == '-') |
| { |
| format++; |
| // |
| // ...unless we're in a boundary condition, in which case just add '-'. |
| // |
| if (*format == ']' || &format[-2] == start || |
| (&format[-2] == &start[1] && start[1] == '^')) |
| { |
| add_to_scanset(scanset, '-'); |
| } |
| else if (*format >= format[-2]) |
| { |
| int c; |
| for (c = format[-2] + 1; c <= *format; c++) |
| add_to_scanset(scanset, c); |
| format++; |
| } |
| } |
| else |
| { |
| add_to_scanset(scanset, *format); |
| format++; |
| } |
| } |
| // |
| // Take the complement, if necessary. |
| // |
| if (reverse) |
| invert_scanset(scanset); |
| return format; |
| } |
| |
| // |
| // isnspace() |
| // |
| // Used by the %s directive for reading input. |
| // |
| int isnspace(int c) |
| { |
| return !isspace(c); |
| } |
| |
| // |
| // match_string() |
| // |
| // Read a string of characters that belong in the given scanset, up to size |
| // width. If 'dowrite' is true, write the string (with a nul terminator) into |
| // the buffer associated with the pointer_info argument. Report if there are |
| // any memory safety errors due to writing into the buffer. |
| // |
| // Inputs: |
| // |
| // ci - a pointer to the call_info structure for this function call |
| // options - options passed to internal_scanf() |
| // p - a pointer to the pointer_info structure that contains the buffer |
| // to write into |
| // flags - the set of flags associated with the current conversion |
| // c - the first character to read |
| // stream - the input stream which contains the rest of the characters |
| // width - the maximum number of characters to read |
| // dowrite - a boolean, if true, the string is written into a buffer |
| // termin - a boolean, if true, the buffer is terminated |
| // nrchars - a reference to a relevant counter of the number of characters |
| // read |
| // set - the set of permissible characters in the string |
| // |
| // Returns: |
| // This function returns the number of characters that it has read. |
| // Reading 0 characters indicates a conversion error. |
| // |
| // If dowrite is true and (flags&FL_LONG) is true, then the buffer is filled |
| // with wide characters converted from the multibyte input stream. Otherwise, |
| // the buffer is filled with the same characters that composed the input, if |
| // dowrite is true. |
| // |
| static inline size_t |
| match_string(call_info *ci, |
| const options_t options, |
| pointer_info *p, |
| int flags, |
| int c, |
| input_parameter *stream, |
| size_t width, |
| const bool dowrite, |
| const bool termin, |
| int &nrchars, |
| const scanset_t *set) |
| { |
| size_t maxwrite = SIZE_MAX; |
| size_t writecount = 0; |
| size_t readcount = 1; // We've "read" c already. |
| char *buf = 0; |
| char mbbuf[MB_LEN_MAX]; |
| size_t mbbufpos = 0; |
| wchar_t wc; |
| size_t wclen; |
| const bool wcs = flags & FL_LONG; |
| mbstate_t ps; |
| if (wcs) |
| memset(&ps, 0, sizeof(mbstate_t)); |
| // |
| // Read the input string. |
| // |
| while (width > 0 && c != EOF && is_in_scanset(set, c)) |
| { |
| // |
| // If after reading a character, the buffer is ready for writing, prepare |
| // it for writing. |
| // |
| if (readcount++ == 1 && dowrite) |
| { |
| // |
| // Get the output buffer. |
| // |
| buf = (char *) p; |
| if (p == NULL) |
| { |
| cerr << "Writing into a NULL object!" << endl; |
| c_library_error(ci, "scanf"); |
| } |
| else if (is_in_whitelist(ci, options, p) == false) |
| { |
| cerr << "Writing into a non-pointer!" << endl; |
| c_library_error(ci, "scanf"); |
| } |
| else |
| { |
| find_object(ci, p); |
| if (p->flags & HAVEBOUNDS) |
| maxwrite = (size_t) 1 + (char *)p->bounds[1] - (char *) p->ptr; |
| buf = (char *) p->ptr; |
| if (buf == 0) |
| { |
| cerr << "Writing into a NULL object!" << endl; |
| c_library_error(ci, "scanf"); |
| } |
| } |
| } |
| if (dowrite) |
| { |
| if (!wcs) |
| { |
| // |
| // Write directly into the output buffer. |
| // |
| // |
| // Check if this write will go out of bounds. Only report this the first |
| // time that it occurs. |
| // |
| if ((++writecount - maxwrite) == 1) |
| { |
| cerr << "Writing out of bounds!" << endl; |
| write_out_of_bounds_error(ci, p, maxwrite, writecount); |
| } |
| *buf++ = c; |
| } |
| else |
| { |
| // |
| // Read the next multibyte character incrementally. Write it into the |
| // output buffer once we've encountered a valid character. |
| // |
| mbbuf[mbbufpos++] = (char) c; |
| // |
| // Attempt a multibyte to wide character conversion. |
| // |
| wclen = mbrtowc(&wc, &mbbuf[0], mbbufpos, &ps); |
| if (wclen == (size_t) -1) // Conversion error |
| break; |
| else if (wclen == (size_t) -2) |
| ; // The buffer might be holding the prefix of a valid |
| // multibyte character sequence, so try reading more. |
| else |
| { |
| writecount += sizeof(wchar_t); |
| |
| // |
| // Check if the write will go out of bounds. Only report this the |
| // first time that it occurs. |
| // |
| if (writecount > maxwrite && |
| (writecount - maxwrite) <= sizeof(wchar_t)) |
| { |
| cerr << "Writing out of bounds!" << endl; |
| write_out_of_bounds_error(ci, p, maxwrite, writecount); |
| } |
| ((wchar_t *)buf)[(writecount/sizeof(wchar_t)) - 1] = wc; |
| // |
| // Reset the input buffer. |
| // |
| mbbufpos = 0; |
| } |
| } |
| } |
| if (--width > 0) |
| { |
| c = _getc(stream); |
| nrchars++; |
| } |
| } |
| // |
| // Terminate the string with the appropriate nul terminator, if necessary. |
| // |
| if (termin && dowrite && writecount > 0) |
| { |
| if (!wcs) |
| { |
| if ((++writecount - maxwrite) == 1) |
| { |
| cerr << "Writing out of bounds!" << endl; |
| write_out_of_bounds_error(ci, p, maxwrite, writecount); |
| } |
| *buf = '\0'; |
| } |
| else |
| { |
| writecount += sizeof(wchar_t); |
| if (writecount > maxwrite && |
| (writecount - maxwrite) <= sizeof(wchar_t)) |
| { |
| cerr << "Writing out of bounds!" << endl; |
| write_out_of_bounds_error(ci, p, maxwrite, writecount); |
| } |
| ((wchar_t *)buf)[(writecount/sizeof(wchar_t)) - 1] = L'\0'; |
| } |
| } |
| // |
| // Push back any character that was read and not in the scanset. |
| // |
| if (width > 0 && c != EOF) |
| { |
| _ungetc(stream); |
| readcount--; |
| nrchars--; |
| } |
| // |
| // Don't count EOF. |
| // |
| else if (c == EOF) |
| { |
| readcount--; |
| nrchars--; |
| } |
| return readcount; |
| } |
| |
| // |
| // internal_scanf() |
| // |
| // This is the main logic for the secured scanf() family of functions. |
| // |
| // |
| // IMPLEMENTATION NOTES |
| // - This function uses ungetc() to push back characters into a stream. |
| // This is an error because at most one character can be pushed back |
| // portably, but an ungetc() call is allowed to follow a call to a scan |
| // function without any intervening I/O. Hence if this function calls |
| // ungetc() and then the caller does so again on the same stream, the |
| // result might fail. |
| // |
| // However, glibc appears to have support for consecutive 2 character |
| // pushback, so this doesn't fail on glibc. |
| // |
| // Mac OS X's libc also has support for 2 character pushback. |
| // |
| // - A nonstandard %b specifier is supported, for reading binary integers. |
| // - No support for positional arguments (%n$-style directives) (yet ?). |
| // - The maximum supported width of a numerical constant in the input is 512 |
| // bytes. |
| // |
| int |
| internal_scanf(const options_t options, |
| input_parameter &i, |
| call_info &c, |
| const char *fmt, |
| va_list ap) |
| { |
| int done = 0; // number of items converted |
| int nrchars = 0; // number of characters read |
| int base; // conversion base |
| uintmax_t val; // an integer value |
| char *str = 0; // temporary pointer |
| char *tmp_string; // ditto |
| unsigned width = 0; // width of field |
| int flags; // some flags |
| int kind; // holds specifier value |
| int ic = EOF; // the input character |
| char inp_buf[NUMLEN + 1]; // buffer to hold numerical inputs |
| |
| #ifdef FLOATING_POINT |
| long double ld_val; |
| #endif |
| |
| const char *format = fmt; |
| input_parameter *stream = &i; |
| |
| pointer_info *p = 0; |
| |
| // Return immediately for an empty format string. |
| if (*format == '\0') |
| return 0; |
| |
| mbstate_t ps; |
| size_t len; |
| const char *mb_pos; |
| memset(&ps, 0, sizeof(ps)); |
| |
| call_info *ci = &c; |
| unsigned int arg = 1; |
| |
| bool wr; |
| size_t sz; |
| scanset_t nonws; |
| nonws.kind = scanset_t::FUNCTION; |
| nonws.s.f = isnspace; |
| scanset_t scanset; |
| scanset.kind = scanset_t::TABLE; |
| |
| // |
| // Map to hold the wrappers for pointers, if the va_list doesn't have wrapped |
| // pointers. |
| // |
| map<void *, pointer_info *> wrappers; |
| const options_t &opt = options; |
| |
| // |
| // Scanning macros |
| // |
| |
| // |
| // Get the next pointer argument, properly wrapped |
| // |
| #define GETPTRARG() \ |
| (pointer_info *) wrap_pointer( \ |
| options, \ |
| (varg_check(ci, options, arg++), va_arg(ap, void *)), \ |
| wrappers \ |
| ) |
| |
| // |
| // SAFEWRITE() |
| // |
| // A macro that attempts to securely write a value into the location pointed to |
| // by the next argument. |
| // |
| // Inputs: |
| // ci - pointer to the call_info structure |
| // options - options passed to the function call |
| // j - a pointer to the pointer_info structure that contains the |
| // destination pointer |
| // item - the item to write |
| // type - the type to write the item as |
| // |
| #define SAFEWRITE(ci, options, j, item, type) \ |
| do \ |
| { \ |
| const options_t opt = (options); \ |
| pointer_info *ptr = (j); \ |
| call_info *cinfo = (ci); \ |
| write_check(cinfo, opt, ptr, sizeof(type)); \ |
| void *dest = unwrap_pointer(cinfo, opt, ptr); \ |
| *(type *) dest = (type) item; \ |
| } while (0) |
| |
| // |
| // Version of SAFEWRITE() relevant to this function. |
| // |
| #define _SAFEWRITE(item, type) SAFEWRITE(ci, options, GETPTRARG(), item, type) |
| |
| // |
| // The main loop that processes the format string. |
| // At the end of the loop, the count of assignments is updated and the format |
| // string pointer is incremented one position. |
| // |
| while (1) |
| { |
| // |
| // Whitespace in the format string indicates to match all whitespace in the |
| // input stream. |
| // |
| if (isspace(*format)) |
| { |
| while (isspace(*format)) |
| format++; // Skip whitespace in the format string. |
| ic = eat_whitespace(stream, nrchars); |
| if (ic != EOF) |
| _ungetc(stream); |
| } |
| if (*format == '\0') |
| break; // End of format |
| // |
| // Match a multibyte character from the input. |
| // |
| if (*format != '%') |
| { |
| len = mbrtowc(NULL, format, MB_CUR_MAX, &ps); |
| mb_pos = format; |
| format += len; |
| while (mb_pos != format) |
| { |
| ic = _getc(stream); |
| if (ic != *mb_pos) |
| break; |
| else |
| { |
| nrchars++; |
| mb_pos++; |
| } |
| } |
| if (mb_pos != format && ic != *mb_pos) |
| { |
| // |
| // A directive that is an ordinary multibyte character is executed |
| // by reading the next characters of the stream. If any of those |
| // characters differ from the ones composing the directive, the |
| // directive fails and the differing and subsequent characters |
| // remain unread. |
| // |
| // - C99 Standard |
| // |
| if (ic != EOF) |
| { |
| _ungetc(stream); |
| goto match_failure; |
| } |
| else |
| goto failure; |
| } |
| continue; |
| } |
| format++; // We've read '%'; start processing a directive. |
| flags = 0; |
| // |
| // The '%' specifier |
| // |
| if (*format == '%') |
| { |
| ic = eat_whitespace(stream, nrchars); |
| if (ic == '%') |
| { |
| format++; |
| continue; |
| } |
| else |
| { |
| _ungetc(stream); |
| goto failure; |
| } |
| } |
| // |
| // '*' flag: Suppress assignment. |
| // |
| if (*format == '*') |
| { |
| format++; |
| flags |= FL_NOASSIGN; |
| } |
| // |
| // Get the field width, if there is any. |
| // |
| if (isdigit (*format)) |
| { |
| flags |= FL_WIDTHSPEC; |
| for (width = 0; isdigit (*format);) |
| width = width * 10 + *format++ - '0'; |
| } |
| // |
| // Process length modifiers. |
| // |
| switch (*format) |
| { |
| case 'h': |
| if (*++format == 'h') |
| { |
| format++; |
| flags |= FL_CHAR; |
| } |
| else |
| flags |= FL_SHORT; |
| break; |
| case 'l': |
| if (*++format == 'l') |
| { |
| format++; |
| flags |= FL_LLONG; |
| } |
| else |
| flags |= FL_LONG; |
| break; |
| case 'j': |
| format++; |
| flags |= FL_INTMAX; |
| break; |
| case 'z': |
| format++; |
| flags |= FL_SIZET; |
| break; |
| case 't': |
| format++; |
| flags |= FL_PTRDIFF; |
| break; |
| case 'L': |
| format++; |
| flags |= FL_LONGDOUBLE; |
| break; |
| } |
| // |
| // Read the actual specifier. |
| // |
| kind = *format; |
| // |
| // Eat any initial whitespace for specifiers that allow it. |
| // |
| if ((kind != 'c') && (kind != '[') && (kind != 'n')) |
| { |
| ic = eat_whitespace(stream, nrchars); |
| if (ic == EOF) |
| goto failure; |
| } |
| // |
| // Get the initial character of the input, for non-%n specifiers. |
| // |
| else if (kind != 'n') |
| { |
| ic = _getc(stream); |
| if (ic == EOF) |
| goto failure; |
| nrchars++; |
| } |
| // |
| // Process the format specifier. |
| // |
| switch (kind) |
| { |
| default: |
| // not recognized, like %q |
| goto failure; |
| // |
| // %n specifier |
| // |
| case 'n': |
| if (!(flags & FL_NOASSIGN)) |
| { |
| if (flags & FL_CHAR) |
| _SAFEWRITE(nrchars, char); |
| else if (flags & FL_SHORT) |
| _SAFEWRITE(nrchars, short); |
| else if (flags & FL_LONG) |
| _SAFEWRITE(nrchars, long); |
| else if (flags & FL_LLONG) |
| _SAFEWRITE(nrchars, long long); |
| else if (flags & FL_INTMAX) |
| _SAFEWRITE(nrchars, intmax_t); |
| else if (flags & FL_SIZET) |
| _SAFEWRITE(nrchars, size_t); |
| else if (flags & FL_PTRDIFF) |
| _SAFEWRITE(nrchars, ptrdiff_t); |
| else |
| _SAFEWRITE(nrchars, int); |
| } |
| break; |
| // |
| // %p specifier |
| // |
| case 'p': |
| // Set any additional flags regarding pointer representation size. |
| set_pointer(flags); |
| // fallthrough |
| // |
| // Integral specifiers: %b, %d, %i, %o, %u, %x, %X |
| // |
| case 'b': // binary |
| case 'd': // decimal |
| case 'i': // general integer |
| case 'o': // octal |
| case 'u': // unsigned |
| case 'x': // hexadecimal |
| case 'X': // ditto |
| // Don't read more than NUMLEN bytes. |
| if (!(flags & FL_WIDTHSPEC) || width > NUMLEN) |
| width = NUMLEN; |
| if (width == 0) |
| goto match_failure; |
| |
| str = o_collect(ic, stream, inp_buf, kind, width, &base); |
| |
| if (str == 0) |
| goto failure; |
| |
| // |
| // Although the length of the number is str-inp_buf+1 |
| // we don't add the 1 since we counted it already. |
| // |
| nrchars += str - inp_buf; |
| |
| if (!(flags & FL_NOASSIGN)) |
| { |
| if (kind == 'd' || kind == 'i') |
| val = strtoimax(inp_buf, &tmp_string, base); |
| else |
| val = strtoumax(inp_buf, &tmp_string, base); |
| if (flags & FL_CHAR) |
| _SAFEWRITE(val, unsigned char); |
| else if (flags & FL_SHORT) |
| _SAFEWRITE(val, unsigned short); |
| else if (flags & FL_LONG) |
| _SAFEWRITE(val, unsigned long); |
| else if (flags & FL_LLONG) |
| _SAFEWRITE(val, unsigned long long); |
| else if (flags & FL_INTMAX) |
| _SAFEWRITE(val, uintmax_t); |
| else if (flags & FL_SIZET) |
| _SAFEWRITE(val, size_t); |
| else if (flags & FL_PTRDIFF) |
| _SAFEWRITE(val, ptrdiff_t); |
| else |
| _SAFEWRITE(val, unsigned); |
| } |
| break; |
| // |
| // %c specifier |
| // |
| case 'c': |
| // |
| // No specified width means to read a single character. |
| // |
| if (!(flags & FL_WIDTHSPEC)) |
| width = 1; |
| if (width == 0) |
| goto match_failure; |
| wr = !(flags & FL_NOASSIGN); |
| if (wr) |
| { |
| p = GETPTRARG(); |
| } |
| sz = \ |
| match_string( |
| ci, opt, p, flags, ic, stream, width, wr, false, nrchars, &all_chars |
| ); |
| if (sz == 0) |
| goto failure; |
| break; |
| // |
| // %s specifier |
| // |
| case 's': |
| if (!(flags & FL_WIDTHSPEC)) |
| width = UINT_MAX; |
| if (width == 0) |
| goto match_failure; |
| wr = !(flags & FL_NOASSIGN); |
| if (wr) |
| { |
| p = GETPTRARG(); |
| } |
| sz = \ |
| match_string( |
| ci, opt, p, flags, ic, stream, width, wr, true, nrchars, &nonws |
| ); |
| if (sz == 0) |
| goto failure; |
| break; |
| // |
| // %[...] specifier |
| // |
| case '[': |
| if (!(flags & FL_WIDTHSPEC)) |
| width = UINT_MAX; |
| if (width == 0) |
| goto match_failure; |
| format = read_scanset(format, &scanset); |
| // |
| // If the format string points to a '\0', then there was an error |
| // parsing the scanset. |
| // |
| if (*format == '\0') |
| goto match_failure; |
| wr = !(flags & FL_NOASSIGN); |
| if (wr) |
| { |
| p = GETPTRARG(); |
| } |
| sz = \ |
| match_string( |
| ci, opt, p, flags, ic, stream, width, wr, true, nrchars, &scanset |
| ); |
| if (sz == 0) |
| goto failure; |
| break; |
| |
| #ifdef FLOATING_POINT |
| // |
| // Floating point specifiers: %a, %A, %e, %E, %f, %F, %g, %G |
| // |
| case 'a': |
| case 'A': |
| case 'e': |
| case 'E': |
| case 'f': |
| case 'F': |
| case 'g': |
| case 'G': |
| if (!(flags & FL_WIDTHSPEC) || width > NUMLEN) |
| width = NUMLEN; |
| |
| if (!width) |
| goto failure; |
| str = f_collect(ic, stream, inp_buf, width); |
| |
| if (str == 0) |
| goto failure; |
| |
| // |
| // Although the length of the number is str-inp_buf+1 |
| // we don't add the 1 since we counted it already |
| // |
| nrchars += str - inp_buf; |
| |
| if (!(flags & FL_NOASSIGN)) |
| { |
| ld_val = strtold(inp_buf, &tmp_string); |
| if (flags & FL_LONGDOUBLE) |
| _SAFEWRITE(ld_val, long double); |
| else if (flags & FL_LONG) |
| _SAFEWRITE(ld_val, double); |
| else |
| _SAFEWRITE(ld_val, float); |
| } |
| break; |
| #endif |
| |
| } |
| if (!(flags & FL_NOASSIGN) && kind != 'n') |
| done++; |
| format++; |
| } |
| |
| goto finish; |
| |
| failure: |
| // |
| // In the event of a possible input failure (=eof or read error), the |
| // execution should jump to here. |
| // |
| if (done == 0 && input_failure(stream)) |
| done = EOF; // fall through |
| |
| // |
| // The fscanf function returns the value of the macro EOF if an input failure |
| // occurs before the first conversion (if any) has completed. Otherwise, the |
| // function returns the number of input items assigned, which can be fewer |
| // than provided for, or even zero, in the event of an early matching failure. |
| // |
| // - C99 Standard |
| // |
| match_failure: |
| finish: |
| // |
| // Deallocate any wrappers that were allocated. |
| // |
| for (map<void *, pointer_info *>::iterator pos = wrappers.begin(), |
| end = wrappers.end(); |
| pos != end; |
| ++pos) |
| { |
| delete pos->second; |
| } |
| return done; |
| } |