| /* Language lexer for the GNU compiler for the Java(TM) language. |
| Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 |
| Free Software Foundation, Inc. |
| Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com) |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING. If not, write to |
| the Free Software Foundation, 59 Temple Place - Suite 330, |
| Boston, MA 02111-1307, USA. |
| |
| Java and all Java-based marks are trademarks or registered trademarks |
| of Sun Microsystems, Inc. in the United States and other countries. |
| The Free Software Foundation is independent of Sun Microsystems, Inc. */ |
| |
| /* This file defines java_lex (yylex), which reads a Java ASCII source |
| file possibly containing Unicode escape sequences or UTF-8 encoded |
| characters and returns a token for everything found except comments, |
| white space and line terminators. When necessary, it also fills in |
| the java_lval (yylval) union. It is implemented to be called by a |
| re-entrant parser generated by Bison. |
| |
| The lexical analysis conforms to the Java grammar described in "The |
| Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele. |
| Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ |
| |
| #include "keyword.h" |
| #include "flags.h" |
| #include "chartables.h" |
| #ifndef JC1_LITE |
| #include "timevar.h" |
| #endif |
| |
| /* Forward declarations. The prototypes inside `#ifndef JC1_LITE' |
| are compiled only when JC1_LITE is not defined. */ |
| static char *java_sprint_unicode (int); |
| static void java_unicode_2_utf8 (unicode_t); |
| static void java_lex_error (const char *, int); |
| #ifndef JC1_LITE |
| static int do_java_lex (YYSTYPE *); |
| static int java_lex (YYSTYPE *); |
| static int java_is_eol (FILE *, int); |
| static tree build_wfl_node (tree); |
| #endif |
| static int java_parse_escape_sequence (void); |
| static int java_start_char_p (unicode_t); |
| static int java_part_char_p (unicode_t); |
| static int java_space_char_p (unicode_t); |
| static void java_parse_doc_section (int); |
| static void java_parse_end_comment (int); |
| static int java_read_char (java_lexer *); |
| static int java_get_unicode (void); |
| static int java_peek_unicode (void); |
| static void java_next_unicode (void); |
| static int java_read_unicode (java_lexer *, int *); |
| #ifndef JC1_LITE |
| static int utf8_cmp (const unsigned char *, int, const char *); |
| #endif |
| |
| java_lexer *java_new_lexer (FILE *, const char *); |
| #ifndef JC1_LITE |
| static void error_if_numeric_overflow (tree); |
| #endif |
| |
| #ifdef HAVE_ICONV |
| /* Nonzero once the one-time probe that determines `need_byteswap' |
| has been attempted (java_new_lexer sets it before probing). */ |
| static int byteswap_init = 0; |
| |
| /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in |
| big-endian order -- not native endian order. We handle this by |
| doing a conversion once at startup and seeing what happens. This |
| flag holds the result of that determination: nonzero means the two |
| bytes of each converted character must be swapped. */ |
| static int need_byteswap = 0; |
| #endif |
| |
| void |
| java_init_lex (FILE *finput, const char *encoding) |
| { |
| #ifndef JC1_LITE |
| int java_lang_imported = 0; |
| |
| if (!java_lang_id) |
| java_lang_id = get_identifier ("java.lang"); |
| if (!inst_id) |
| inst_id = get_identifier ("inst$"); |
| if (!wpv_id) |
| wpv_id = get_identifier ("write_parm_value$"); |
| |
| if (!java_lang_imported) |
| { |
| tree node = build_tree_list (build_unknown_wfl (java_lang_id), |
| NULL_TREE); |
| read_import_dir (TREE_PURPOSE (node)); |
| TREE_CHAIN (node) = ctxp->import_demand_list; |
| ctxp->import_demand_list = node; |
| java_lang_imported = 1; |
| } |
| |
| if (!wfl_operator) |
| { |
| #ifndef JC1_LITE |
| #ifdef USE_MAPPED_LOCATION |
| wfl_operator = build_expr_wfl (NULL_TREE, input_location); |
| #else |
| wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0); |
| #endif |
| #endif |
| } |
| if (!label_id) |
| label_id = get_identifier ("$L"); |
| if (!wfl_append) |
| wfl_append = build_unknown_wfl (get_identifier ("append")); |
| if (!wfl_string_buffer) |
| wfl_string_buffer = |
| build_unknown_wfl (get_identifier (flag_emit_class_files |
| ? "java.lang.StringBuffer" |
| : "gnu.gcj.runtime.StringBuffer")); |
| if (!wfl_to_string) |
| wfl_to_string = build_unknown_wfl (get_identifier ("toString")); |
| |
| CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) = |
| CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE; |
| |
| memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx)); |
| ctxp->current_parsed_class = NULL; |
| ctxp->package = NULL_TREE; |
| #endif |
| |
| #ifndef JC1_LITE |
| ctxp->save_location = input_location; |
| #endif |
| ctxp->java_error_flag = 0; |
| ctxp->lexer = java_new_lexer (finput, encoding); |
| } |
| |
/* Return a printable rendering of the character code C.  Printable
   ASCII (space through `~') comes back as a one-character string;
   any other value is rendered as `\u' followed by its lowercase
   hexadecimal value, zero-padded to at least four digits.

   The result lives in a static buffer which the next call overwrites,
   so the caller must use or copy it before calling again.  */

static char *
java_sprint_unicode (int c)
{
  /* C is a plain int, so it may be negative or larger than 0xffff and
     then needs more than four hex digits (eight for a 32-bit int).
     The buffer is sized generously and snprintf bounds the write in
     any case.  */
  static char buffer [32];

  if (c < ' ' || c >= 127)
    snprintf (buffer, sizeof (buffer), "\\u%04x", (unsigned int) c);
  else
    {
      buffer [0] = (char) c;
      buffer [1] = '\0';
    }
  return buffer;
}
| |
| /* Create a new lexer object. */ |
| |
| java_lexer * |
| java_new_lexer (FILE *finput, const char *encoding) |
| { |
| java_lexer *lex = xmalloc (sizeof (java_lexer)); |
| int enc_error = 0; |
| |
| lex->finput = finput; |
| lex->bs_count = 0; |
| lex->unget_value = 0; |
| lex->next_unicode = 0; |
| lex->avail_unicode = 0; |
| lex->next_columns = 1; |
| lex->encoding = encoding; |
| lex->position.line = 1; |
| lex->position.col = 1; |
| #ifndef JC1_LITE |
| #ifdef USE_MAPPED_LOCATION |
| input_location |
| = linemap_line_start (&line_table, 1, 120); |
| #else |
| input_line = 1; |
| #endif |
| #endif |
| |
| #ifdef HAVE_ICONV |
| lex->handle = iconv_open ("UCS-2", encoding); |
| if (lex->handle != (iconv_t) -1) |
| { |
| lex->first = -1; |
| lex->last = -1; |
| lex->out_first = -1; |
| lex->out_last = -1; |
| lex->read_anything = 0; |
| lex->use_fallback = 0; |
| |
| /* Work around broken iconv() implementations by doing checking at |
| runtime. We assume that if the UTF-8 => UCS-2 encoder is broken, |
| then all UCS-2 encoders will be broken. Perhaps not a valid |
| assumption. */ |
| if (! byteswap_init) |
| { |
| iconv_t handle; |
| |
| byteswap_init = 1; |
| |
| handle = iconv_open ("UCS-2", "UTF-8"); |
| if (handle != (iconv_t) -1) |
| { |
| unicode_t result; |
| unsigned char in[3]; |
| char *inp, *outp; |
| size_t inc, outc, r; |
| |
| /* This is the UTF-8 encoding of \ufeff. */ |
| in[0] = 0xef; |
| in[1] = 0xbb; |
| in[2] = 0xbf; |
| |
| inp = (char *) in; |
| inc = 3; |
| outp = (char *) &result; |
| outc = 2; |
| |
| r = iconv (handle, (ICONV_CONST char **) &inp, &inc, |
| &outp, &outc); |
| iconv_close (handle); |
| /* Conversion must be complete for us to use the result. */ |
| if (r != (size_t) -1 && inc == 0 && outc == 0) |
| need_byteswap = (result != 0xfeff); |
| } |
| } |
| |
| lex->byte_swap = need_byteswap; |
| } |
| else |
| #endif /* HAVE_ICONV */ |
| { |
| /* If iconv failed, use the internal decoder if the default |
| encoding was requested. This code is used on platforms where |
| iconv exists but is insufficient for our needs. For |
| instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. |
| |
| On Solaris the default encoding, as returned by nl_langinfo(), |
| is `646' (aka ASCII), but the Solaris iconv_open() doesn't |
| understand that. We work around that by pretending |
| `646' to be the same as UTF-8. */ |
| if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646")) |
| enc_error = 1; |
| #ifdef HAVE_ICONV |
| else |
| { |
| lex->use_fallback = 1; |
| lex->encoding = "UTF-8"; |
| } |
| #endif /* HAVE_ICONV */ |
| } |
| |
| if (enc_error) |
| fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding); |
| |
| return lex; |
| } |
| |
| void |
| java_destroy_lexer (java_lexer *lex) |
| { |
| #ifdef HAVE_ICONV |
| if (! lex->use_fallback) |
| iconv_close (lex->handle); |
| #endif |
| free (lex); |
| } |
| |
| static int |
| java_read_char (java_lexer *lex) |
| { |
| #ifdef HAVE_ICONV |
| if (! lex->use_fallback) |
| { |
| size_t ir, inbytesleft, in_save, out_count, out_save; |
| char *inp, *outp; |
| unicode_t result; |
| |
| /* If there is data which has already been converted, use it. */ |
| if (lex->out_first == -1 || lex->out_first >= lex->out_last) |
| { |
| lex->out_first = 0; |
| lex->out_last = 0; |
| |
| while (1) |
| { |
| /* See if we need to read more data. If FIRST == 0 then |
| the previous conversion attempt ended in the middle of |
| a character at the end of the buffer. Otherwise we |
| only have to read if the buffer is empty. */ |
| if (lex->first == 0 || lex->first >= lex->last) |
| { |
| int r; |
| |
| if (lex->first >= lex->last) |
| { |
| lex->first = 0; |
| lex->last = 0; |
| } |
| if (feof (lex->finput)) |
| return UEOF; |
| r = fread (&lex->buffer[lex->last], 1, |
| sizeof (lex->buffer) - lex->last, |
| lex->finput); |
| lex->last += r; |
| } |
| |
| inbytesleft = lex->last - lex->first; |
| out_count = sizeof (lex->out_buffer) - lex->out_last; |
| |
| if (inbytesleft == 0) |
| { |
| /* We've tried to read and there is nothing left. */ |
| return UEOF; |
| } |
| |
| in_save = inbytesleft; |
| out_save = out_count; |
| inp = &lex->buffer[lex->first]; |
| outp = (char *) &lex->out_buffer[lex->out_last]; |
| ir = iconv (lex->handle, (ICONV_CONST char **) &inp, |
| &inbytesleft, &outp, &out_count); |
| |
| /* If we haven't read any bytes, then look to see if we |
| have read a BOM. */ |
| if (! lex->read_anything && out_save - out_count >= 2) |
| { |
| unicode_t uc = * (unicode_t *) &lex->out_buffer[0]; |
| if (uc == 0xfeff) |
| { |
| lex->byte_swap = 0; |
| lex->out_first += 2; |
| } |
| else if (uc == 0xfffe) |
| { |
| lex->byte_swap = 1; |
| lex->out_first += 2; |
| } |
| lex->read_anything = 1; |
| } |
| |
| if (lex->byte_swap) |
| { |
| unsigned int i; |
| for (i = 0; i < out_save - out_count; i += 2) |
| { |
| char t = lex->out_buffer[lex->out_last + i]; |
| lex->out_buffer[lex->out_last + i] |
| = lex->out_buffer[lex->out_last + i + 1]; |
| lex->out_buffer[lex->out_last + i + 1] = t; |
| } |
| } |
| |
| lex->first += in_save - inbytesleft; |
| lex->out_last += out_save - out_count; |
| |
| /* If we converted anything at all, move along. */ |
| if (out_count != out_save) |
| break; |
| |
| if (ir == (size_t) -1) |
| { |
| if (errno == EINVAL) |
| { |
| /* This is ok. This means that the end of our buffer |
| is in the middle of a character sequence. We just |
| move the valid part of the buffer to the beginning |
| to force a read. */ |
| memmove (&lex->buffer[0], &lex->buffer[lex->first], |
| lex->last - lex->first); |
| lex->last -= lex->first; |
| lex->first = 0; |
| } |
| else |
| { |
| /* A more serious error. */ |
| char buffer[128]; |
| sprintf (buffer, |
| "Unrecognized character for encoding '%s'", |
| lex->encoding); |
| java_lex_error (buffer, 0); |
| return UEOF; |
| } |
| } |
| } |
| } |
| |
| if (lex->out_first == -1 || lex->out_first >= lex->out_last) |
| { |
| /* Don't have any data. */ |
| return UEOF; |
| } |
| |
| /* Success. */ |
| result = * ((unicode_t *) &lex->out_buffer[lex->out_first]); |
| lex->out_first += 2; |
| return result; |
| } |
| else |
| #endif /* HAVE_ICONV */ |
| { |
| int c, c1, c2; |
| c = getc (lex->finput); |
| |
| if (c == EOF) |
| return UEOF; |
| if (c < 128) |
| return (unicode_t) c; |
| else |
| { |
| if ((c & 0xe0) == 0xc0) |
| { |
| c1 = getc (lex->finput); |
| if ((c1 & 0xc0) == 0x80) |
| { |
| unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f)); |
| /* Check for valid 2-byte characters. We explicitly |
| allow \0 because this encoding is common in the |
| Java world. */ |
| if (r == 0 || (r >= 0x80 && r <= 0x7ff)) |
| return r; |
| } |
| } |
| else if ((c & 0xf0) == 0xe0) |
| { |
| c1 = getc (lex->finput); |
| if ((c1 & 0xc0) == 0x80) |
| { |
| c2 = getc (lex->finput); |
| if ((c2 & 0xc0) == 0x80) |
| { |
| unicode_t r = (unicode_t)(((c & 0xf) << 12) + |
| (( c1 & 0x3f) << 6) |
| + (c2 & 0x3f)); |
| /* Check for valid 3-byte characters. |
| Don't allow surrogate, \ufffe or \uffff. */ |
| if (IN_RANGE (r, 0x800, 0xffff) |
| && ! IN_RANGE (r, 0xd800, 0xdfff) |
| && r != 0xfffe && r != 0xffff) |
| return r; |
| } |
| } |
| } |
| |
| /* We simply don't support invalid characters. We also |
| don't support 4-, 5-, or 6-byte UTF-8 sequences, as these |
| cannot be valid Java characters. */ |
| java_lex_error ("malformed UTF-8 character", 0); |
| } |
| } |
| |
| /* We only get here on error. */ |
| return UEOF; |
| } |
| |
| static int |
| java_read_unicode (java_lexer *lex, int *unicode_escape_p) |
| { |
| int c; |
| |
| if (lex->unget_value) |
| { |
| c = lex->unget_value; |
| lex->unget_value = 0; |
| } |
| else |
| c = java_read_char (lex); |
| |
| *unicode_escape_p = 0; |
| |
| if (c != '\\') |
| { |
| lex->bs_count = 0; |
| return c; |
| } |
| |
| ++lex->bs_count; |
| if ((lex->bs_count) % 2 == 1) |
| { |
| /* Odd number of \ seen. */ |
| c = java_read_char (lex); |
| if (c == 'u') |
| { |
| unicode_t unicode = 0; |
| int shift = 12; |
| |
| /* Recognize any number of `u's in \u. */ |
| while ((c = java_read_char (lex)) == 'u') |
| ; |
| |
| shift = 12; |
| do |
| { |
| if (c == UEOF) |
| { |
| java_lex_error ("prematurely terminated \\u sequence", 0); |
| return UEOF; |
| } |
| |
| if (hex_p (c)) |
| unicode |= (unicode_t)(hex_value (c) << shift); |
| else |
| { |
| java_lex_error ("non-hex digit in \\u sequence", 0); |
| break; |
| } |
| |
| c = java_read_char (lex); |
| shift -= 4; |
| } |
| while (shift >= 0); |
| |
| if (c != UEOF) |
| lex->unget_value = c; |
| |
| lex->bs_count = 0; |
| *unicode_escape_p = 1; |
| return unicode; |
| } |
| lex->unget_value = c; |
| } |
| return (unicode_t) '\\'; |
| } |
| |
/* Return the next Unicode character (after Unicode-escape handling)
   and advance the current position to just past it.  This is
   java_peek_unicode followed by java_next_unicode.  */

static int
java_get_unicode (void)
{
  int ch;

  ch = java_peek_unicode ();
  java_next_unicode ();
  return ch;
}
| |
| /* Return the next Unicode character (post-Unicode-escape-handling). |
| Do not move the current position, which remains just before |
| the returned character. */ |
| |
| static int |
| java_peek_unicode (void) |
| { |
| int unicode_escape_p; |
| java_lexer *lex = ctxp->lexer; |
| int next; |
| |
| if (lex->avail_unicode) |
| return lex->next_unicode; |
| |
| next = java_read_unicode (lex, &unicode_escape_p); |
| |
| if (next == '\r') |
| { |
| /* We have to read ahead to see if we got \r\n. |
| In that case we return a single line terminator. */ |
| int dummy; |
| next = java_read_unicode (lex, &dummy); |
| if (next != '\n' && next != UEOF) |
| lex->unget_value = next; |
| /* In either case we must return a newline. */ |
| next = '\n'; |
| } |
| |
| lex->next_unicode = next; |
| lex->avail_unicode = 1; |
| |
| if (next == UEOF) |
| { |
| lex->next_columns = 0; |
| return next; |
| } |
| |
| if (next == '\n') |
| { |
| lex->next_columns = 1 - lex->position.col; |
| } |
| else if (next == '\t') |
| { |
| int cur_col = lex->position.col; |
| lex->next_columns = ((cur_col + 7) & ~7) + 1 - cur_col; |
| |
| } |
| else |
| { |
| lex->next_columns = 1; |
| } |
| if (unicode_escape_p) |
| lex->next_columns = 6; |
| return next; |
| } |
| |
| /* Move forward one Unicode character (post-Unicode-escape-handling). |
| Only allowed after java_peek_unicode. The combination java_peek_unicode |
| followed by java_next_unicode is equivalent to java_get_unicode. */ |
| |
| static void java_next_unicode (void) |
| { |
| struct java_lexer *lex = ctxp->lexer; |
| lex->position.col += lex->next_columns; |
| if (lex->next_unicode == '\n') |
| { |
| lex->position.line++; |
| #ifndef JC1_LITE |
| #ifdef USE_MAPPED_LOCATION |
| input_location |
| = linemap_line_start (&line_table, lex->position.line, 120); |
| #else |
| input_line = lex->position.line; |
| #endif |
| #endif |
| } |
| lex->avail_unicode = 0; |
| } |
| |
#if 0
/* The inverse of java_next_unicode: make the character that was just
   consumed available again and move the column back by its width.
   It is an internal error to call this while a peeked character is
   still pending.

   Not currently used, but could be if it would be cleaner or faster.
   java_peek_unicode == java_get_unicode + java_unget_unicode.
   java_get_unicode == java_peek_unicode + java_next_unicode.  */

static void
java_unget_unicode (void)
{
  struct java_lexer *lex = ctxp->lexer;

  if (lex->avail_unicode)
    fatal_error ("internal error - bad unget");

  lex->position.col -= lex->next_columns;
  lex->avail_unicode = 1;
}
#endif
| |
| /* Parse the end of a C style comment. |
| * C is the first character following the '/' and '*'. */ |
| static void |
| java_parse_end_comment (int c) |
| { |
| for ( ;; c = java_get_unicode ()) |
| { |
| switch (c) |
| { |
| case UEOF: |
| java_lex_error ("Comment not terminated at end of input", 0); |
| return; |
| case '*': |
| switch (c = java_peek_unicode ()) |
| { |
| case UEOF: |
| java_lex_error ("Comment not terminated at end of input", 0); |
| return; |
| case '/': |
| java_next_unicode (); |
| return; |
| case '*': /* Reparse only '*'. */ |
| ; |
| } |
| } |
| } |
| } |
| |
| /* Parse the documentation section. Keywords must be at the beginning |
| of a documentation comment line (ignoring white space and any `*' |
| character). Parsed keyword(s): @DEPRECATED. */ |
| |
| static void |
| java_parse_doc_section (int c) |
| { |
| int last_was_star; |
| |
| /* We reset this here, because only the most recent doc comment |
| applies to the following declaration. */ |
| ctxp->deprecated = 0; |
| |
| /* We loop over all the lines of the comment. We'll eventually exit |
| if we hit EOF prematurely, or when we see the comment |
| terminator. */ |
| while (1) |
| { |
| /* These first steps need only be done if we're still looking |
| for the deprecated tag. If we've already seen it, we might |
| as well skip looking for it again. */ |
| if (! ctxp->deprecated) |
| { |
| /* Skip whitespace and '*'s. We must also check for the end |
| of the comment here. */ |
| while (JAVA_WHITE_SPACE_P (c) || c == '*') |
| { |
| last_was_star = (c == '*'); |
| c = java_get_unicode (); |
| if (last_was_star && c == '/') |
| { |
| /* We just saw the comment terminator. */ |
| return; |
| } |
| } |
| |
| if (c == UEOF) |
| goto eof; |
| |
| if (c == '@') |
| { |
| const char *deprecated = "@deprecated"; |
| int i; |
| |
| for (i = 0; deprecated[i]; ++i) |
| { |
| if (c != deprecated[i]) |
| break; |
| /* We write the code in this way, with the |
| update at the end, so that after the loop |
| we're left with the next character in C. */ |
| c = java_get_unicode (); |
| } |
| |
| if (c == UEOF) |
| goto eof; |
| |
| /* @deprecated must be followed by a space or newline. |
| We also allow a '*' in case it appears just before |
| the end of a comment. In this position only we also |
| must allow any Unicode space character. */ |
| if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c)) |
| { |
| if (! deprecated[i]) |
| ctxp->deprecated = 1; |
| } |
| } |
| } |
| |
| /* We've examined the relevant content from this line. Now we |
| skip the remaining characters and start over with the next |
| line. We also check for end of comment here. */ |
| while (c != '\n' && c != UEOF) |
| { |
| last_was_star = (c == '*'); |
| c = java_get_unicode (); |
| if (last_was_star && c == '/') |
| return; |
| } |
| |
| if (c == UEOF) |
| goto eof; |
| /* We have to advance past the \n. */ |
| c = java_get_unicode (); |
| if (c == UEOF) |
| goto eof; |
| } |
| |
| eof: |
| java_lex_error ("Comment not terminated at end of input", 0); |
| } |
| |
| /* Return true if C is a valid start character for a Java identifier. |
| This is only called if C >= 128 -- smaller values are handled |
| inline. However, this function handles all values anyway. */ |
| static int |
| java_start_char_p (unicode_t c) |
| { |
| unsigned int hi = c / 256; |
| const char *const page = type_table[hi]; |
| unsigned long val = (unsigned long) page; |
| int flags; |
| |
| if ((val & ~ LETTER_MASK) != 0) |
| flags = page[c & 255]; |
| else |
| flags = val; |
| |
| return flags & LETTER_START; |
| } |
| |
| /* Return true if C is a valid part character for a Java identifier. |
| This is only called if C >= 128 -- smaller values are handled |
| inline. However, this function handles all values anyway. */ |
| static int |
| java_part_char_p (unicode_t c) |
| { |
| unsigned int hi = c / 256; |
| const char *const page = type_table[hi]; |
| unsigned long val = (unsigned long) page; |
| int flags; |
| |
| if ((val & ~ LETTER_MASK) != 0) |
| flags = page[c & 255]; |
| else |
| flags = val; |
| |
| return flags & LETTER_PART; |
| } |
| |
| /* Return true if C is whitespace. */ |
| static int |
| java_space_char_p (unicode_t c) |
| { |
| unsigned int hi = c / 256; |
| const char *const page = type_table[hi]; |
| unsigned long val = (unsigned long) page; |
| int flags; |
| |
| if ((val & ~ LETTER_MASK) != 0) |
| flags = page[c & 255]; |
| else |
| flags = val; |
| |
| return flags & LETTER_SPACE; |
| } |
| |
| static int |
| java_parse_escape_sequence (void) |
| { |
| int c; |
| |
| switch (c = java_get_unicode ()) |
| { |
| case 'b': |
| return (unicode_t)0x8; |
| case 't': |
| return (unicode_t)0x9; |
| case 'n': |
| return (unicode_t)0xa; |
| case 'f': |
| return (unicode_t)0xc; |
| case 'r': |
| return (unicode_t)0xd; |
| case '"': |
| return (unicode_t)0x22; |
| case '\'': |
| return (unicode_t)0x27; |
| case '\\': |
| return (unicode_t)0x5c; |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': |
| { |
| int more = 3; |
| unicode_t char_lit = 0; |
| |
| if (c > '3') |
| { |
| /* According to the grammar, `\477' has a well-defined |
| meaning -- it is `\47' followed by `7'. */ |
| --more; |
| } |
| char_lit = 0; |
| for (;;) |
| { |
| char_lit = 8 * char_lit + c - '0'; |
| if (--more == 0) |
| break; |
| c = java_peek_unicode (); |
| if (! RANGE (c, '0', '7')) |
| break; |
| java_next_unicode (); |
| } |
| |
| return char_lit; |
| } |
| default: |
| java_lex_error ("Invalid character in escape sequence", -1); |
| return JAVA_CHAR_ERROR; |
| } |
| } |
| |
| #ifndef JC1_LITE |
| #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0) |
| |
| /* Subroutine of java_lex: converts floating-point literals to tree |
| nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to |
| store the result. FFLAG indicates whether the literal was tagged |
| with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING |
| is the line number on which to report any error. */ |
| |
| static void java_perform_atof (YYSTYPE *, char *, int, int); |
| |
| static void |
| java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag, |
| int number_beginning) |
| { |
| REAL_VALUE_TYPE value; |
| tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE); |
| |
| SET_REAL_VALUE_ATOF (value, |
| REAL_VALUE_ATOF (literal_token, TYPE_MODE (type))); |
| |
| if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value)) |
| { |
| JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double"); |
| value = DCONST0; |
| } |
| else if (IS_ZERO (value)) |
| { |
| /* We check to see if the value is really 0 or if we've found an |
| underflow. We do this in the most primitive imaginable way. */ |
| int really_zero = 1; |
| char *p = literal_token; |
| if (*p == '-') |
| ++p; |
| while (*p && *p != 'e' && *p != 'E') |
| { |
| if (*p != '0' && *p != '.') |
| { |
| really_zero = 0; |
| break; |
| } |
| ++p; |
| } |
| if (! really_zero) |
| { |
| int save_col = ctxp->lexer->position.col; |
| ctxp->lexer->position.col = number_beginning; |
| java_lex_error ("Floating point literal underflow", 0); |
| ctxp->lexer->position.col = save_col; |
| } |
| } |
| |
| SET_LVAL_NODE (build_real (type, value)); |
| } |
| #endif |
| |
| static int yylex (YYSTYPE *); |
| |
| static int |
| #ifdef JC1_LITE |
| yylex (YYSTYPE *java_lval) |
| #else |
| do_java_lex (YYSTYPE *java_lval) |
| #endif |
| { |
| int c; |
| char *string; |
| |
| /* Translation of the Unicode escape in the raw stream of Unicode |
| characters. Takes care of line terminator. */ |
| step1: |
| /* Skip white spaces: SP, TAB and FF or ULT. */ |
| for (;;) |
| { |
| c = java_peek_unicode (); |
| if (c != '\n' && ! JAVA_WHITE_SPACE_P (c)) |
| break; |
| java_next_unicode (); |
| } |
| |
| /* Handle EOF here. */ |
| if (c == UEOF) /* Should probably do something here... */ |
| return 0; |
| |
| #ifndef JC1_LITE |
| #ifdef USE_MAPPED_LOCATION |
| LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, |
| ctxp->lexer->position.col); |
| #else |
| ctxp->lexer->token_start = ctxp->lexer->position; |
| #endif |
| #endif |
| |
| /* Numeric literals. */ |
| if (JAVA_ASCII_DIGIT (c) || (c == '.')) |
| { |
| /* This section of code is borrowed from gcc/c-lex.c. */ |
| #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2) |
| int parts[TOTAL_PARTS]; |
| HOST_WIDE_INT high, low; |
| /* End borrowed section. */ |
| char literal_token [256]; |
| int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes; |
| int found_hex_digits = 0, found_non_octal_digits = -1; |
| int i; |
| #ifndef JC1_LITE |
| int number_beginning = ctxp->lexer->position.col; |
| tree value; |
| #endif |
| |
| for (i = 0; i < TOTAL_PARTS; i++) |
| parts [i] = 0; |
| |
| if (c == '0') |
| { |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| if (c == 'x' || c == 'X') |
| { |
| radix = 16; |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| } |
| else if (JAVA_ASCII_DIGIT (c)) |
| { |
| literal_token [literal_index++] = '0'; |
| radix = 8; |
| } |
| else if (c == '.' || c == 'e' || c =='E') |
| { |
| literal_token [literal_index++] = '0'; |
| /* Handle C during floating-point parsing. */ |
| } |
| else |
| { |
| /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */ |
| switch (c) |
| { |
| case 'L': case 'l': |
| java_next_unicode (); |
| SET_LVAL_NODE (long_zero_node); |
| return (INT_LIT_TK); |
| case 'f': case 'F': |
| java_next_unicode (); |
| SET_LVAL_NODE (float_zero_node); |
| return (FP_LIT_TK); |
| case 'd': case 'D': |
| java_next_unicode (); |
| SET_LVAL_NODE (double_zero_node); |
| return (FP_LIT_TK); |
| default: |
| SET_LVAL_NODE (integer_zero_node); |
| return (INT_LIT_TK); |
| } |
| } |
| } |
| /* Parse the first part of the literal, until we find something |
| which is not a number. */ |
| while (radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c)) |
| { |
| /* We store in a string (in case it turns out to be a FP) and in |
| PARTS if we have to process a integer literal. */ |
| int numeric = hex_value (c); |
| int count; |
| |
| /* Remember when we find a valid hexadecimal digit. */ |
| if (radix == 16) |
| found_hex_digits = 1; |
| /* Remember when we find an invalid octal digit. */ |
| else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0) |
| found_non_octal_digits = literal_index; |
| |
| literal_token [literal_index++] = c; |
| /* This section of code if borrowed from gcc/c-lex.c. */ |
| for (count = 0; count < TOTAL_PARTS; count++) |
| { |
| parts[count] *= radix; |
| if (count) |
| { |
| parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR); |
| parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1; |
| } |
| else |
| parts[0] += numeric; |
| } |
| if (parts [TOTAL_PARTS-1] != 0) |
| overflow = 1; |
| /* End borrowed section. */ |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| } |
| |
| /* If we have something from the FP char set but not a digit, parse |
| a FP literal. */ |
| if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c)) |
| { |
| /* stage==0: seen digits only |
| * stage==1: seen '.' |
| * stage==2: seen 'e' or 'E'. |
| * stage==3: seen '+' or '-' after 'e' or 'E'. |
| * stage==4: seen type suffix ('f'/'F'/'d'/'D') |
| */ |
| int stage = 0; |
| int seen_digit = (literal_index ? 1 : 0); |
| int seen_exponent = 0; |
| int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are |
| double unless specified. */ |
| |
| /* It is ok if the radix is 8 because this just means we've |
| seen a leading `0'. However, radix==16 is invalid. */ |
| if (radix == 16) |
| java_lex_error ("Can't express non-decimal FP literal", 0); |
| radix = 10; |
| |
| for (;;) |
| { |
| if (c == '.') |
| { |
| if (stage < 1) |
| { |
| stage = 1; |
| literal_token [literal_index++ ] = c; |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| if (literal_index == 1 && !JAVA_ASCII_DIGIT (c)) |
| BUILD_OPERATOR (DOT_TK); |
| } |
| else |
| java_lex_error ("Invalid character in FP literal", 0); |
| } |
| |
| if (c == 'e' || c == 'E') |
| { |
| if (stage < 2) |
| { |
| /* {E,e} must have seen at least a digit. */ |
| if (!seen_digit) |
| java_lex_error |
| ("Invalid FP literal, mantissa must have digit", 0); |
| seen_digit = 0; |
| seen_exponent = 1; |
| stage = 2; |
| literal_token [literal_index++] = c; |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| } |
| else |
| java_lex_error ("Invalid character in FP literal", 0); |
| } |
| if ( c == 'f' || c == 'F' || c == 'd' || c == 'D') |
| { |
| fflag = ((c == 'd') || (c == 'D')) ? 0 : 1; |
| stage = 4; /* So we fall through. */ |
| } |
| |
| if ((c=='-' || c =='+') && stage == 2) |
| { |
| stage = 3; |
| literal_token [literal_index++] = c; |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| } |
| |
| if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) || |
| (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) || |
| (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) || |
| (stage == 3 && JAVA_ASCII_DIGIT (c))) |
| { |
| if (JAVA_ASCII_DIGIT (c)) |
| seen_digit = 1; |
| if (stage == 2) |
| stage = 3; |
| literal_token [literal_index++ ] = c; |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| } |
| else |
| { |
| if (stage == 4) /* Don't push back fF/dD. */ |
| java_next_unicode (); |
| |
| /* An exponent (if any) must have seen a digit. */ |
| if (seen_exponent && !seen_digit) |
| java_lex_error |
| ("Invalid FP literal, exponent must have digit", 0); |
| |
| literal_token [literal_index] = '\0'; |
| |
| #ifndef JC1_LITE |
| java_perform_atof (java_lval, literal_token, |
| fflag, number_beginning); |
| #endif |
| return FP_LIT_TK; |
| } |
| } |
| } /* JAVA_ASCII_FPCHAR (c) */ |
| |
| /* Here we get back to converting the integral literal. */ |
| if (radix == 16 && ! found_hex_digits) |
| java_lex_error |
| ("0x must be followed by at least one hexadecimal digit", 0); |
| else if (radix == 8 && found_non_octal_digits >= 0) |
| { |
| int back = literal_index - found_non_octal_digits; |
| ctxp->lexer->position.col -= back; |
| java_lex_error ("Octal literal contains digit out of range", 0); |
| ctxp->lexer->position.col += back; |
| } |
| else if (c == 'L' || c == 'l') |
| { |
| java_next_unicode (); |
| long_suffix = 1; |
| } |
| |
| /* This section of code is borrowed from gcc/c-lex.c. */ |
| if (!overflow) |
| { |
| bytes = GET_TYPE_PRECISION (long_type_node); |
| for (i = bytes; i < TOTAL_PARTS; i++) |
| if (parts [i]) |
| { |
| overflow = 1; |
| break; |
| } |
| } |
| high = low = 0; |
| for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++) |
| { |
| high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT |
| / HOST_BITS_PER_CHAR)] |
| << (i * HOST_BITS_PER_CHAR)); |
| low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR); |
| } |
| /* End borrowed section. */ |
| |
| #ifndef JC1_LITE |
| /* Range checking. */ |
| /* Temporarily set type to unsigned. */ |
| value = build_int_cst_wide (long_suffix |
| ? unsigned_long_type_node |
| : unsigned_int_type_node, low, high); |
| SET_LVAL_NODE (value); |
| |
| /* For base 10 numbers, only values up to the highest value |
| (plus one) can be written. For instance, only ints up to |
| 2147483648 can be written. The special case of the largest |
| negative value is handled elsewhere. For other bases, any |
| number can be represented. */ |
| if (overflow || (radix == 10 |
| && tree_int_cst_lt (long_suffix |
| ? decimal_long_max |
| : decimal_int_max, |
| value))) |
| { |
| if (long_suffix) |
| JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal"); |
| else |
| JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal"); |
| } |
| |
| /* Sign extend the value. */ |
| value = build_int_cst_wide (long_suffix ? long_type_node : int_type_node, |
| low, high); |
| value = force_fit_type (value, 0, false, false); |
| |
| if (radix != 10) |
| { |
| value = copy_node (value); |
| JAVA_NOT_RADIX10_FLAG (value) = 1; |
| } |
| |
| SET_LVAL_NODE (value); |
| #endif |
| return INT_LIT_TK; |
| } |
| |
| /* We may have an ID here. */ |
| if (JAVA_START_CHAR_P (c)) |
| { |
| int ascii_index = 0, all_ascii = 1; |
| |
| /* Keyword, boolean literal or null literal. */ |
| while (c != UEOF && JAVA_PART_CHAR_P (c)) |
| { |
| java_unicode_2_utf8 (c); |
| if (c >= 128) |
| all_ascii = 0; |
| java_next_unicode (); |
| ascii_index++; |
| c = java_peek_unicode (); |
| } |
| |
| obstack_1grow (&temporary_obstack, '\0'); |
| string = obstack_finish (&temporary_obstack); |
| |
| /* If we have something all ascii, we consider a keyword, a boolean |
| literal, a null literal or an all ASCII identifier. Otherwise, |
| this is an identifier (possibly not respecting formation rule). */ |
| if (all_ascii) |
| { |
| const struct java_keyword *kw; |
| if ((kw=java_keyword (string, ascii_index))) |
| { |
| switch (kw->token) |
| { |
| case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK: |
| case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK: |
| case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK: |
| case PRIVATE_TK: case STRICT_TK: |
| SET_MODIFIER_CTX (kw->token); |
| return MODIFIER_TK; |
| case FLOAT_TK: |
| SET_LVAL_NODE (float_type_node); |
| return FP_TK; |
| case DOUBLE_TK: |
| SET_LVAL_NODE (double_type_node); |
| return FP_TK; |
| case BOOLEAN_TK: |
| SET_LVAL_NODE (boolean_type_node); |
| return BOOLEAN_TK; |
| case BYTE_TK: |
| SET_LVAL_NODE (byte_type_node); |
| return INTEGRAL_TK; |
| case SHORT_TK: |
| SET_LVAL_NODE (short_type_node); |
| return INTEGRAL_TK; |
| case INT_TK: |
| SET_LVAL_NODE (int_type_node); |
| return INTEGRAL_TK; |
| case LONG_TK: |
| SET_LVAL_NODE (long_type_node); |
| return INTEGRAL_TK; |
| case CHAR_TK: |
| SET_LVAL_NODE (char_type_node); |
| return INTEGRAL_TK; |
| |
| /* Keyword based literals. */ |
| case TRUE_TK: |
| case FALSE_TK: |
| SET_LVAL_NODE ((kw->token == TRUE_TK ? |
| boolean_true_node : boolean_false_node)); |
| return BOOL_LIT_TK; |
| case NULL_TK: |
| SET_LVAL_NODE (null_pointer_node); |
| return NULL_TK; |
| |
| case ASSERT_TK: |
| if (flag_assert) |
| { |
| BUILD_OPERATOR (kw->token); |
| return kw->token; |
| } |
| else |
| break; |
| |
| /* Some keyword we want to retain information on the location |
| they where found. */ |
| case CASE_TK: |
| case DEFAULT_TK: |
| case SUPER_TK: |
| case THIS_TK: |
| case RETURN_TK: |
| case BREAK_TK: |
| case CONTINUE_TK: |
| case TRY_TK: |
| case CATCH_TK: |
| case THROW_TK: |
| case INSTANCEOF_TK: |
| BUILD_OPERATOR (kw->token); |
| |
| default: |
| return kw->token; |
| } |
| } |
| } |
| |
| java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string)); |
| return ID_TK; |
| } |
| |
| java_next_unicode (); |
| |
| /* Character literals. */ |
| if (c == '\'') |
| { |
| int char_lit; |
| |
| if ((c = java_get_unicode ()) == '\\') |
| char_lit = java_parse_escape_sequence (); |
| else |
| { |
| if (c == '\n' || c == '\'') |
| java_lex_error ("Invalid character literal", 0); |
| char_lit = c; |
| } |
| |
| c = java_get_unicode (); |
| |
| if ((c == '\n') || (c == UEOF)) |
| java_lex_error ("Character literal not terminated at end of line", 0); |
| if (c != '\'') |
| java_lex_error ("Syntax error in character literal", 0); |
| |
| if (char_lit == JAVA_CHAR_ERROR) |
| char_lit = 0; /* We silently convert it to zero. */ |
| |
| SET_LVAL_NODE (build_int_cst (char_type_node, char_lit)); |
| return CHAR_LIT_TK; |
| } |
| |
| /* String literals. */ |
| if (c == '"') |
| { |
| int no_error = 1; |
| char *string; |
| |
| for (;;) |
| { |
| c = java_peek_unicode (); |
| if (c == '\n' || c == UEOF) /* ULT. */ |
| { |
| java_lex_error ("String not terminated at end of line", 0); |
| break; |
| } |
| java_next_unicode (); |
| if (c == '"') |
| break; |
| if (c == '\\') |
| c = java_parse_escape_sequence (); |
| if (c == JAVA_CHAR_ERROR) |
| { |
| no_error = 0; |
| c = 0; /* We silently convert it to zero. */ |
| } |
| java_unicode_2_utf8 (c); |
| } |
| |
| obstack_1grow (&temporary_obstack, '\0'); |
| string = obstack_finish (&temporary_obstack); |
| #ifndef JC1_LITE |
| if (!no_error || (c != '"')) |
| java_lval->node = error_mark_node; /* FIXME: Requires further |
| testing. */ |
| else |
| java_lval->node = build_string (strlen (string), string); |
| #endif |
| obstack_free (&temporary_obstack, string); |
| return STRING_LIT_TK; |
| } |
| |
| switch (c) |
| { |
| case '/': |
| /* Check for comment. */ |
| switch (c = java_peek_unicode ()) |
| { |
| case '/': |
| java_next_unicode (); |
| for (;;) |
| { |
| c = java_get_unicode (); |
| if (c == UEOF) |
| { |
| /* It is ok to end a `//' comment with EOF, unless |
| we're being pedantic. */ |
| if (pedantic) |
| java_lex_error ("Comment not terminated at end of input", |
| 0); |
| return 0; |
| } |
| if (c == '\n') /* ULT */ |
| goto step1; |
| } |
| break; |
| |
| case '*': |
| java_next_unicode (); |
| if ((c = java_get_unicode ()) == '*') |
| { |
| c = java_get_unicode (); |
| if (c == '/') |
| { |
| /* Empty documentation comment. We have to reset |
| the deprecation marker as only the most recent |
| doc comment applies. */ |
| ctxp->deprecated = 0; |
| } |
| else |
| java_parse_doc_section (c); |
| } |
| else |
| java_parse_end_comment ((c = java_get_unicode ())); |
| goto step1; |
| break; |
| |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (DIV_ASSIGN_TK); |
| |
| default: |
| BUILD_OPERATOR (DIV_TK); |
| } |
| |
| case '(': |
| BUILD_OPERATOR (OP_TK); |
| case ')': |
| return CP_TK; |
| case '{': |
| #ifndef JC1_LITE |
| java_lval->operator.token = OCB_TK; |
| java_lval->operator.location = BUILD_LOCATION(); |
| #ifdef USE_MAPPED_LOCATION |
| if (ctxp->ccb_indent == 1) |
| ctxp->first_ccb_indent1 = input_location; |
| #else |
| if (ctxp->ccb_indent == 1) |
| ctxp->first_ccb_indent1 = input_line; |
| #endif |
| #endif |
| ctxp->ccb_indent++; |
| return OCB_TK; |
| case '}': |
| ctxp->ccb_indent--; |
| #ifndef JC1_LITE |
| java_lval->operator.token = CCB_TK; |
| java_lval->operator.location = BUILD_LOCATION(); |
| #ifdef USE_MAPPED_LOCATION |
| if (ctxp->ccb_indent == 1) |
| ctxp->last_ccb_indent1 = input_location; |
| #else |
| if (ctxp->ccb_indent == 1) |
| ctxp->last_ccb_indent1 = input_line; |
| #endif |
| #endif |
| return CCB_TK; |
| case '[': |
| BUILD_OPERATOR (OSB_TK); |
| case ']': |
| return CSB_TK; |
| case ';': |
| return SC_TK; |
| case ',': |
| return C_TK; |
| case '.': |
| BUILD_OPERATOR (DOT_TK); |
| |
| /* Operators. */ |
| case '=': |
| c = java_peek_unicode (); |
| if (c == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR (EQ_TK); |
| } |
| else |
| { |
| /* Equals is used in two different locations. In the |
| variable_declarator: rule, it has to be seen as '=' as opposed |
| to being seen as an ordinary assignment operator in |
| assignment_operators: rule. */ |
| BUILD_OPERATOR (ASSIGN_TK); |
| } |
| |
| case '>': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR (GTE_TK); |
| case '>': |
| java_next_unicode (); |
| switch ((c = java_peek_unicode ())) |
| { |
| case '>': |
| java_next_unicode (); |
| c = java_peek_unicode (); |
| if (c == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR2 (ZRS_ASSIGN_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (ZRS_TK); |
| } |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (SRS_ASSIGN_TK); |
| default: |
| BUILD_OPERATOR (SRS_TK); |
| } |
| default: |
| BUILD_OPERATOR (GT_TK); |
| } |
| |
| case '<': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR (LTE_TK); |
| case '<': |
| java_next_unicode (); |
| if ((c = java_peek_unicode ()) == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR2 (LS_ASSIGN_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (LS_TK); |
| } |
| default: |
| BUILD_OPERATOR (LT_TK); |
| } |
| |
| case '&': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '&': |
| java_next_unicode (); |
| BUILD_OPERATOR (BOOL_AND_TK); |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (AND_ASSIGN_TK); |
| default: |
| BUILD_OPERATOR (AND_TK); |
| } |
| |
| case '|': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '|': |
| java_next_unicode (); |
| BUILD_OPERATOR (BOOL_OR_TK); |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (OR_ASSIGN_TK); |
| default: |
| BUILD_OPERATOR (OR_TK); |
| } |
| |
| case '+': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '+': |
| java_next_unicode (); |
| BUILD_OPERATOR (INCR_TK); |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (PLUS_ASSIGN_TK); |
| default: |
| BUILD_OPERATOR (PLUS_TK); |
| } |
| |
| case '-': |
| switch ((c = java_peek_unicode ())) |
| { |
| case '-': |
| java_next_unicode (); |
| BUILD_OPERATOR (DECR_TK); |
| case '=': |
| java_next_unicode (); |
| BUILD_OPERATOR2 (MINUS_ASSIGN_TK); |
| default: |
| BUILD_OPERATOR (MINUS_TK); |
| } |
| |
| case '*': |
| if ((c = java_peek_unicode ()) == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR2 (MULT_ASSIGN_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (MULT_TK); |
| } |
| |
| case '^': |
| if ((c = java_peek_unicode ()) == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR2 (XOR_ASSIGN_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (XOR_TK); |
| } |
| |
| case '%': |
| if ((c = java_peek_unicode ()) == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR2 (REM_ASSIGN_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (REM_TK); |
| } |
| |
| case '!': |
| if ((c = java_peek_unicode()) == '=') |
| { |
| java_next_unicode (); |
| BUILD_OPERATOR (NEQ_TK); |
| } |
| else |
| { |
| BUILD_OPERATOR (NEG_TK); |
| } |
| |
| case '?': |
| BUILD_OPERATOR (REL_QM_TK); |
| case ':': |
| BUILD_OPERATOR (REL_CL_TK); |
| case '~': |
| BUILD_OPERATOR (NOT_TK); |
| } |
| |
| if (c == 0x1a) /* CTRL-Z. */ |
| { |
| if ((c = java_peek_unicode ()) == UEOF) |
| return 0; /* Ok here. */ |
| } |
| |
| /* Everything else is an invalid character in the input. */ |
| { |
| char lex_error_buffer [128]; |
| sprintf (lex_error_buffer, "Invalid character '%s' in input", |
| java_sprint_unicode (c)); |
| java_lex_error (lex_error_buffer, -1); |
| } |
| return 0; |
| } |
| |
| #ifndef JC1_LITE |
| |
| /* The exported interface to the lexer. */ |
| static int |
| java_lex (YYSTYPE *java_lval) |
| { |
| int r; |
| |
| timevar_push (TV_LEX); |
| r = do_java_lex (java_lval); |
| timevar_pop (TV_LEX); |
| return r; |
| } |
| |
| /* This is called by the parser to see if an error should be generated |
| due to numeric overflow. This function only handles the particular |
| case of the largest negative value, and is only called in the case |
| where this value is not preceded by `-'. */ |
| static void |
| error_if_numeric_overflow (tree value) |
| { |
| if (TREE_CODE (value) == INTEGER_CST |
| && !JAVA_NOT_RADIX10_FLAG (value) |
| && tree_int_cst_sgn (value) < 0) |
| { |
| if (TREE_TYPE (value) == long_type_node) |
| java_lex_error ("Numeric overflow for 'long' literal", 0); |
| else |
| java_lex_error ("Numeric overflow for 'int' literal", 0); |
| } |
| } |
| |
| #endif /* JC1_LITE */ |
| |
| static void |
| java_unicode_2_utf8 (unicode_t unicode) |
| { |
| if (RANGE (unicode, 0x01, 0x7f)) |
| obstack_1grow (&temporary_obstack, (char)unicode); |
| else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0) |
| { |
| obstack_1grow (&temporary_obstack, |
| (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6))); |
| obstack_1grow (&temporary_obstack, |
| (unsigned char)(0x80 | (unicode & 0x3f))); |
| } |
| else /* Range 0x800-0xffff. */ |
| { |
| obstack_1grow (&temporary_obstack, |
| (unsigned char)(0xe0 | (unicode & 0xf000) >> 12)); |
| obstack_1grow (&temporary_obstack, |
| (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6)); |
| obstack_1grow (&temporary_obstack, |
| (unsigned char)(0x80 | (unicode & 0x003f))); |
| } |
| } |
| |
| #ifndef JC1_LITE |
| static tree |
| build_wfl_node (tree node) |
| { |
| #ifdef USE_MAPPED_LOCATION |
| node = build_expr_wfl (node, input_location); |
| #else |
| node = build_expr_wfl (node, ctxp->filename, |
| ctxp->lexer->token_start.line, |
| ctxp->lexer->token_start.col); |
| #endif |
| /* Prevent java_complete_lhs from short-circuiting node (if constant). */ |
| TREE_TYPE (node) = NULL_TREE; |
| return node; |
| } |
| #endif |
| |
| static void |
| java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED) |
| { |
| #ifndef JC1_LITE |
| int col = (ctxp->lexer->position.col |
| + forward * ctxp->lexer->next_columns); |
| #if USE_MAPPED_LOCATION |
| source_location save_location = input_location; |
| LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col); |
| |
| /* Might be caught in the middle of some error report. */ |
| ctxp->java_error_flag = 0; |
| java_error (NULL); |
| java_error (msg); |
| input_location = save_location; |
| #else |
| java_lc save = ctxp->lexer->token_start; |
| ctxp->lexer->token_start.line = ctxp->lexer->position.line; |
| ctxp->lexer->token_start.col = col; |
| |
| /* Might be caught in the middle of some error report. */ |
| ctxp->java_error_flag = 0; |
| java_error (NULL); |
| java_error (msg); |
| ctxp->lexer->token_start = save; |
| #endif |
| #endif |
| } |
| |
| #ifndef JC1_LITE |
/* Return 1 if C, a character just read from FP, is a line terminator,
   and 0 otherwise.  '\n' and '\r' both count.  After a '\r' one more
   character is read from FP: a '\n' is consumed as part of the same
   terminator, EOF is left alone, and anything else is pushed back with
   ungetc.  */
static int
java_is_eol (FILE *fp, int c)
{
  int lookahead;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  lookahead = getc (fp);
  if (!(lookahead == '\n' || lookahead == EOF))
    ungetc (lookahead, fp);
  return 1;
}
| #endif |
| |
| char * |
| java_get_line_col (const char *filename ATTRIBUTE_UNUSED, |
| int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED) |
| { |
| #ifdef JC1_LITE |
| return 0; |
| #else |
| /* Dumb implementation. Doesn't try to cache or optimize things. */ |
| /* First line of the file is line 1, first column is 1. */ |
| |
| /* COL == -1 means, at the CR/LF in LINE. */ |
| /* COL == -2 means, at the first non space char in LINE. */ |
| |
| FILE *fp; |
| int c, ccol, cline = 1; |
| int current_line_col = 0; |
| int first_non_space = 0; |
| char *base; |
| |
| if (!(fp = fopen (filename, "r"))) |
| fatal_error ("can't open %s: %m", filename); |
| |
| while (cline != line) |
| { |
| c = getc (fp); |
| if (c == EOF) |
| { |
| static const char msg[] = "<<file too short - unexpected EOF>>"; |
| obstack_grow (&temporary_obstack, msg, sizeof(msg)-1); |
| goto have_line; |
| } |
| if (java_is_eol (fp, c)) |
| cline++; |
| } |
| |
| /* Gather the chars of the current line in a buffer. */ |
| for (;;) |
| { |
| c = getc (fp); |
| if (c < 0 || java_is_eol (fp, c)) |
| break; |
| if (!first_non_space && !JAVA_WHITE_SPACE_P (c)) |
| first_non_space = current_line_col; |
| obstack_1grow (&temporary_obstack, c); |
| current_line_col++; |
| } |
| have_line: |
| |
| obstack_1grow (&temporary_obstack, '\n'); |
| |
| if (col == -1) |
| { |
| col = current_line_col; |
| first_non_space = 0; |
| } |
| else if (col == -2) |
| col = first_non_space; |
| else |
| first_non_space = 0; |
| |
| /* Place the '^' a the right position. */ |
| base = obstack_base (&temporary_obstack); |
| for (col += 2, ccol = 0; ccol < col; ccol++) |
| { |
| /* Compute \t when reaching first_non_space. */ |
| char c = (first_non_space ? |
| (base [ccol] == '\t' ? '\t' : ' ') : ' '); |
| obstack_1grow (&temporary_obstack, c); |
| } |
| obstack_grow0 (&temporary_obstack, "^", 1); |
| |
| fclose (fp); |
| return obstack_finish (&temporary_obstack); |
| #endif |
| } |
| |
| #ifndef JC1_LITE |
/* Compare the LENGTH bytes starting at STR, read one character at a
   time through UTF8_GET, with the NUL-terminated string NAME.

   At the first position where the character read differs from the
   corresponding character of NAME, the difference (character read
   minus NAME's character) is returned.  If all of NAME matched, the
   result is 0 when STR has been consumed exactly up to its end and 1
   otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int ch = UTF8_GET (str, limit);
      int delta = ch - *expected;

      if (delta != 0)
        return delta;
    }

  if (str == limit)
    return 0;
  return 1;
}
| |
/* All C++ keywords, in ascending byte order.  cxx_keyword_p bisects
   this table, so new entries must be inserted at their sorted
   position.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__",
  "__inline", "__inline__", "__label__", "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__", "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
| |
| /* Return true if NAME is a C++ keyword. */ |
| |
| int |
| cxx_keyword_p (const char *name, int length) |
| { |
| int last = ARRAY_SIZE (cxx_keywords); |
| int first = 0; |
| int mid = (last + first) / 2; |
| int old = -1; |
| |
| for (mid = (last + first) / 2; |
| mid != old; |
| old = mid, mid = (last + first) / 2) |
| { |
| int kwl = strlen (cxx_keywords[mid]); |
| int min_length = kwl > length ? length : kwl; |
| int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); |
| |
| if (r == 0) |
| { |
| int i; |
| /* We've found a match if all the remaining characters are `$'. */ |
| for (i = min_length; i < length && name[i] == '$'; ++i) |
| ; |
| if (i == length) |
| return 1; |
| r = 1; |
| } |
| |
| if (r < 0) |
| last = mid; |
| else |
| first = mid; |
| } |
| return 0; |
| } |
| #endif /* JC1_LITE */ |