From: Francois-Xavier Coudert Date: Tue, 6 May 2008 18:28:32 +0000 (+0200) Subject: openmp.c (gfc_match_omp_eos): Use gfc_next_ascii_char and gfc_peek_ascii_char. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8fc541d3a5225284038d28ebe0b80cba2da3371b;p=gcc.git openmp.c (gfc_match_omp_eos): Use gfc_next_ascii_char and gfc_peek_ascii_char. * openmp.c (gfc_match_omp_eos): Use gfc_next_ascii_char and gfc_peek_ascii_char. * decl.c (gfc_match_kind_spec, gfc_match_type_spec, gfc_match_implicit_none, match_implicit_range, gfc_match_implicit, match_string_p, match_attr_spec, gfc_match_suffix, match_procedure_decl, gfc_match_entry, gfc_match_subroutine): Likewise. * gfortran.h (gfc_char_t): New type. (gfc_linebuf): Make line member a gfc_char_t. (locus): Make nextc member a gfc_char_t. (gfc_wide_is_printable, gfc_wide_is_digit, gfc_wide_fits_in_byte, gfc_wide_tolower, gfc_wide_strlen, gfc_next_ascii_char, gfc_peek_ascii_char, gfc_check_digit): New prototypes. * error.c (print_wide_char): New function. (show_locus): Use print_wide_char and gfc_wide_strlen. * io.c (next_char): Use gfc_char_t type. (match_io): Use gfc_peek_ascii_char and gfc_next_ascii_char. * match.c (gfc_match_parens, gfc_match_eos, gfc_match_small_literal_int, gfc_match_name, gfc_match_name_C, gfc_match_intrinsic_op, gfc_match_char, gfc_match_return, gfc_match_common): Likewise. * match.h (gfc_match_special_char): Change prototype. * parse.c (decode_specification_statement, decode_statement, decode_omp_directive, next_free, next_fixed): Use gfc_peek_ascii_char and gfc_next_ascii_char. * primary.c (gfc_check_digit): Change name. (match_digits, match_hollerith_constant, match_boz_constant, match_real_constant, next_string_char, match_charkind_name, match_string_constant, match_logical_constant_string, match_complex_constant, match_actual_arg, match_varspec, gfc_match_rvalue, match_variable): Use gfc_peek_ascii_char and gfc_next_ascii_char. * scanner.c (gfc_wide_fits_in_byte, wide_is_ascii, gfc_wide_is_printable, gfc_wide_tolower, gfc_wide_is_digit, gfc_wide_is_digit, wide_atoi, gfc_wide_strlen, wide_strcpy, wide_strchr, widechar_to_char, wide_strncmp, wide_strncasecmp, gfc_next_ascii_char, gfc_peek_ascii_char): New functions. (next_char, gfc_define_undef_line, skip_free_comments, gfc_next_char_literal, gfc_next_char, gfc_peek_char, gfc_error_recovery, load_line, preprocessor_line, include_line, load_file, gfc_read_orig_filename): Use gfc_char_t for source characters and the {gfc_,}wide_* functions to manipulate wide strings. From-SVN: r134992 --- diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 83d3bcd8acf..3ce9b4e4b01 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,50 @@ +2008-05-06 Francois-Xavier Coudert + + * openmp.c (gfc_match_omp_eos): Use gfc_next_ascii_char and + gfc_peek_ascii_char. + * decl.c (gfc_match_kind_spec, gfc_match_type_spec, + gfc_match_implicit_none, match_implicit_range, gfc_match_implicit, + match_string_p, match_attr_spec, gfc_match_suffix, + match_procedure_decl, gfc_match_entry, gfc_match_subroutine): + Likewise. + * gfortran.h (gfc_char_t): New type. + (gfc_linebuf): Make line member a gfc_char_t. + (locus): Make nextc member a gfc_char_t. + (gfc_wide_is_printable, gfc_wide_is_digit, gfc_wide_fits_in_byte, + gfc_wide_tolower, gfc_wide_strlen, gfc_next_ascii_char, + gfc_peek_ascii_char, gfc_check_digit): New prototypes. + * error.c (print_wide_char): New function. + (show_locus): Use print_wide_char and gfc_wide_strlen. + * io.c (next_char): Use gfc_char_t type. + (match_io): Use gfc_peek_ascii_char and gfc_next_ascii_char. + * match.c (gfc_match_parens, gfc_match_eos, + gfc_match_small_literal_int, gfc_match_name, gfc_match_name_C, + gfc_match_intrinsic_op, gfc_match_char, gfc_match_return, + gfc_match_common): Likewise. + * match.h (gfc_match_special_char): Change prototype. + * parse.c (decode_specification_statement, decode_statement, + decode_omp_directive, next_free, next_fixed): Use + gfc_peek_ascii_char and gfc_next_ascii_char. + * primary.c (gfc_check_digit): Change name. + (match_digits, match_hollerith_constant, match_boz_constant, + match_real_constant, next_string_char, match_charkind_name, + match_string_constant, match_logical_constant_string, + match_complex_constant, match_actual_arg, match_varspec, + gfc_match_rvalue, match_variable): Use gfc_peek_ascii_char and + gfc_next_ascii_char. + * scanner.c (gfc_wide_fits_in_byte, wide_is_ascii, + gfc_wide_is_printable, gfc_wide_tolower, gfc_wide_is_digit, + gfc_wide_is_digit, wide_atoi, gfc_wide_strlen, wide_strcpy, + wide_strchr, widechar_to_char, wide_strncmp, wide_strncasecmp, + gfc_next_ascii_char, gfc_peek_ascii_char): + New functions. + (next_char, gfc_define_undef_line, skip_free_comments, + gfc_next_char_literal, gfc_next_char, gfc_peek_char, + gfc_error_recovery, load_line, preprocessor_line, include_line, + load_file, gfc_read_orig_filename): Use gfc_char_t for source + characters and the {gfc_,}wide_* functions to manipulate wide + strings. + 2008-05-06 Tobias Burnus PR fortran/36117 diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index f52c2f1ec8f..6b462f97bb6 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -1940,7 +1940,8 @@ kind_expr: } gfc_gobble_whitespace (); - if ((c = gfc_next_char ()) != ')' && (ts->type != BT_CHARACTER || c != ',')) + if ((c = gfc_next_ascii_char ()) != ')' + && (ts->type != BT_CHARACTER || c != ',')) { if (ts->type == BT_CHARACTER) gfc_error ("Missing right parenthesis or comma at %C"); @@ -2213,7 +2214,7 @@ gfc_match_type_spec (gfc_typespec *ts, int implicit_flag) char name[GFC_MAX_SYMBOL_LEN + 1]; gfc_symbol *sym; match m; - int c; + char c; bool seen_deferred_kind; /* A belt and braces check that the typespec is correctly being treated @@ -2360,7 +2361,7 @@ get_kind: if (gfc_current_form == FORM_FREE) { - c = gfc_peek_char(); + c = gfc_peek_ascii_char(); if (!gfc_is_whitespace(c) && c != '*' && c != '(' && c != ':' && c != ',') return MATCH_NO; @@ -2400,13 +2401,14 @@ gfc_match_implicit_none (void) static match match_implicit_range (void) { - int c, c1, c2, inner; + char c, c1, c2; + int inner; locus cur_loc; cur_loc = gfc_current_locus; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (c != '(') { gfc_error ("Missing character range in IMPLICIT at %C"); @@ -2417,12 +2419,12 @@ match_implicit_range (void) while (inner) { gfc_gobble_whitespace (); - c1 = gfc_next_char (); + c1 = gfc_next_ascii_char (); if (!ISALPHA (c1)) goto bad; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); switch (c) { @@ -2435,12 +2437,12 @@ match_implicit_range (void) case '-': gfc_gobble_whitespace (); - c2 = gfc_next_char (); + c2 = gfc_next_ascii_char (); if (!ISALPHA (c2)) goto bad; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if ((c != ',') && (c != ')')) goto bad; @@ -2503,7 +2505,7 @@ gfc_match_implicit (void) { gfc_typespec ts; locus cur_loc; - int c; + char c; match m; gfc_clear_ts (&ts); @@ -2534,7 +2536,7 @@ gfc_match_implicit (void) { /* We may have (). */ gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if ((c == '\n') || (c == ',')) { /* Check for CHARACTER with no length parameter. */ @@ -2584,7 +2586,7 @@ gfc_match_implicit (void) goto syntax; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if ((c != '\n') && (c != ',')) goto syntax; @@ -2713,7 +2715,7 @@ match_string_p (const char *target) const char *p; for (p = target; *p; p++) - if (gfc_next_char () != *p) + if ((char) gfc_next_ascii_char () != *p) return false; return true; } @@ -2765,22 +2767,22 @@ match_attr_spec (void) for (;;) { - int ch; + char ch; d = DECL_NONE; gfc_gobble_whitespace (); - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == ':') { /* This is the successful exit condition for the loop. */ - if (gfc_next_char () == ':') + if (gfc_next_ascii_char () == ':') break; } else if (ch == ',') { gfc_gobble_whitespace (); - switch (gfc_peek_char ()) + switch (gfc_peek_ascii_char ()) { case 'a': if (match_string_p ("allocatable")) @@ -2809,7 +2811,7 @@ match_attr_spec (void) case 'i': if (match_string_p ("int")) { - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == 'e') { if (match_string_p ("nt")) @@ -2841,8 +2843,8 @@ match_attr_spec (void) break; case 'p': - gfc_next_char (); - switch (gfc_next_char ()) + gfc_next_ascii_char (); + switch (gfc_next_ascii_char ()) { case 'a': if (match_string_p ("rameter")) @@ -2861,7 +2863,7 @@ match_attr_spec (void) break; case 'r': - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == 'i') { if (match_string_p ("vate")) @@ -2901,8 +2903,8 @@ match_attr_spec (void) break; case 'v': - gfc_next_char (); - ch = gfc_next_char (); + gfc_next_ascii_char (); + ch = gfc_next_ascii_char (); if (ch == 'a') { if (match_string_p ("lue")) @@ -3938,7 +3940,7 @@ gfc_match_suffix (gfc_symbol *sym, gfc_symbol **result) match is_bind_c; /* Found bind(c). */ match is_result; /* Found result clause. */ match found_match; /* Status of whether we've found a good match. */ - int peek_char; /* Character we're going to peek at. */ + char peek_char; /* Character we're going to peek at. */ bool allow_binding_name; /* Initialize to having found nothing. */ @@ -3948,7 +3950,7 @@ gfc_match_suffix (gfc_symbol *sym, gfc_symbol **result) /* Get the next char to narrow between result and bind(c). */ gfc_gobble_whitespace (); - peek_char = gfc_peek_char (); + peek_char = gfc_peek_ascii_char (); /* C binding names are not allowed for internal procedures. */ if (gfc_current_state () == COMP_CONTAINS @@ -4037,7 +4039,7 @@ match_procedure_decl (void) /* Get the type spec. for the procedure interface. */ old_loc = gfc_current_locus; m = gfc_match_type_spec (¤t_ts, 0); - if (m == MATCH_YES || (m == MATCH_NO && gfc_peek_char () == ')')) + if (m == MATCH_YES || (m == MATCH_NO && gfc_peek_ascii_char () == ')')) goto got_ts; if (m == MATCH_ERROR) @@ -4530,7 +4532,7 @@ gfc_match_entry (void) /* Check what next non-whitespace character is so we can tell if there is the required parens if we have a BIND(C). */ gfc_gobble_whitespace (); - peek_char = gfc_peek_char (); + peek_char = gfc_peek_ascii_char (); if (state == COMP_SUBROUTINE) { @@ -4686,7 +4688,7 @@ gfc_match_subroutine (void) /* Check what next non-whitespace character is so we can tell if there is the required parens if we have a BIND(C). */ gfc_gobble_whitespace (); - peek_char = gfc_peek_char (); + peek_char = gfc_peek_ascii_char (); if (gfc_add_subroutine (&sym->attr, sym->name, NULL) == FAILURE) return MATCH_ERROR; @@ -5486,7 +5488,7 @@ match gfc_match_pointer (void) { gfc_gobble_whitespace (); - if (gfc_peek_char () == '(') + if (gfc_peek_ascii_char () == '(') { if (!gfc_option.flag_cray_pointer) { diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c index 40eccde5adf..c119bcadd7d 100644 --- a/gcc/fortran/error.c +++ b/gcc/fortran/error.c @@ -156,6 +156,66 @@ error_integer (long int i) locus. Calls error_printf() recursively, but the recursion is at most one level deep. */ +static void +print_wide_char (gfc_char_t c) +{ + static const char xdigit[16] = { '0', '1', '2', '3', '4', '5', '6', + '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + char buf[9]; + + if (gfc_wide_is_printable (c)) + error_char (c); + else if (c < ((gfc_char_t) 1 << 8)) + { + buf[2] = '\0'; + buf[1] = xdigit[c & 0x0F]; + c = c >> 4; + buf[0] = xdigit[c & 0x0F]; + + error_char ('\\'); + error_char ('x'); + error_string (buf); + } + else if (c < ((gfc_char_t) 1 << 16)) + { + buf[4] = '\0'; + buf[3] = xdigit[c & 0x0F]; + c = c >> 4; + buf[2] = xdigit[c & 0x0F]; + c = c >> 4; + buf[1] = xdigit[c & 0x0F]; + c = c >> 4; + buf[0] = xdigit[c & 0x0F]; + + error_char ('\\'); + error_char ('u'); + error_string (buf); + } + else + { + buf[8] = '\0'; + buf[7] = xdigit[c & 0x0F]; + c = c >> 4; + buf[6] = xdigit[c & 0x0F]; + c = c >> 4; + buf[5] = xdigit[c & 0x0F]; + c = c >> 4; + buf[4] = xdigit[c & 0x0F]; + c = c >> 4; + buf[3] = xdigit[c & 0x0F]; + c = c >> 4; + buf[2] = xdigit[c & 0x0F]; + c = c >> 4; + buf[1] = xdigit[c & 0x0F]; + c = c >> 4; + buf[0] = xdigit[c & 0x0F]; + + error_char ('\\'); + error_char ('U'); + error_string (buf); + } +} + static void error_printf (const char *, ...) ATTRIBUTE_GCC_GFC(1,2); static void @@ -163,8 +223,8 @@ show_locus (locus *loc, int c1, int c2) { gfc_linebuf *lb; gfc_file *f; - char c, *p; - int i, m, offset, cmax; + gfc_char_t c, *p; + int i, offset, cmax; /* TODO: Either limit the total length and number of included files displayed or add buffering of arbitrary number of characters in @@ -246,8 +306,8 @@ show_locus (locus *loc, int c1, int c2) to work correctly when nonprintable characters exist. A better solution should be found. */ - p = lb->line + offset; - i = strlen (p); + p = &(lb->line[offset]); + i = gfc_wide_strlen (p); if (i > terminal_width) i = terminal_width - 1; @@ -257,23 +317,7 @@ show_locus (locus *loc, int c1, int c2) if (c == '\t') c = ' '; - if (ISPRINT (c)) - error_char (c); - else - { - error_char ('\\'); - error_char ('x'); - - m = ((c >> 4) & 0x0F) + '0'; - if (m > '9') - m += 'A' - '9' - 1; - error_char (m); - - m = (c & 0x0F) + '0'; - if (m > '9') - m += 'A' - '9' - 1; - error_char (m); - } + print_wide_char (c); } error_char ('\n'); diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f6a7c54123b..36c970ca0c8 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -700,6 +700,21 @@ typedef struct symbol_attribute; +/* We need to store source lines as sequences of multibyte source + characters. We define here a type wide enough to hold any multibyte + source character, just like libcpp does. A 32-bit type is enough. */ + +#if HOST_BITS_PER_INT >= 32 +typedef unsigned int gfc_char_t; +#elif HOST_BITS_PER_LONG >= 32 +typedef unsigned long gfc_char_t; +#elif defined(HAVE_LONG_LONG) && (HOST_BITS_PER_LONGLONG >= 32) +typedef unsigned long long gfc_char_t; +#else +# error "Cannot find an integer type with at least 32 bits" +#endif + + /* The following three structures are used to identify a location in the sources. @@ -729,7 +744,7 @@ typedef struct gfc_linebuf int truncated; bool dbg_emitted; - char line[1]; + gfc_char_t line[1]; } gfc_linebuf; #define gfc_linebuf_header_size (offsetof (gfc_linebuf, line)) @@ -738,7 +753,7 @@ typedef struct gfc_linebuf typedef struct { - char *nextc; + gfc_char_t *nextc; gfc_linebuf *lb; } locus; @@ -1940,10 +1955,18 @@ void gfc_advance_line (void); int gfc_check_include (void); int gfc_define_undef_line (void); +int gfc_wide_is_printable (gfc_char_t); +int gfc_wide_is_digit (gfc_char_t); +int gfc_wide_fits_in_byte (gfc_char_t); +gfc_char_t gfc_wide_tolower (gfc_char_t); +size_t gfc_wide_strlen (const gfc_char_t *); + void gfc_skip_comments (void); -int gfc_next_char_literal (int); -int gfc_next_char (void); -int gfc_peek_char (void); +gfc_char_t gfc_next_char_literal (int); +gfc_char_t gfc_next_char (void); +char gfc_next_ascii_char (void); +gfc_char_t gfc_peek_char (void); +char gfc_peek_ascii_char (void); void gfc_error_recovery (void); void gfc_gobble_whitespace (void); try gfc_new_file (void); @@ -2354,6 +2377,7 @@ bool gfc_check_access (gfc_access, gfc_access); symbol_attribute gfc_variable_attr (gfc_expr *, gfc_typespec *); symbol_attribute gfc_expr_attr (gfc_expr *); match gfc_match_rvalue (gfc_expr **); +int gfc_check_digit (char, int); /* trans.c */ void gfc_generate_code (gfc_namespace *); diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index 712aa2140c3..88ede3b2a13 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -237,13 +237,17 @@ Allow @samp{$} as a valid character in a symbol name. @opindex @code{backslash} @cindex backslash @cindex escape characters -Change the interpretation of backslashes in string literals -from a single backslash character to ``C-style'' escape characters. -The following combinations are expanded \a, \b, \f, \n, \r, \t, -\v, \\, and \0 to the ASCII characters alert, backspace, form feed, -newline, carriage return, horizontal tab, vertical tab, backslash, -and NUL, respectively. All other combinations of a character preceded -by \ are unexpanded. +Change the interpretation of backslashes in string literals from a single +backslash character to ``C-style'' escape characters. The following +combinations are expanded @code{\a}, @code{\b}, @code{\f}, @code{\n}, +@code{\r}, @code{\t}, @code{\v}, @code{\\}, and @code{\0} to the ASCII +characters alert, backspace, form feed, newline, carriage return, +horizontal tab, vertical tab, backslash, and NUL, respectively. +Additionally, @code{\x}@var{nn}, @code{\u}@var{nnnn} and +@code{\U}@var{nnnnnnnn} (where each @var{n} is a hexadecimal digit) are +translated into the Unicode characters corresponding to the specified code +points. All other combinations of a character preceded by \ are +unexpanded. @item -fmodule-private @opindex @code{fmodule-private} diff --git a/gcc/fortran/io.c b/gcc/fortran/io.c index 4eb76309ede..07848a1cd6e 100644 --- a/gcc/fortran/io.c +++ b/gcc/fortran/io.c @@ -132,7 +132,7 @@ mode; static char next_char (int in_string) { - static char c; + static gfc_char_t c; if (use_last_char) { @@ -153,18 +153,11 @@ next_char (int in_string) if (gfc_option.flag_backslash && c == '\\') { - int tmp; locus old_locus = gfc_current_locus; - /* Use a temp variable to avoid side effects from gfc_match_special_char - since it uses an int * for its argument. */ - tmp = (int)c; - - if (gfc_match_special_char (&tmp) == MATCH_NO) + if (gfc_match_special_char (&c) == MATCH_NO) gfc_current_locus = old_locus; - c = (char)tmp; - if (!(gfc_option.allow_std & GFC_STD_GNU) && !inhibit_warnings) gfc_warning ("Extension: backslash character at %C"); } @@ -172,7 +165,7 @@ next_char (int in_string) if (mode == MODE_COPY) *format_string++ = c; - c = TOUPPER (c); + c = TOUPPER ((unsigned char) c); return c; } @@ -3185,7 +3178,7 @@ match_io (io_kind k) char name[GFC_MAX_SYMBOL_LEN + 1]; gfc_code *io_code; gfc_symbol *sym; - int comma_flag, c; + int comma_flag; locus where; locus spec_end; gfc_dt *dt; @@ -3203,7 +3196,7 @@ match_io (io_kind k) else if (k == M_PRINT) { /* Treat the non-standard case of PRINT namelist. */ - if ((gfc_current_form == FORM_FIXED || gfc_peek_char () == ' ') + if ((gfc_current_form == FORM_FIXED || gfc_peek_ascii_char () == ' ') && gfc_match_name (name) == MATCH_YES) { gfc_find_symbol (name, NULL, 1, &sym); @@ -3227,7 +3220,7 @@ match_io (io_kind k) if (gfc_current_form == FORM_FREE) { - c = gfc_peek_char(); + char c = gfc_peek_ascii_char (); if (c != ' ' && c != '*' && c != '\'' && c != '"') { m = MATCH_NO; diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index 8512d03a0fb..8c836151954 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -111,8 +111,8 @@ match gfc_match_parens (void) { locus old_loc, where; - int c, count, instring; - char quote; + int count, instring; + gfc_char_t c, quote; old_loc = gfc_current_locus; count = 0; @@ -126,7 +126,7 @@ gfc_match_parens (void) break; if (quote == ' ' && ((c == '\'') || (c == '"'))) { - quote = (char) c; + quote = c; instring = 1; continue; } @@ -170,42 +170,66 @@ gfc_match_parens (void) escaped by a \ via the -fbackslash option. */ match -gfc_match_special_char (int *c) +gfc_match_special_char (gfc_char_t *res) { - + int len, i; + gfc_char_t c, n; match m; m = MATCH_YES; - switch (gfc_next_char_literal (1)) + switch ((c = gfc_next_char_literal (1))) { case 'a': - *c = '\a'; + *res = '\a'; break; case 'b': - *c = '\b'; + *res = '\b'; break; case 't': - *c = '\t'; + *res = '\t'; break; case 'f': - *c = '\f'; + *res = '\f'; break; case 'n': - *c = '\n'; + *res = '\n'; break; case 'r': - *c = '\r'; + *res = '\r'; break; case 'v': - *c = '\v'; + *res = '\v'; break; case '\\': - *c = '\\'; + *res = '\\'; break; case '0': - *c = '\0'; + *res = '\0'; + break; + + case 'x': + case 'u': + case 'U': + /* Hexadecimal form of wide characters. */ + len = (c == 'x' ? 2 : (c == 'u' ? 4 : 8)); + n = 0; + for (i = 0; i < len; i++) + { + char buf[2] = { '\0', '\0' }; + + c = gfc_next_char_literal (1); + if (!gfc_wide_fits_in_byte (c) + || !gfc_check_digit ((unsigned char) c, 16)) + return MATCH_NO; + + buf[0] = (unsigned char) c; + n = n << 4; + n += strtol (buf, NULL, 16); + } + *res = n; break; + default: /* Unknown backslash codes are simply not expanded. */ m = MATCH_NO; @@ -223,14 +247,14 @@ match gfc_match_space (void) { locus old_loc; - int c; + char c; if (gfc_current_form == FORM_FIXED) return MATCH_YES; old_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (!gfc_is_whitespace (c)) { gfc_current_locus = old_loc; @@ -251,7 +275,8 @@ match gfc_match_eos (void) { locus old_loc; - int flag, c; + int flag; + char c; flag = 0; @@ -260,13 +285,13 @@ gfc_match_eos (void) old_loc = gfc_current_locus; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); switch (c) { case '!': do { - c = gfc_next_char (); + c = gfc_next_ascii_char (); } while (c != '\n'); @@ -302,8 +327,9 @@ gfc_match_small_literal_int (int *value, int *cnt) old_loc = gfc_current_locus; + *value = -1; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (cnt) *cnt = 0; @@ -319,7 +345,7 @@ gfc_match_small_literal_int (int *value, int *cnt) for (;;) { old_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (!ISDIGIT (c)) break; @@ -488,12 +514,13 @@ match gfc_match_name (char *buffer) { locus old_loc; - int i, c; + int i; + char c; old_loc = gfc_current_locus; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (!(ISALPHA (c) || (c == '_' && gfc_option.flag_allow_leading_underscore))) { if (gfc_error_flag_test() == 0 && c != '(') @@ -515,13 +542,14 @@ gfc_match_name (char *buffer) } old_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); } while (ISALNUM (c) || c == '_' || (gfc_option.flag_dollar_ok && c == '$')); if (c == '$' && !gfc_option.flag_dollar_ok) { - gfc_error ("Invalid character '$' at %C. Use -fdollar-ok to allow it as an extension"); + gfc_error ("Invalid character '$' at %C. Use -fdollar-ok to allow it " + "as an extension"); return MATCH_ERROR; } @@ -551,7 +579,7 @@ gfc_match_name_C (char *buffer) { locus old_loc; int i = 0; - int c; + gfc_char_t c; old_loc = gfc_current_locus; gfc_gobble_whitespace (); @@ -579,7 +607,9 @@ gfc_match_name_C (char *buffer) /* Continue to read valid variable name characters. */ do { - buffer[i++] = c; + gcc_assert (gfc_wide_fits_in_byte (c)); + + buffer[i++] = (unsigned char) c; /* C does not define a maximum length of variable names, to my knowledge, but the compiler typically places a limit on them. @@ -606,7 +636,7 @@ gfc_match_name_C (char *buffer) if (c == ' ') { gfc_gobble_whitespace (); - c = gfc_peek_char (); + c = gfc_peek_ascii_char (); if (c != '"' && c != '\'') { gfc_error ("Embedded space in NAME= specifier at %C"); @@ -679,10 +709,10 @@ match gfc_match_intrinsic_op (gfc_intrinsic_op *result) { locus orig_loc = gfc_current_locus; - int ch; + char ch; gfc_gobble_whitespace (); - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); switch (ch) { case '+': @@ -696,7 +726,7 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) return MATCH_YES; case '=': - if (gfc_next_char () == '=') + if (gfc_next_ascii_char () == '=') { /* Matched "==". */ *result = INTRINSIC_EQ; @@ -705,10 +735,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case '<': - if (gfc_peek_char () == '=') + if (gfc_peek_ascii_char () == '=') { /* Matched "<=". */ - gfc_next_char (); + gfc_next_ascii_char (); *result = INTRINSIC_LE; return MATCH_YES; } @@ -717,10 +747,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) return MATCH_YES; case '>': - if (gfc_peek_char () == '=') + if (gfc_peek_ascii_char () == '=') { /* Matched ">=". */ - gfc_next_char (); + gfc_next_ascii_char (); *result = INTRINSIC_GE; return MATCH_YES; } @@ -729,10 +759,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) return MATCH_YES; case '*': - if (gfc_peek_char () == '*') + if (gfc_peek_ascii_char () == '*') { /* Matched "**". */ - gfc_next_char (); + gfc_next_ascii_char (); *result = INTRINSIC_POWER; return MATCH_YES; } @@ -741,18 +771,18 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) return MATCH_YES; case '/': - ch = gfc_peek_char (); + ch = gfc_peek_ascii_char (); if (ch == '=') { /* Matched "/=". */ - gfc_next_char (); + gfc_next_ascii_char (); *result = INTRINSIC_NE; return MATCH_YES; } else if (ch == '/') { /* Matched "//". */ - gfc_next_char (); + gfc_next_ascii_char (); *result = INTRINSIC_CONCAT; return MATCH_YES; } @@ -761,13 +791,13 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) return MATCH_YES; case '.': - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); switch (ch) { case 'a': - if (gfc_next_char () == 'n' - && gfc_next_char () == 'd' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 'n' + && gfc_next_ascii_char () == 'd' + && gfc_next_ascii_char () == '.') { /* Matched ".and.". */ *result = INTRINSIC_AND; @@ -776,9 +806,9 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case 'e': - if (gfc_next_char () == 'q') + if (gfc_next_ascii_char () == 'q') { - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == '.') { /* Matched ".eq.". */ @@ -787,7 +817,7 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) } else if (ch == 'v') { - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { /* Matched ".eqv.". */ *result = INTRINSIC_EQV; @@ -798,10 +828,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case 'g': - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == 'e') { - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { /* Matched ".ge.". */ *result = INTRINSIC_GE_OS; @@ -810,7 +840,7 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) } else if (ch == 't') { - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { /* Matched ".gt.". */ *result = INTRINSIC_GT_OS; @@ -820,10 +850,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case 'l': - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == 'e') { - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { /* Matched ".le.". */ *result = INTRINSIC_LE_OS; @@ -832,7 +862,7 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) } else if (ch == 't') { - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { /* Matched ".lt.". */ *result = INTRINSIC_LT_OS; @@ -842,10 +872,10 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case 'n': - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == 'e') { - ch = gfc_next_char (); + ch = gfc_next_ascii_char (); if (ch == '.') { /* Matched ".ne.". */ @@ -854,8 +884,8 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) } else if (ch == 'q') { - if (gfc_next_char () == 'v' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 'v' + && gfc_next_ascii_char () == '.') { /* Matched ".neqv.". */ *result = INTRINSIC_NEQV; @@ -865,8 +895,8 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) } else if (ch == 'o') { - if (gfc_next_char () == 't' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 't' + && gfc_next_ascii_char () == '.') { /* Matched ".not.". */ *result = INTRINSIC_NOT; @@ -876,8 +906,8 @@ gfc_match_intrinsic_op (gfc_intrinsic_op *result) break; case 'o': - if (gfc_next_char () == 'r' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 'r' + && gfc_next_ascii_char () == '.') { /* Matched ".or.". */ *result = INTRINSIC_OR; @@ -1007,7 +1037,7 @@ gfc_match_char (char c) where = gfc_current_locus; gfc_gobble_whitespace (); - if (gfc_next_char () == c) + if (gfc_next_ascii_char () == c) return MATCH_YES; gfc_current_locus = where; @@ -1157,7 +1187,7 @@ loop: } default: - if (c == gfc_next_char ()) + if (c == gfc_next_ascii_char ()) goto loop; break; } @@ -2414,7 +2444,6 @@ gfc_match_return (void) gfc_expr *e; match m; gfc_compile_state s; - int c; e = NULL; if (gfc_match_eos () == MATCH_YES) @@ -2433,7 +2462,7 @@ gfc_match_return (void) RETURN keyword: return+1 return(1) */ - c = gfc_peek_char (); + char c = gfc_peek_ascii_char (); if (ISALPHA (c) || ISDIGIT (c)) return MATCH_NO; } @@ -2868,12 +2897,12 @@ gfc_match_common (void) gfc_gobble_whitespace (); if (gfc_match_eos () == MATCH_YES) goto done; - if (gfc_peek_char () == '/') + if (gfc_peek_ascii_char () == '/') break; if (gfc_match_char (',') != MATCH_YES) goto syntax; gfc_gobble_whitespace (); - if (gfc_peek_char () == '/') + if (gfc_peek_ascii_char () == '/') break; } } diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h index 4a3776e2cd8..d46e1630136 100644 --- a/gcc/fortran/match.h +++ b/gcc/fortran/match.h @@ -38,7 +38,7 @@ extern gfc_st_label *gfc_statement_label; /* match.c. */ /* Generic match subroutines. */ -match gfc_match_special_char (int *); +match gfc_match_special_char (gfc_char_t *); match gfc_match_space (void); match gfc_match_eos (void); match gfc_match_small_literal_int (int *, int *); diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 245f7951ddc..9c0bae497bf 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -36,17 +36,17 @@ match gfc_match_omp_eos (void) { locus old_loc; - int c; + char c; old_loc = gfc_current_locus; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); switch (c) { case '!': do - c = gfc_next_char (); + c = gfc_next_ascii_char (); while (c != '\n'); /* Fall through */ diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c index b133743c739..dd072feb30e 100644 --- a/gcc/fortran/parse.c +++ b/gcc/fortran/parse.c @@ -100,7 +100,7 @@ decode_specification_statement (void) { gfc_statement st; locus old_locus; - int c; + char c; if (gfc_match_eos () == MATCH_YES) return ST_NONE; @@ -121,7 +121,7 @@ decode_specification_statement (void) statement, we eliminate most possibilities by peeking at the first character. */ - c = gfc_peek_char (); + c = gfc_peek_ascii_char (); switch (c) { @@ -229,7 +229,7 @@ decode_statement (void) gfc_statement st; locus old_locus; match m; - int c; + char c; #ifdef GFC_DEBUG gfc_symbol_state (); @@ -315,7 +315,7 @@ decode_statement (void) statement, we eliminate most possibilities by peeking at the first character. */ - c = gfc_peek_char (); + c = gfc_peek_ascii_char (); switch (c) { @@ -462,7 +462,7 @@ static gfc_statement decode_omp_directive (void) { locus old_locus; - int c; + char c; #ifdef GFC_DEBUG gfc_symbol_state (); @@ -485,7 +485,7 @@ decode_omp_directive (void) statement, we eliminate most possibilities by peeking at the first character. */ - c = gfc_peek_char (); + c = gfc_peek_ascii_char (); switch (c) { @@ -569,31 +569,34 @@ static gfc_statement next_free (void) { match m; - int c, d, cnt, at_bol; + int i, cnt, at_bol; + char c; at_bol = gfc_at_bol (); gfc_gobble_whitespace (); - c = gfc_peek_char (); + c = gfc_peek_ascii_char (); if (ISDIGIT (c)) { + char d; + /* Found a statement label? */ m = gfc_match_st_label (&gfc_statement_label); - d = gfc_peek_char (); + d = gfc_peek_ascii_char (); if (m != MATCH_YES || !gfc_is_whitespace (d)) { - gfc_match_small_literal_int (&c, &cnt); + gfc_match_small_literal_int (&i, &cnt); if (cnt > 5) gfc_error_now ("Too many digits in statement label at %C"); - if (c == 0) + if (i == 0) gfc_error_now ("Zero is not a valid statement label at %C"); do - c = gfc_next_char (); + c = gfc_next_ascii_char (); while (ISDIGIT(c)); if (!gfc_is_whitespace (c)) @@ -607,11 +610,11 @@ next_free (void) gfc_gobble_whitespace (); - if (at_bol && gfc_peek_char () == ';') + if (at_bol && gfc_peek_ascii_char () == ';') { gfc_error_now ("Semicolon at %C needs to be preceded by " "statement"); - gfc_next_char (); /* Eat up the semicolon. */ + gfc_next_ascii_char (); /* Eat up the semicolon. */ return ST_NONE; } @@ -633,8 +636,8 @@ next_free (void) { int i; - c = gfc_next_char (); - for (i = 0; i < 5; i++, c = gfc_next_char ()) + c = gfc_next_ascii_char (); + for (i = 0; i < 5; i++, c = gfc_next_ascii_char ()) gcc_assert (c == "!$omp"[i]); gcc_assert (c == ' '); @@ -646,7 +649,7 @@ next_free (void) if (at_bol && c == ';') { gfc_error_now ("Semicolon at %C needs to be preceded by statement"); - gfc_next_char (); /* Eat up the semicolon. */ + gfc_next_ascii_char (); /* Eat up the semicolon. */ return ST_NONE; } @@ -661,7 +664,7 @@ next_fixed (void) { int label, digit_flag, i; locus loc; - char c; + gfc_char_t c; if (!gfc_at_bol ()) return decode_statement (); @@ -694,7 +697,7 @@ next_fixed (void) case '7': case '8': case '9': - label = label * 10 + c - '0'; + label = label * 10 + ((unsigned char) c - '0'); label_locus = gfc_current_locus; digit_flag = 1; break; @@ -705,7 +708,7 @@ next_fixed (void) if (gfc_option.flag_openmp) { for (i = 0; i < 5; i++, c = gfc_next_char_literal (0)) - gcc_assert (TOLOWER (c) == "*$omp"[i]); + gcc_assert ((char) gfc_wide_tolower (c) == "*$omp"[i]); if (c != ' ' && c != '0') { diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c index 6b7fd519d6a..d7491c1a110 100644 --- a/gcc/fortran/primary.c +++ b/gcc/fortran/primary.c @@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see #include "arith.h" #include "match.h" #include "parse.h" +#include "toplev.h" /* Matches a kind-parameter expression, which is either a named symbolic constant or a nonnegative integer constant. If @@ -95,8 +96,8 @@ get_kind (void) /* Given a character and a radix, see if the character is a valid digit in that radix. */ -static int -check_digit (int c, int radix) +int +gfc_check_digit (char c, int radix) { int r; @@ -119,7 +120,7 @@ check_digit (int c, int radix) break; default: - gfc_internal_error ("check_digit(): bad radix"); + gfc_internal_error ("gfc_check_digit(): bad radix"); } return r; @@ -135,21 +136,22 @@ static int match_digits (int signflag, int radix, char *buffer) { locus old_loc; - int length, c; + int length; + char c; length = 0; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (signflag && (c == '+' || c == '-')) { if (buffer != NULL) *buffer++ = c; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); length++; } - if (!check_digit (c, radix)) + if (!gfc_check_digit (c, radix)) return -1; length++; @@ -159,9 +161,9 @@ match_digits (int signflag, int radix, char *buffer) for (;;) { old_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); - if (!check_digit (c, radix)) + if (!gfc_check_digit (c, radix)) break; if (buffer != NULL) @@ -275,10 +277,13 @@ match_hollerith_constant (gfc_expr **result) &gfc_current_locus); e->representation.string = gfc_getmem (num + 1); + + /* FIXME -- determine what should be done for wide character + strings, and do it! */ for (i = 0; i < num; i++) - { - e->representation.string[i] = gfc_next_char_literal (1); - } + e->representation.string[i] + = (unsigned char) gfc_next_char_literal (1); + e->representation.string[num] = '\0'; e->representation.length = num; @@ -306,16 +311,16 @@ cleanup: static match match_boz_constant (gfc_expr **result) { - int post, radix, delim, length, x_hex, kind; + int radix, length, x_hex, kind; locus old_loc, start_loc; - char *buffer; + char *buffer, post, delim; gfc_expr *e; start_loc = old_loc = gfc_current_locus; gfc_gobble_whitespace (); x_hex = 0; - switch (post = gfc_next_char ()) + switch (post = gfc_next_ascii_char ()) { case 'b': radix = 2; @@ -346,7 +351,7 @@ match_boz_constant (gfc_expr **result) /* No whitespace allowed here. */ if (post == 0) - delim = gfc_next_char (); + delim = gfc_next_ascii_char (); if (delim != '\'' && delim != '\"') goto backup; @@ -366,7 +371,7 @@ match_boz_constant (gfc_expr **result) return MATCH_ERROR; } - if (gfc_next_char () != delim) + if (gfc_next_ascii_char () != delim) { gfc_error ("Illegal character in BOZ constant at %C"); return MATCH_ERROR; @@ -374,7 +379,7 @@ match_boz_constant (gfc_expr **result) if (post == 1) { - switch (gfc_next_char ()) + switch (gfc_next_ascii_char ()) { case 'b': radix = 2; @@ -403,9 +408,9 @@ match_boz_constant (gfc_expr **result) memset (buffer, '\0', length + 1); match_digits (0, radix, buffer); - gfc_next_char (); /* Eat delimiter. */ + gfc_next_ascii_char (); /* Eat delimiter. */ if (post == 1) - gfc_next_char (); /* Eat postfixed b, o, z, or x. */ + gfc_next_ascii_char (); /* Eat postfixed b, o, z, or x. */ /* In section 5.2.5 and following C567 in the Fortran 2003 standard, we find "If a data-stmt-constant is a boz-literal-constant, the corresponding @@ -448,9 +453,9 @@ backup: static match match_real_constant (gfc_expr **result, int signflag) { - int kind, c, count, seen_dp, seen_digits, exp_char; + int kind, count, seen_dp, seen_digits; locus old_loc, temp_loc; - char *p, *buffer; + char *p, *buffer, c, exp_char; gfc_expr *e; bool negate; @@ -465,18 +470,18 @@ match_real_constant (gfc_expr **result, int signflag) exp_char = ' '; negate = FALSE; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (signflag && (c == '+' || c == '-')) { if (c == '-') negate = TRUE; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); } /* Scan significand. */ - for (;; c = gfc_next_char (), count++) + for (;; c = gfc_next_ascii_char (), count++) { if (c == '.') { @@ -486,11 +491,11 @@ match_real_constant (gfc_expr **result, int signflag) /* Check to see if "." goes with a following operator like ".eq.". */ temp_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (c == 'e' || c == 'd' || c == 'q') { - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (c == '.') goto done; /* Operator named .e. or .d. */ } @@ -517,12 +522,12 @@ match_real_constant (gfc_expr **result, int signflag) exp_char = c; /* Scan exponent. */ - c = gfc_next_char (); + c = gfc_next_ascii_char (); count++; if (c == '+' || c == '-') { /* optional sign */ - c = gfc_next_char (); + c = gfc_next_ascii_char (); count++; } @@ -534,7 +539,7 @@ match_real_constant (gfc_expr **result, int signflag) while (ISDIGIT (c)) { - c = gfc_next_char (); + c = gfc_next_ascii_char (); count++; } @@ -554,11 +559,11 @@ done: memset (buffer, '\0', count + 1); p = buffer; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (c == '+' || c == '-') { gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); } /* Hack for mpfr_set_str(). */ @@ -572,7 +577,7 @@ done: if (--count == 0) break; - c = gfc_next_char (); + c = gfc_next_ascii_char (); } kind = get_kind (); @@ -724,22 +729,26 @@ cleanup: return doubled delimiters on the input as a single instance of the delimiter. - Special return values are: + Special return values for "ret" argument are: -1 End of the string, as determined by the delimiter -2 Unterminated string detected Backslash codes are also expanded at this time. */ -static int -next_string_char (char delimiter) +static gfc_char_t +next_string_char (gfc_char_t delimiter, int *ret) { locus old_locus; - int c; + gfc_char_t c; c = gfc_next_char_literal (1); + *ret = 0; if (c == '\n') - return -2; + { + *ret = -2; + return 0; + } if (gfc_option.flag_backslash && c == '\\') { @@ -762,7 +771,8 @@ next_string_char (char delimiter) return c; gfc_current_locus = old_locus; - return -1; + *ret = -1; + return 0; } @@ -786,7 +796,7 @@ match_charkind_name (char *name) int len; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (!ISALPHA (c)) return MATCH_NO; @@ -796,11 +806,11 @@ match_charkind_name (char *name) for (;;) { old_loc = gfc_current_locus; - c = gfc_next_char (); + c = gfc_next_ascii_char (); if (c == '_') { - peek = gfc_peek_char (); + peek = gfc_peek_ascii_char (); if (peek == '\'' || peek == '\"') { @@ -834,13 +844,14 @@ match_charkind_name (char *name) static match match_string_constant (gfc_expr **result) { - char *p, name[GFC_MAX_SYMBOL_LEN + 1]; - int i, c, kind, length, delimiter, warn_ampersand; + char *p, name[GFC_MAX_SYMBOL_LEN + 1], peek; + int i, kind, length, warn_ampersand, ret; locus old_locus, start_locus; gfc_symbol *sym; gfc_expr *e; const char *q; match m; + gfc_char_t c, delimiter; old_locus = gfc_current_locus; @@ -855,11 +866,11 @@ match_string_constant (gfc_expr **result) goto got_delim; } - if (ISDIGIT (c)) + if (gfc_wide_is_digit (c)) { kind = 0; - while (ISDIGIT (c)) + while (gfc_wide_is_digit (c)) { kind = kind * 10 + c - '0'; if (kind > 9999999) @@ -929,10 +940,10 @@ got_delim: for (;;) { - c = next_string_char (delimiter); - if (c == -1) + c = next_string_char (delimiter, &ret); + if (ret == -1) break; - if (c == -2) + if (ret == -2) { gfc_current_locus = start_locus; gfc_error ("Unterminated character constant beginning at %C"); @@ -944,8 +955,8 @@ got_delim: /* Peek at the next character to see if it is a b, o, z, or x for the postfixed BOZ literal constants. */ - c = gfc_peek_char (); - if (c == 'b' || c == 'o' || c =='z' || c == 'x') + peek = gfc_peek_ascii_char (); + if (peek == 'b' || peek == 'o' || peek =='z' || peek == 'x') goto no_match; @@ -971,12 +982,24 @@ got_delim: gfc_option.warn_ampersand = 0; for (i = 0; i < length; i++) - *p++ = next_string_char (delimiter); + { + c = next_string_char (delimiter, &ret); + + if (!gfc_wide_fits_in_byte (c)) + { + gfc_error ("Unimplemented feature at %C: gfortran currently only " + "supports character strings with one-byte characters"); + return MATCH_ERROR; + } + + *p++ = (unsigned char) c; + } *p = '\0'; /* TODO: C-style string is for development/debug purposes. */ gfc_option.warn_ampersand = warn_ampersand; - if (next_string_char (delimiter) != -1) + next_string_char (delimiter, &ret); + if (ret != -1) gfc_internal_error ("match_string_constant(): Delimiter not found"); if (match_substring (NULL, 0, &e->ref) != MATCH_NO) @@ -1000,25 +1023,25 @@ match_logical_constant_string (void) locus orig_loc = gfc_current_locus; gfc_gobble_whitespace (); - if (gfc_next_char () == '.') + if (gfc_next_ascii_char () == '.') { - int ch = gfc_next_char(); + char ch = gfc_next_ascii_char (); if (ch == 'f') { - if (gfc_next_char () == 'a' - && gfc_next_char () == 'l' - && gfc_next_char () == 's' - && gfc_next_char () == 'e' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 'a' + && gfc_next_ascii_char () == 'l' + && gfc_next_ascii_char () == 's' + && gfc_next_ascii_char () == 'e' + && gfc_next_ascii_char () == '.') /* Matched ".false.". */ return 0; } else if (ch == 't') { - if (gfc_next_char () == 'r' - && gfc_next_char () == 'u' - && gfc_next_char () == 'e' - && gfc_next_char () == '.') + if (gfc_next_ascii_char () == 'r' + && gfc_next_ascii_char () == 'u' + && gfc_next_ascii_char () == 'e' + && gfc_next_ascii_char () == '.') /* Matched ".true.". */ return 1; } @@ -1214,7 +1237,7 @@ match_complex_constant (gfc_expr **result) { /* Give the matcher for implied do-loops a chance to run. This yields a much saner error message for (/ (i, 4=i, 6) /). */ - if (gfc_peek_char () == '=') + if (gfc_peek_ascii_char () == '=') { m = MATCH_ERROR; goto cleanup; @@ -1328,7 +1351,7 @@ match_actual_arg (gfc_expr **result) gfc_symtree *symtree; locus where, w; gfc_expr *e; - int c; + char c; where = gfc_current_locus; @@ -1343,7 +1366,7 @@ match_actual_arg (gfc_expr **result) case MATCH_YES: w = gfc_current_locus; gfc_gobble_whitespace (); - c = gfc_next_char (); + c = gfc_next_ascii_char (); gfc_current_locus = w; if (c != ',' && c != ')') @@ -1684,7 +1707,7 @@ match_varspec (gfc_expr *primary, int equiv_flag) tail = NULL; gfc_gobble_whitespace (); - if ((equiv_flag && gfc_peek_char () == '(') || sym->attr.dimension) + if ((equiv_flag && gfc_peek_ascii_char () == '(') || sym->attr.dimension) { /* In EQUIVALENCE, we don't know yet whether we are seeing an array, character variable or array of character @@ -1698,7 +1721,7 @@ match_varspec (gfc_expr *primary, int equiv_flag) return m; gfc_gobble_whitespace (); - if (equiv_flag && gfc_peek_char () == '(') + if (equiv_flag && gfc_peek_ascii_char () == '(') { tail = extend_ref (primary, tail); tail->type = REF_ARRAY; @@ -2101,7 +2124,7 @@ gfc_match_rvalue (gfc_expr **result) /* See if this is a directly recursive function call. */ gfc_gobble_whitespace (); if (sym->attr.recursive - && gfc_peek_char () == '(' + && gfc_peek_ascii_char () == '(' && gfc_current_ns->proc_name == sym && !sym->attr.dimension) { @@ -2139,7 +2162,7 @@ gfc_match_rvalue (gfc_expr **result) { case FL_VARIABLE: variable: - if (sym->ts.type == BT_UNKNOWN && gfc_peek_char () == '%' + if (sym->ts.type == BT_UNKNOWN && gfc_peek_ascii_char () == '%' && gfc_get_default_type (sym, sym->ns)->type == BT_DERIVED) gfc_set_default_type (sym, 0, sym->ns); @@ -2304,7 +2327,7 @@ gfc_match_rvalue (gfc_expr **result) via an IMPLICIT statement. This can't wait for the resolution phase. */ - if (gfc_peek_char () == '%' + if (gfc_peek_ascii_char () == '%' && sym->ts.type == BT_UNKNOWN && gfc_get_default_type (sym, sym->ns)->type == BT_DERIVED) gfc_set_default_type (sym, 0, sym->ns); @@ -2333,7 +2356,7 @@ gfc_match_rvalue (gfc_expr **result) variable is just a scalar. */ gfc_gobble_whitespace (); - if (gfc_peek_char () != '(') + if (gfc_peek_ascii_char () != '(') { /* Assume a scalar variable */ e = gfc_get_expr (); @@ -2545,7 +2568,7 @@ match_variable (gfc_expr **result, int equiv_flag, int host_flag) break; /* These are definitive indicators that this is a variable. */ - else if (gfc_peek_char () != '(' || sym->ts.type != BT_UNKNOWN + else if (gfc_peek_ascii_char () != '(' || sym->ts.type != BT_UNKNOWN || sym->attr.pointer || sym->as != NULL) flavor = FL_VARIABLE; @@ -2605,7 +2628,7 @@ match_variable (gfc_expr **result, int equiv_flag, int host_flag) else implicit_ns = sym->ns; - if (gfc_peek_char () == '%' + if (gfc_peek_ascii_char () == '%' && sym->ts.type == BT_UNKNOWN && gfc_get_default_type (sym, implicit_ns)->type == BT_DERIVED) gfc_set_default_type (sym, 0, implicit_ns); diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index 1aa52f5d576..871739c2c1e 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -72,7 +72,7 @@ static gfc_linebuf *line_head, *line_tail; locus gfc_current_locus; const char *gfc_source_file; static FILE *gfc_src_file; -static char *gfc_src_preprocessor_lines[2]; +static gfc_char_t *gfc_src_preprocessor_lines[2]; extern int pedantic; @@ -85,6 +85,135 @@ static struct gfc_file_change size_t file_changes_cur, file_changes_count; size_t file_changes_allocated; + +/* Functions dealing with our wide characters (gfc_char_t) and + sequences of such characters. */ + +int +gfc_wide_fits_in_byte (gfc_char_t c) +{ + return (c <= UCHAR_MAX); +} + +static inline int +wide_is_ascii (gfc_char_t c) +{ + return (gfc_wide_fits_in_byte (c) && ((unsigned char) c & ~0x7f) == 0); +} + +int +gfc_wide_is_printable (gfc_char_t c) +{ + return (gfc_wide_fits_in_byte (c) && ISPRINT ((unsigned char) c)); +} + +gfc_char_t +gfc_wide_tolower (gfc_char_t c) +{ + return (wide_is_ascii (c) ? (gfc_char_t) TOLOWER((unsigned char) c) : c); +} + +int +gfc_wide_is_digit (gfc_char_t c) +{ + return (c >= '0' && c <= '9'); +} + +static inline int +wide_atoi (gfc_char_t *c) +{ +#define MAX_DIGITS 20 + char buf[MAX_DIGITS+1]; + int i = 0; + + while (gfc_wide_is_digit(*c) && i < MAX_DIGITS) + buf[i++] = *c++; + buf[i] = '\0'; + return atoi (buf); +} + +size_t +gfc_wide_strlen (const gfc_char_t *str) +{ + size_t i; + + for (i = 0; str[i]; i++) + ; + + return i; +} + +static gfc_char_t * +wide_strcpy (gfc_char_t *dest, const gfc_char_t *src) +{ + gfc_char_t *d; + + for (d = dest; (*d = *src) != '\0'; ++src, ++d) + ; + + return dest; +} + +static gfc_char_t * +wide_strchr (gfc_char_t *s, gfc_char_t c) +{ + do { + if (*s == c) + { + return (gfc_char_t *) s; + } + } while (*s++); + return 0; +} + +static char * +widechar_to_char (gfc_char_t *s) +{ + size_t len = gfc_wide_strlen (s), i; + char *res = gfc_getmem (len + 1); + + for (i = 0; i < len; i++) + res[i] = gfc_wide_fits_in_byte (s[i]) ? (unsigned char) s[i] : '?'; + + res[len] = '\0'; + return res; +} + +static int +wide_strncmp (const gfc_char_t *s1, const char *s2, size_t n) +{ + gfc_char_t c1, c2; + + while (n-- > 0) + { + c1 = *s1++; + c2 = *s2++; + if (c1 != c2) + return (c1 > c2 ? 1 : -1); + if (c1 == '\0') + return 0; + } + return 0; +} + +static int +wide_strncasecmp (const gfc_char_t *s1, const char *s2, size_t n) +{ + gfc_char_t c1, c2; + + while (n-- > 0) + { + c1 = gfc_wide_tolower (*s1++); + c2 = TOLOWER (*s2++); + if (c1 != c2) + return (c1 > c2 ? 1 : -1); + if (c1 == '\0') + return 0; + } + return 0; +} + + /* Main scanner initialization. */ void @@ -406,15 +535,15 @@ gfc_advance_line (void) pointer from being on the wrong line if the current statement ends prematurely. */ -static int +static gfc_char_t next_char (void) { - int c; + gfc_char_t c; if (gfc_current_locus.nextc == NULL) return '\n'; - c = (unsigned char) *gfc_current_locus.nextc++; + c = *gfc_current_locus.nextc++; if (c == '\0') { gfc_current_locus.nextc--; /* Remain on this line. */ @@ -433,7 +562,7 @@ next_char (void) static void skip_comment_line (void) { - char c; + gfc_char_t c; do { @@ -448,17 +577,27 @@ skip_comment_line (void) int gfc_define_undef_line (void) { + char *tmp; + /* All lines beginning with '#' are either #define or #undef. */ - if (debug_info_level != DINFO_LEVEL_VERBOSE || gfc_peek_char () != '#') + if (debug_info_level != DINFO_LEVEL_VERBOSE || gfc_peek_ascii_char () != '#') return 0; - if (strncmp (gfc_current_locus.nextc, "#define ", 8) == 0) - (*debug_hooks->define) (gfc_linebuf_linenum (gfc_current_locus.lb), - &(gfc_current_locus.nextc[8])); + if (wide_strncmp (gfc_current_locus.nextc, "#define ", 8) == 0) + { + tmp = widechar_to_char (&gfc_current_locus.nextc[8]); + (*debug_hooks->define) (gfc_linebuf_linenum (gfc_current_locus.lb), + tmp); + gfc_free (tmp); + } - if (strncmp (gfc_current_locus.nextc, "#undef ", 7) == 0) - (*debug_hooks->undef) (gfc_linebuf_linenum (gfc_current_locus.lb), - &(gfc_current_locus.nextc[7])); + if (wide_strncmp (gfc_current_locus.nextc, "#undef ", 7) == 0) + { + tmp = widechar_to_char (&gfc_current_locus.nextc[7]); + (*debug_hooks->undef) (gfc_linebuf_linenum (gfc_current_locus.lb), + tmp); + gfc_free (tmp); + } /* Skip the rest of the line. */ skip_comment_line (); @@ -476,7 +615,7 @@ static bool skip_free_comments (void) { locus start; - char c; + gfc_char_t c; int at_bol; for (;;) @@ -570,7 +709,7 @@ skip_fixed_comments (void) { locus start; int col; - char c; + gfc_char_t c; if (! gfc_at_bol ()) { @@ -738,11 +877,12 @@ gfc_skip_comments (void) line. The in_string flag denotes whether we're inside a character context or not. */ -int +gfc_char_t gfc_next_char_literal (int in_string) { locus old_loc; - int i, c, prev_openmp_flag; + int i, prev_openmp_flag; + gfc_char_t c; continue_flag = 0; @@ -859,7 +999,7 @@ restart: { for (i = 0; i < 5; i++, c = next_char ()) { - gcc_assert (TOLOWER (c) == "!$omp"[i]); + gcc_assert (gfc_wide_tolower (c) == (unsigned char) "!$omp"[i]); if (i == 4) old_loc = gfc_current_locus; } @@ -932,7 +1072,7 @@ restart: for (i = 0; i < 5; i++) { c = next_char (); - if (TOLOWER (c) != "*$omp"[i]) + if (gfc_wide_tolower (c) != (unsigned char) "*$omp"[i]) goto not_continuation; } @@ -980,10 +1120,10 @@ done: parsing character literals, they have to call gfc_next_char_literal(). */ -int +gfc_char_t gfc_next_char (void) { - int c; + gfc_char_t c; do { @@ -991,15 +1131,24 @@ gfc_next_char (void) } while (gfc_current_form == FORM_FIXED && gfc_is_whitespace (c)); - return TOLOWER (c); + return gfc_wide_tolower (c); } +char +gfc_next_ascii_char (void) +{ + gfc_char_t c = gfc_next_char (); -int + return (gfc_wide_fits_in_byte (c) ? (unsigned char) c + : (unsigned char) UCHAR_MAX); +} + + +gfc_char_t gfc_peek_char (void) { locus old_loc; - int c; + gfc_char_t c; old_loc = gfc_current_locus; c = gfc_next_char (); @@ -1009,6 +1158,16 @@ gfc_peek_char (void) } +char +gfc_peek_ascii_char (void) +{ + gfc_char_t c = gfc_peek_char (); + + return (gfc_wide_fits_in_byte (c) ? (unsigned char) c + : (unsigned char) UCHAR_MAX); +} + + /* Recover from an error. We try to get past the current statement and get lined up for the next. The next statement follows a '\n' or a ';'. We also assume that we are not within a character @@ -1017,7 +1176,7 @@ gfc_peek_char (void) void gfc_error_recovery (void) { - char c, delim; + gfc_char_t c, delim; if (gfc_at_eof ()) return; @@ -1064,7 +1223,7 @@ gfc_gobble_whitespace (void) { static int linenum = 0; locus old_loc; - int c; + gfc_char_t c; do { @@ -1106,13 +1265,13 @@ gfc_gobble_whitespace (void) parts of gfortran. */ static int -load_line (FILE *input, char **pbuf, int *pbuflen) +load_line (FILE *input, gfc_char_t **pbuf, int *pbuflen) { static int linenum = 0, current_line = 1; int c, maxlen, i, preprocessor_flag, buflen = *pbuflen; int trunc_flag = 0, seen_comment = 0; int seen_printable = 0, seen_ampersand = 0; - char *buffer; + gfc_char_t *buffer; bool found_tab = false; /* Determine the maximum allowed line length. */ @@ -1135,7 +1294,7 @@ load_line (FILE *input, char **pbuf, int *pbuflen) else buflen = 132; - *pbuf = gfc_getmem (buflen + 1); + *pbuf = gfc_getmem ((buflen + 1) * sizeof (gfc_char_t)); } i = 0; @@ -1234,7 +1393,7 @@ load_line (FILE *input, char **pbuf, int *pbuflen) /* Reallocate line buffer to double size to hold the overlong line. */ buflen = buflen * 2; - *pbuf = xrealloc (*pbuf, buflen + 1); + *pbuf = xrealloc (*pbuf, (buflen + 1) * sizeof (gfc_char_t)); buffer = (*pbuf) + i; } } @@ -1297,17 +1456,19 @@ get_file (const char *name, enum lc_reason reason ATTRIBUTE_UNUSED) return f; } + /* Deal with a line from the C preprocessor. The initial octothorp has already been seen. */ static void -preprocessor_line (char *c) +preprocessor_line (gfc_char_t *c) { bool flag[5]; int i, line; - char *filename; + gfc_char_t *wide_filename; gfc_file *f; int escaped, unescape; + char *filename; c++; while (*c == ' ' || *c == '\t') @@ -1316,9 +1477,9 @@ preprocessor_line (char *c) if (*c < '0' || *c > '9') goto bad_cpp_line; - line = atoi (c); + line = wide_atoi (c); - c = strchr (c, ' '); + c = wide_strchr (c, ' '); if (c == NULL) { /* No file name given. Set new line number. */ @@ -1335,7 +1496,7 @@ preprocessor_line (char *c) goto bad_cpp_line; ++c; - filename = c; + wide_filename = c; /* Make filename end at quote. */ unescape = 0; @@ -1361,10 +1522,10 @@ preprocessor_line (char *c) /* Undo effects of cpp_quote_string. */ if (unescape) { - char *s = filename; - char *d = gfc_getmem (c - filename - unescape); + gfc_char_t *s = wide_filename; + gfc_char_t *d = gfc_getmem (c - wide_filename - unescape); - filename = d; + wide_filename = d; while (*s) { if (*s == '\\') @@ -1382,17 +1543,21 @@ preprocessor_line (char *c) for (;;) { - c = strchr (c, ' '); + c = wide_strchr (c, ' '); if (c == NULL) break; c++; - i = atoi (c); + i = wide_atoi (c); if (1 <= i && i <= 4) flag[i] = true; } + /* Convert the filename in wide characters into a filename in narrow + characters. */ + filename = widechar_to_char (wide_filename); + /* Interpret flags. */ if (flag[1]) /* Starting new file. */ @@ -1411,7 +1576,8 @@ preprocessor_line (char *c) current_file->filename, current_file->line, filename); if (unescape) - gfc_free (filename); + gfc_free (wide_filename); + gfc_free (filename); return; } @@ -1434,7 +1600,8 @@ preprocessor_line (char *c) /* Set new line number. */ current_file->line = line; if (unescape) - gfc_free (filename); + gfc_free (wide_filename); + gfc_free (filename); return; bad_cpp_line: @@ -1453,9 +1620,10 @@ static try load_file (const char *, bool); processed or true if we matched an include. */ static bool -include_line (char *line) +include_line (gfc_char_t *line) { - char quote, *c, *begin, *stop; + gfc_char_t quote, *c, *begin, *stop; + char *filename; c = line; @@ -1479,8 +1647,8 @@ include_line (char *line) while (*c == ' ' || *c == '\t') c++; - if (strncasecmp (c, "include", 7)) - return false; + if (wide_strncasecmp (c, "include", 7)) + return false; c += 7; while (*c == ' ' || *c == '\t') @@ -1513,7 +1681,9 @@ include_line (char *line) *stop = '\0'; /* It's ok to trash the buffer, as this line won't be read by anything else. */ - load_file (begin, false); + filename = widechar_to_char (begin); + load_file (filename, false); + gfc_free (filename); return true; } @@ -1523,7 +1693,7 @@ include_line (char *line) static try load_file (const char *filename, bool initial) { - char *line; + gfc_char_t *line; gfc_linebuf *b; gfc_file *f; FILE *input; @@ -1590,7 +1760,7 @@ load_file (const char *filename, bool initial) { int trunc = load_line (input, &line, &line_len); - len = strlen (line); + len = gfc_wide_strlen (line); if (feof (input) && len == 0) break; @@ -1600,15 +1770,18 @@ load_file (const char *filename, bool initial) FE FF is UTF-16 big endian, EF BB BF is UTF-8. */ if (first_line - && ((line_len >= 2 && line[0] == '\xFF' && line[1] == '\xFE') - || (line_len >= 2 && line[0] == '\xFE' && line[1] == '\xFF') - || (line_len >= 3 && line[0] == '\xEF' && line[1] == '\xBB' - && line[2] == '\xBF'))) + && ((line_len >= 2 && line[0] == (unsigned char) '\xFF' + && line[1] == (unsigned char) '\xFE') + || (line_len >= 2 && line[0] == (unsigned char) '\xFE' + && line[1] == (unsigned char) '\xFF') + || (line_len >= 3 && line[0] == (unsigned char) '\xEF' + && line[1] == (unsigned char) '\xBB' + && line[2] == (unsigned char) '\xBF'))) { - int n = line[1] == '\xBB' ? 3 : 2; - char * new = gfc_getmem (line_len); + int n = line[1] == (unsigned char) '\xBB' ? 3 : 2; + gfc_char_t *new = gfc_getmem (line_len * sizeof (gfc_char_t)); - strcpy (new, line + n); + wide_strcpy (new, &line[n]); gfc_free (line); line = new; len -= n; @@ -1623,8 +1796,8 @@ load_file (const char *filename, bool initial) and #undef lines, which we need to pass to the middle-end so that it can emit correct debug info. */ if (debug_info_level == DINFO_LEVEL_VERBOSE - && (strncmp (line, "#define ", 8) == 0 - || strncmp (line, "#undef ", 7) == 0)) + && (wide_strncmp (line, "#define ", 8) == 0 + || wide_strncmp (line, "#undef ", 7) == 0)) ; else { @@ -1646,13 +1819,14 @@ load_file (const char *filename, bool initial) /* Add line. */ - b = gfc_getmem (gfc_linebuf_header_size + len + 1); + b = gfc_getmem (gfc_linebuf_header_size + + (len + 1) * sizeof (gfc_char_t)); b->location = linemap_line_start (line_table, current_file->line++, 120); b->file = current_file; b->truncated = trunc; - strcpy (b->line, line); + wide_strcpy (b->line, line); if (line_head == NULL) line_head = b; @@ -1752,7 +1926,7 @@ const char * gfc_read_orig_filename (const char *filename, const char **canon_source_file) { int c, len; - char *dirname; + char *dirname, *tmp; gfc_src_file = gfc_open_file (filename); if (gfc_src_file == NULL) @@ -1767,10 +1941,12 @@ gfc_read_orig_filename (const char *filename, const char **canon_source_file) len = 0; load_line (gfc_src_file, &gfc_src_preprocessor_lines[0], &len); - if (strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0) + if (wide_strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0) return NULL; - filename = unescape_filename (gfc_src_preprocessor_lines[0] + 5); + tmp = widechar_to_char (&gfc_src_preprocessor_lines[0][5]); + filename = unescape_filename (tmp); + gfc_free (tmp); if (filename == NULL) return NULL; @@ -1783,10 +1959,12 @@ gfc_read_orig_filename (const char *filename, const char **canon_source_file) len = 0; load_line (gfc_src_file, &gfc_src_preprocessor_lines[1], &len); - if (strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0) + if (wide_strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0) return filename; - dirname = unescape_filename (gfc_src_preprocessor_lines[1] + 5); + tmp = widechar_to_char (&gfc_src_preprocessor_lines[1][5]); + dirname = unescape_filename (tmp); + gfc_free (tmp); if (dirname == NULL) return filename;