From 006601890b953c9177624f9f533b997f344802ad Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Tue, 6 May 2008 21:06:20 +0000 Subject: [PATCH] arith.c: (gfc_arith_concat... * arith.c: (gfc_arith_concat, gfc_compare_string, gfc_compare_with_Cstring, hollerith2representation, gfc_hollerith2int, gfc_hollerith2real, gfc_hollerith2complex, gfc_hollerith2character, gfc_hollerith2logical): Use wide characters for character constants. * data.c (create_character_intializer): Likewise. * decl.c (gfc_set_constant_character_len): Likewise. * dump-parse-tree.c (show_char_const): Correctly dump wide character strings. error.c (print_wide_char): Rename into gfc_print_wide_char. (show_locus): Adapt to new prototype of gfc_print_wide_char. expr.c (free_expr0): Representation is now disjunct from character string value, so we always free it. (gfc_copy_expr, find_substring_ref, gfc_simplify_expr): Adapt to wide character strings. * gfortran.h (gfc_expr): Make value.character.string a wide string. (gfc_wide_toupper, gfc_wide_strncasecmp, gfc_wide_memset, gfc_widechar_to_char, gfc_char_to_widechar): New prototypes. (gfc_get_wide_string): New macro. (gfc_print_wide_char): New prototype. * io.c (format_string): Make a wide string. (next_char, gfc_match_format, compare_to_allowed_values, gfc_match_open): Deal with wide strings. * module.c (mio_expr): Convert between wide strings and ASCII ones. * primary.c (match_hollerith_constant, match_charkind_name): Handle wide strings. * resolve.c (build_default_init_expr): Likewise. * scanner.c (gfc_wide_toupper, gfc_wide_memset, gfc_char_to_widechar): New functions. (wide_strchr, gfc_widechar_to_char, gfc_wide_strncasecmp): Changes in prototypes. (gfc_define_undef_line, load_line, preprocessor_line, include_line, load_file, gfc_read_orig_filename): Handle wide strings. * simplify.c (gfc_simplify_achar, gfc_simplify_adjustl, gfc_simplify_adjustr, gfc_simplify_char, gfc_simplify_iachar, gfc_simplify_ichar, simplify_min_max, gfc_simplify_new_line, gfc_simplify_repeat): Handle wide strings. (wide_strspn, wide_strcspn): New helper functions. (gfc_simplify_scan, gfc_simplify_trim, gfc_simplify_verify): Handle wide strings. * symbol.c (generate_isocbinding_symbol): Likewise. * target-memory.c (size_character, gfc_target_expr_size, encode_character, gfc_target_encode_expr, gfc_interpret_character, gfc_target_interpret_expr): Handle wide strings. * trans-const.c (gfc_conv_string_init): Lower wide strings to narrow ones. (gfc_conv_constant_to_tree): Likewise. * trans-expr.c (gfc_conv_substring_expr): Handle wide strings. * trans-io.c (gfc_new_nml_name_expr): Likewise. * trans-stmt.c (gfc_trans_label_assign): Likewise. From-SVN: r135006 --- gcc/fortran/ChangeLog | 54 +++++++++++++++++ gcc/fortran/arith.c | 50 ++++++++-------- gcc/fortran/data.c | 36 ++++++------ gcc/fortran/decl.c | 9 +-- gcc/fortran/dump-parse-tree.c | 6 +- gcc/fortran/error.c | 64 ++++++++++---------- gcc/fortran/expr.c | 42 ++++++------- gcc/fortran/gfortran.h | 11 +++- gcc/fortran/io.c | 84 +++++++++++++++----------- gcc/fortran/module.c | 13 +++-- gcc/fortran/primary.c | 23 +++++--- gcc/fortran/resolve.c | 7 +-- gcc/fortran/scanner.c | 83 ++++++++++++++++++++------ gcc/fortran/simplify.c | 107 ++++++++++++++++++++++++---------- gcc/fortran/symbol.c | 4 +- gcc/fortran/target-memory.c | 49 +++++++++++----- gcc/fortran/trans-const.c | 31 +++++++--- gcc/fortran/trans-expr.c | 9 ++- gcc/fortran/trans-io.c | 3 +- gcc/fortran/trans-stmt.c | 5 +- 20 files changed, 458 insertions(+), 232 deletions(-) diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 083a1def659..66873c0723c 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,57 @@ +2008-05-06 Francois-Xavier Coudert + + * arith.c: (gfc_arith_concat, gfc_compare_string, + gfc_compare_with_Cstring, hollerith2representation, + gfc_hollerith2int, gfc_hollerith2real, gfc_hollerith2complex, + gfc_hollerith2character, gfc_hollerith2logical): Use wide + characters for character constants. + * data.c (create_character_intializer): Likewise. + * decl.c (gfc_set_constant_character_len): Likewise. + * dump-parse-tree.c (show_char_const): Correctly dump wide + character strings. + error.c (print_wide_char): Rename into gfc_print_wide_char. + (show_locus): Adapt to new prototype of gfc_print_wide_char. + expr.c (free_expr0): Representation is now disjunct from + character string value, so we always free it. + (gfc_copy_expr, find_substring_ref, gfc_simplify_expr): Adapt + to wide character strings. + * gfortran.h (gfc_expr): Make value.character.string a wide string. + (gfc_wide_toupper, gfc_wide_strncasecmp, gfc_wide_memset, + gfc_widechar_to_char, gfc_char_to_widechar): New prototypes. + (gfc_get_wide_string): New macro. + (gfc_print_wide_char): New prototype. + * io.c (format_string): Make a wide string. + (next_char, gfc_match_format, compare_to_allowed_values, + gfc_match_open): Deal with wide strings. + * module.c (mio_expr): Convert between wide strings and ASCII ones. + * primary.c (match_hollerith_constant, match_charkind_name): + Handle wide strings. + * resolve.c (build_default_init_expr): Likewise. + * scanner.c (gfc_wide_toupper, gfc_wide_memset, + gfc_char_to_widechar): New functions. + (wide_strchr, gfc_widechar_to_char, gfc_wide_strncasecmp): + Changes in prototypes. + (gfc_define_undef_line, load_line, preprocessor_line, + include_line, load_file, gfc_read_orig_filename): Handle wide + strings. + * simplify.c (gfc_simplify_achar, gfc_simplify_adjustl, + gfc_simplify_adjustr, gfc_simplify_char, gfc_simplify_iachar, + gfc_simplify_ichar, simplify_min_max, gfc_simplify_new_line, + gfc_simplify_repeat): Handle wide strings. + (wide_strspn, wide_strcspn): New helper functions. + (gfc_simplify_scan, gfc_simplify_trim, gfc_simplify_verify): + Handle wide strings. + * symbol.c (generate_isocbinding_symbol): Likewise. + * target-memory.c (size_character, gfc_target_expr_size, + encode_character, gfc_target_encode_expr, gfc_interpret_character, + gfc_target_interpret_expr): Handle wide strings. + * trans-const.c (gfc_conv_string_init): Lower wide strings to + narrow ones. + (gfc_conv_constant_to_tree): Likewise. + * trans-expr.c (gfc_conv_substring_expr): Handle wide strings. + * trans-io.c (gfc_new_nml_name_expr): Likewise. + * trans-stmt.c (gfc_trans_label_assign): Likewise. + 2008-05-06 Francois-Xavier Coudert * simplify.c (gfc_simplify_bessel_j0,gfc_simplify_bessel_j1, diff --git a/gcc/fortran/arith.c b/gcc/fortran/arith.c index 4b8d45b189b..cbfcf291049 100644 --- a/gcc/fortran/arith.c +++ b/gcc/fortran/arith.c @@ -1102,14 +1102,15 @@ gfc_arith_concat (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) len = op1->value.character.length + op2->value.character.length; - result->value.character.string = gfc_getmem (len + 1); + result->value.character.string = gfc_get_wide_string (len + 1); result->value.character.length = len; memcpy (result->value.character.string, op1->value.character.string, - op1->value.character.length); + op1->value.character.length * sizeof (gfc_char_t)); - memcpy (result->value.character.string + op1->value.character.length, - op2->value.character.string, op2->value.character.length); + memcpy (&result->value.character.string[op1->value.character.length], + op2->value.character.string, + op2->value.character.length * sizeof (gfc_char_t)); result->value.character.string[len] = '\0'; @@ -1203,7 +1204,8 @@ compare_complex (gfc_expr *op1, gfc_expr *op2) int gfc_compare_string (gfc_expr *a, gfc_expr *b) { - int len, alen, blen, i, ac, bc; + int len, alen, blen, i; + gfc_char_t ac, bc; alen = a->value.character.length; blen = b->value.character.length; @@ -1212,10 +1214,8 @@ gfc_compare_string (gfc_expr *a, gfc_expr *b) for (i = 0; i < len; i++) { - /* We cast to unsigned char because default char, if it is signed, - would lead to ac < 0 for string[i] > 127. */ - ac = (unsigned char) ((i < alen) ? a->value.character.string[i] : ' '); - bc = (unsigned char) ((i < blen) ? b->value.character.string[i] : ' '); + ac = ((i < alen) ? a->value.character.string[i] : ' '); + bc = ((i < blen) ? b->value.character.string[i] : ' '); if (ac < bc) return -1; @@ -1231,7 +1231,8 @@ gfc_compare_string (gfc_expr *a, gfc_expr *b) int gfc_compare_with_Cstring (gfc_expr *a, const char *b, bool case_sensitive) { - int len, alen, blen, i, ac, bc; + int len, alen, blen, i; + gfc_char_t ac, bc; alen = a->value.character.length; blen = strlen (b); @@ -1240,10 +1241,8 @@ gfc_compare_with_Cstring (gfc_expr *a, const char *b, bool case_sensitive) for (i = 0; i < len; i++) { - /* We cast to unsigned char because default char, if it is signed, - would lead to ac < 0 for string[i] > 127. */ - ac = (unsigned char) ((i < alen) ? a->value.character.string[i] : ' '); - bc = (unsigned char) ((i < blen) ? b[i] : ' '); + ac = ((i < alen) ? a->value.character.string[i] : ' '); + bc = ((i < blen) ? b[i] : ' '); if (!case_sensitive) { @@ -2438,7 +2437,7 @@ hollerith2representation (gfc_expr *result, gfc_expr *src) result->representation.string = gfc_getmem (result_len + 1); memcpy (result->representation.string, src->representation.string, - MIN (result_len, src_len)); + MIN (result_len, src_len)); if (src_len < result_len) memset (&result->representation.string[src_len], ' ', result_len - src_len); @@ -2462,8 +2461,8 @@ gfc_hollerith2int (gfc_expr *src, int kind) result->where = src->where; hollerith2representation (result, src); - gfc_interpret_integer(kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.integer); + gfc_interpret_integer (kind, (unsigned char *) result->representation.string, + result->representation.length, result->value.integer); return result; } @@ -2486,8 +2485,8 @@ gfc_hollerith2real (gfc_expr *src, int kind) result->where = src->where; hollerith2representation (result, src); - gfc_interpret_float(kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.real); + gfc_interpret_float (kind, (unsigned char *) result->representation.string, + result->representation.length, result->value.real); return result; } @@ -2510,9 +2509,9 @@ gfc_hollerith2complex (gfc_expr *src, int kind) result->where = src->where; hollerith2representation (result, src); - gfc_interpret_complex(kind, (unsigned char *) result->representation.string, - result->representation.length, result->value.complex.r, - result->value.complex.i); + gfc_interpret_complex (kind, (unsigned char *) result->representation.string, + result->representation.length, result->value.complex.r, + result->value.complex.i); return result; } @@ -2529,8 +2528,9 @@ gfc_hollerith2character (gfc_expr *src, int kind) result->ts.type = BT_CHARACTER; result->ts.kind = kind; - result->value.character.string = result->representation.string; result->value.character.length = result->representation.length; + result->value.character.string + = gfc_char_to_widechar (result->representation.string); return result; } @@ -2553,8 +2553,8 @@ gfc_hollerith2logical (gfc_expr *src, int kind) result->where = src->where; hollerith2representation (result, src); - gfc_interpret_logical(kind, (unsigned char *) result->representation.string, - result->representation.length, &result->value.logical); + gfc_interpret_logical (kind, (unsigned char *) result->representation.string, + result->representation.length, &result->value.logical); return result; } diff --git a/gcc/fortran/data.c b/gcc/fortran/data.c index 13af445dfd4..6cc7223af2f 100644 --- a/gcc/fortran/data.c +++ b/gcc/fortran/data.c @@ -151,10 +151,8 @@ static gfc_expr * create_character_intializer (gfc_expr *init, gfc_typespec *ts, gfc_ref *ref, gfc_expr *rvalue) { - int len; - int start; - int end; - char *dest, *rvalue_string; + int len, start, end; + gfc_char_t *dest; gfc_extract_int (ts->cl->length, &len); @@ -165,13 +163,13 @@ create_character_intializer (gfc_expr *init, gfc_typespec *ts, init->expr_type = EXPR_CONSTANT; init->ts = *ts; - dest = gfc_getmem (len + 1); + dest = gfc_get_wide_string (len + 1); dest[len] = '\0'; init->value.character.length = len; init->value.character.string = dest; /* Blank the string if we're only setting a substring. */ if (ref != NULL) - memset (dest, ' ', len); + gfc_wide_memset (dest, ' ', len); } else dest = init->value.character.string; @@ -208,15 +206,9 @@ create_character_intializer (gfc_expr *init, gfc_typespec *ts, /* Copy the initial value. */ if (rvalue->ts.type == BT_HOLLERITH) - { - len = rvalue->representation.length; - rvalue_string = rvalue->representation.string; - } + len = rvalue->representation.length; else - { - len = rvalue->value.character.length; - rvalue_string = rvalue->value.character.string; - } + len = rvalue->value.character.length; if (len > end - start) { @@ -225,16 +217,26 @@ create_character_intializer (gfc_expr *init, gfc_typespec *ts, "at %L", &rvalue->where); } - memcpy (&dest[start], rvalue_string, len); + if (rvalue->ts.type == BT_HOLLERITH) + { + int i; + for (i = 0; i < len; i++) + dest[start+i] = rvalue->representation.string[i]; + } + else + memcpy (&dest[start], rvalue->value.character.string, + len * sizeof (gfc_char_t)); /* Pad with spaces. Substrings will already be blanked. */ if (len < end - start && ref == NULL) - memset (&dest[start + len], ' ', end - (start + len)); + gfc_wide_memset (&dest[start + len], ' ', end - (start + len)); if (rvalue->ts.type == BT_HOLLERITH) { init->representation.length = init->value.character.length; - init->representation.string = init->value.character.string; + init->representation.string + = gfc_widechar_to_char (init->value.character.string, + init->value.character.length); } return init; diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 6b462f97bb6..24606c448cf 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -1089,7 +1089,7 @@ build_sym (const char *name, gfc_charlen *cl, void gfc_set_constant_character_len (int len, gfc_expr *expr, bool array) { - char *s; + gfc_char_t *s; int slen; gcc_assert (expr->expr_type == EXPR_CONSTANT); @@ -1098,10 +1098,11 @@ gfc_set_constant_character_len (int len, gfc_expr *expr, bool array) slen = expr->value.character.length; if (len != slen) { - s = gfc_getmem (len + 1); - memcpy (s, expr->value.character.string, MIN (len, slen)); + s = gfc_get_wide_string (len + 1); + memcpy (s, expr->value.character.string, + MIN (len, slen) * sizeof (gfc_char_t)); if (len > slen) - memset (&s[slen], ' ', len - slen); + gfc_wide_memset (&s[slen], ' ', len - slen); if (gfc_option.warn_character_truncation && slen > len) gfc_warning_now ("CHARACTER expression at %L is being truncated " diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c index c195dcf977f..44a4941e7b4 100644 --- a/gcc/fortran/dump-parse-tree.c +++ b/gcc/fortran/dump-parse-tree.c @@ -301,7 +301,7 @@ show_constructor (gfc_constructor *c) static void -show_char_const (const char *c, int length) +show_char_const (const gfc_char_t *c, int length) { int i; @@ -310,10 +310,8 @@ show_char_const (const char *c, int length) { if (c[i] == '\'') fputs ("''", dumpfile); - else if (ISPRINT (c[i])) - fputc (c[i], dumpfile); else - fprintf (dumpfile, "' // ACHAR(%d) // '", c[i]); + fputs (gfc_print_wide_char (c[i]), dumpfile); } fputc ('\'', dumpfile); } diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c index c119bcadd7d..a9cbe9ef5f2 100644 --- a/gcc/fortran/error.c +++ b/gcc/fortran/error.c @@ -152,48 +152,51 @@ error_integer (long int i) } -/* Show the file, where it was included, and the source line, give a - locus. Calls error_printf() recursively, but the recursion is at - most one level deep. */ +static char wide_char_print_buffer[11]; -static void -print_wide_char (gfc_char_t c) +const char * +gfc_print_wide_char (gfc_char_t c) { static const char xdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - char buf[9]; + char *buf = wide_char_print_buffer; if (gfc_wide_is_printable (c)) - error_char (c); + { + buf[1] = '\0'; + buf[0] = (unsigned char) c; + } else if (c < ((gfc_char_t) 1 << 8)) { - buf[2] = '\0'; - buf[1] = xdigit[c & 0x0F]; + buf[4] = '\0'; + buf[3] = xdigit[c & 0x0F]; c = c >> 4; - buf[0] = xdigit[c & 0x0F]; + buf[2] = xdigit[c & 0x0F]; - error_char ('\\'); - error_char ('x'); - error_string (buf); + buf[1] = '\\'; + buf[0] = 'x'; } else if (c < ((gfc_char_t) 1 << 16)) { - buf[4] = '\0'; - buf[3] = xdigit[c & 0x0F]; + buf[6] = '\0'; + buf[5] = xdigit[c & 0x0F]; c = c >> 4; - buf[2] = xdigit[c & 0x0F]; + buf[4] = xdigit[c & 0x0F]; c = c >> 4; - buf[1] = xdigit[c & 0x0F]; + buf[3] = xdigit[c & 0x0F]; c = c >> 4; - buf[0] = xdigit[c & 0x0F]; + buf[2] = xdigit[c & 0x0F]; - error_char ('\\'); - error_char ('u'); - error_string (buf); + buf[1] = '\\'; + buf[0] = 'u'; } else { - buf[8] = '\0'; + buf[10] = '\0'; + buf[9] = xdigit[c & 0x0F]; + c = c >> 4; + buf[8] = xdigit[c & 0x0F]; + c = c >> 4; buf[7] = xdigit[c & 0x0F]; c = c >> 4; buf[6] = xdigit[c & 0x0F]; @@ -205,17 +208,18 @@ print_wide_char (gfc_char_t c) buf[3] = xdigit[c & 0x0F]; c = c >> 4; buf[2] = xdigit[c & 0x0F]; - c = c >> 4; - buf[1] = xdigit[c & 0x0F]; - c = c >> 4; - buf[0] = xdigit[c & 0x0F]; - error_char ('\\'); - error_char ('U'); - error_string (buf); + buf[1] = '\\'; + buf[0] = 'U'; } + + return buf; } +/* Show the file, where it was included, and the source line, give a + locus. Calls error_printf() recursively, but the recursion is at + most one level deep. */ + static void error_printf (const char *, ...) ATTRIBUTE_GCC_GFC(1,2); static void @@ -317,7 +321,7 @@ show_locus (locus *loc, int c1, int c2) if (c == '\t') c = ' '; - print_wide_char (c); + error_string (gfc_print_wide_char (c)); } error_char ('\n'); diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c index 70914c14aab..87ea9e95090 100644 --- a/gcc/fortran/expr.c +++ b/gcc/fortran/expr.c @@ -164,9 +164,8 @@ free_expr0 (gfc_expr *e) break; } - /* Free the representation, except in character constants where it - is the same as value.character.string and thus already freed. */ - if (e->representation.string && e->ts.type != BT_CHARACTER) + /* Free the representation. */ + if (e->representation.string) gfc_free (e->representation.string); break; @@ -393,7 +392,8 @@ gfc_expr * gfc_copy_expr (gfc_expr *p) { gfc_expr *q; - char *s; + gfc_char_t *s; + char *c; if (p == NULL) return NULL; @@ -404,20 +404,19 @@ gfc_copy_expr (gfc_expr *p) switch (q->expr_type) { case EXPR_SUBSTRING: - s = gfc_getmem (p->value.character.length + 1); + s = gfc_get_wide_string (p->value.character.length + 1); q->value.character.string = s; - - memcpy (s, p->value.character.string, p->value.character.length + 1); + memcpy (s, p->value.character.string, + (p->value.character.length + 1) * sizeof (gfc_char_t)); break; case EXPR_CONSTANT: /* Copy target representation, if it exists. */ if (p->representation.string) { - s = gfc_getmem (p->representation.length + 1); - q->representation.string = s; - - memcpy (s, p->representation.string, p->representation.length + 1); + c = gfc_getmem (p->representation.length + 1); + q->representation.string = c; + memcpy (c, p->representation.string, (p->representation.length + 1)); } /* Copy the values of any pointer components of p->value. */ @@ -443,10 +442,11 @@ gfc_copy_expr (gfc_expr *p) case BT_CHARACTER: if (p->representation.string) - q->value.character.string = q->representation.string; + q->value.character.string + = gfc_char_to_widechar (q->representation.string); else { - s = gfc_getmem (p->value.character.length + 1); + s = gfc_get_wide_string (p->value.character.length + 1); q->value.character.string = s; /* This is the case for the C_NULL_CHAR named constant. */ @@ -460,7 +460,7 @@ gfc_copy_expr (gfc_expr *p) } else memcpy (s, p->value.character.string, - p->value.character.length + 1); + (p->value.character.length + 1) * sizeof (gfc_char_t)); } break; @@ -1379,7 +1379,7 @@ find_substring_ref (gfc_expr *p, gfc_expr **newp) int end; int start; int length; - char *chr; + gfc_char_t *chr; if (p->ref->u.ss.start->expr_type != EXPR_CONSTANT || p->ref->u.ss.end->expr_type != EXPR_CONSTANT) @@ -1392,9 +1392,10 @@ find_substring_ref (gfc_expr *p, gfc_expr **newp) start = (int) mpz_get_ui (p->ref->u.ss.start->value.integer); length = end - start + 1; - chr = (*newp)->value.character.string = gfc_getmem (length + 1); + chr = (*newp)->value.character.string = gfc_get_wide_string (length + 1); (*newp)->value.character.length = length; - memcpy (chr, &p->value.character.string[start - 1], length); + memcpy (chr, &p->value.character.string[start - 1], + length * sizeof (gfc_char_t)); chr[length] = '\0'; return SUCCESS; } @@ -1592,7 +1593,7 @@ gfc_simplify_expr (gfc_expr *p, int type) if (gfc_is_constant_expr (p)) { - char *s; + gfc_char_t *s; int start, end; if (p->ref && p->ref->u.ss.start) @@ -1608,8 +1609,9 @@ gfc_simplify_expr (gfc_expr *p, int type) else end = p->value.character.length; - s = gfc_getmem (end - start + 2); - memcpy (s, p->value.character.string + start, end - start); + s = gfc_get_wide_string (end - start + 2); + memcpy (s, p->value.character.string + start, + (end - start) * sizeof (gfc_char_t)); s[end - start + 1] = '\0'; /* TODO: C-style string. */ gfc_free (p->value.character.string); p->value.character.string = s; diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 36c970ca0c8..b11cfa3c24d 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1497,7 +1497,7 @@ typedef struct gfc_expr struct { int length; - char *string; + gfc_char_t *string; } character; @@ -1959,7 +1959,14 @@ int gfc_wide_is_printable (gfc_char_t); int gfc_wide_is_digit (gfc_char_t); int gfc_wide_fits_in_byte (gfc_char_t); gfc_char_t gfc_wide_tolower (gfc_char_t); +gfc_char_t gfc_wide_toupper (gfc_char_t); size_t gfc_wide_strlen (const gfc_char_t *); +int gfc_wide_strncasecmp (const gfc_char_t *, const char *, size_t); +gfc_char_t *gfc_wide_memset (gfc_char_t *, gfc_char_t, size_t); +char *gfc_widechar_to_char (const gfc_char_t *, int); +gfc_char_t *gfc_char_to_widechar (const char *); + +#define gfc_get_wide_string(n) gfc_getmem((n) * sizeof(gfc_char_t)) void gfc_skip_comments (void); gfc_char_t gfc_next_char_literal (int); @@ -2019,6 +2026,8 @@ typedef struct gfc_error_buf void gfc_error_init_1 (void); void gfc_buffer_error (int); +const char *gfc_print_wide_char (gfc_char_t); + void gfc_warning (const char *, ...) ATTRIBUTE_GCC_GFC(1,2); void gfc_warning_now (const char *, ...) ATTRIBUTE_GCC_GFC(1,2); void gfc_clear_warning (void); diff --git a/gcc/fortran/io.c b/gcc/fortran/io.c index 07848a1cd6e..736253fe159 100644 --- a/gcc/fortran/io.c +++ b/gcc/fortran/io.c @@ -117,7 +117,7 @@ format_token; /* Local variables for checking format strings. The saved_token is used to back up by a single format token during the parsing process. */ -static char *format_string; +static gfc_char_t *format_string; static int format_length, use_last_char; static format_token saved_token; @@ -165,7 +165,7 @@ next_char (int in_string) if (mode == MODE_COPY) *format_string++ = c; - c = TOUPPER ((unsigned char) c); + c = gfc_wide_toupper (c); return c; } @@ -782,7 +782,7 @@ data_desc: gfc_warning ("The H format specifier at %C is" " a Fortran 95 deleted feature"); - if(mode == MODE_STRING) + if (mode == MODE_STRING) { format_string += value; format_length -= value; @@ -1010,7 +1010,8 @@ gfc_match_format (void) e->ts.type = BT_CHARACTER; e->ts.kind = gfc_default_character_kind; e->where = start; - e->value.character.string = format_string = gfc_getmem (format_length + 1); + e->value.character.string = format_string + = gfc_get_wide_string (format_length + 1); e->value.character.length = format_length; gfc_statement_label->format = e; @@ -1412,13 +1413,13 @@ gfc_resolve_open (gfc_open *open) static int compare_to_allowed_values (const char *specifier, const char *allowed[], const char *allowed_f2003[], - const char *allowed_gnu[], char *value, + const char *allowed_gnu[], gfc_char_t *value, const char *statement, bool warn) { int i; unsigned int len; - len = strlen (value); + len = gfc_wide_strlen (value); if (len > 0) { for (len--; len > 0; len--) @@ -1429,13 +1430,13 @@ compare_to_allowed_values (const char *specifier, const char *allowed[], for (i = 0; allowed[i]; i++) if (len == strlen (allowed[i]) - && strncasecmp (value, allowed[i], strlen (allowed[i])) == 0) + && gfc_wide_strncasecmp (value, allowed[i], strlen (allowed[i])) == 0) return 1; for (i = 0; allowed_f2003 && allowed_f2003[i]; i++) if (len == strlen (allowed_f2003[i]) - && strncasecmp (value, allowed_f2003[i], strlen (allowed_f2003[i])) - == 0) + && gfc_wide_strncasecmp (value, allowed_f2003[i], + strlen (allowed_f2003[i])) == 0) { notification n = gfc_notification_std (GFC_STD_F2003); @@ -1461,7 +1462,8 @@ compare_to_allowed_values (const char *specifier, const char *allowed[], for (i = 0; allowed_gnu && allowed_gnu[i]; i++) if (len == strlen (allowed_gnu[i]) - && strncasecmp (value, allowed_gnu[i], strlen (allowed_gnu[i])) == 0) + && gfc_wide_strncasecmp (value, allowed_gnu[i], + strlen (allowed_gnu[i])) == 0) { notification n = gfc_notification_std (GFC_STD_GNU); @@ -1487,14 +1489,18 @@ compare_to_allowed_values (const char *specifier, const char *allowed[], if (warn) { + char *s = gfc_widechar_to_char (value, -1); gfc_warning ("%s specifier in %s statement at %C has invalid value '%s'", - specifier, statement, value); + specifier, statement, s); + gfc_free (s); return 1; } else { + char *s = gfc_widechar_to_char (value, -1); gfc_error ("%s specifier in %s statement at %C has invalid value '%s'", - specifier, statement, value); + specifier, statement, s); + gfc_free (s); return 0; } } @@ -1773,20 +1779,22 @@ gfc_match_open (void) /* F2003, 9.4.5: If the STATUS= specifier has the value NEW or REPLACE, the FILE= specifier shall appear. */ if (open->file == NULL - && (strncasecmp (open->status->value.character.string, "replace", 7) - == 0 - || strncasecmp (open->status->value.character.string, "new", 3) - == 0)) + && (gfc_wide_strncasecmp (open->status->value.character.string, + "replace", 7) == 0 + || gfc_wide_strncasecmp (open->status->value.character.string, + "new", 3) == 0)) { + char *s = gfc_widechar_to_char (open->status->value.character.string, + -1); warn_or_error ("The STATUS specified in OPEN statement at %C is " - "'%s' and no FILE specifier is present", - open->status->value.character.string); + "'%s' and no FILE specifier is present", s); + gfc_free (s); } /* F2003, 9.4.5: If the STATUS= specifier has the value SCRATCH, the FILE= specifier shall not appear. */ - if (strncasecmp (open->status->value.character.string, "scratch", 7) - == 0 && open->file) + if (gfc_wide_strncasecmp (open->status->value.character.string, + "scratch", 7) == 0 && open->file) { warn_or_error ("The STATUS specified in OPEN statement at %C " "cannot have the value SCRATCH if a FILE specifier " @@ -1798,8 +1806,8 @@ gfc_match_open (void) if (open->form && open->form->expr_type == EXPR_CONSTANT && (open->delim || open->decimal || open->encoding || open->round || open->sign || open->pad || open->blank) - && strncasecmp (open->form->value.character.string, - "unformatted", 11) == 0) + && gfc_wide_strncasecmp (open->form->value.character.string, + "unformatted", 11) == 0) { const char *spec = (open->delim ? "DELIM " : (open->pad ? "PAD " : open->blank @@ -1810,7 +1818,8 @@ gfc_match_open (void) } if (open->recl && open->access && open->access->expr_type == EXPR_CONSTANT - && strncasecmp (open->access->value.character.string, "stream", 6) == 0) + && gfc_wide_strncasecmp (open->access->value.character.string, + "stream", 6) == 0) { warn_or_error ("RECL specifier not allowed in OPEN statement at %C for " "stream I/O"); @@ -1818,12 +1827,12 @@ gfc_match_open (void) if (open->position && open->access && open->access->expr_type == EXPR_CONSTANT - && !(strncasecmp (open->access->value.character.string, - "sequential", 10) == 0 - || strncasecmp (open->access->value.character.string, - "stream", 6) == 0 - || strncasecmp (open->access->value.character.string, - "append", 6) == 0)) + && !(gfc_wide_strncasecmp (open->access->value.character.string, + "sequential", 10) == 0 + || gfc_wide_strncasecmp (open->access->value.character.string, + "stream", 6) == 0 + || gfc_wide_strncasecmp (open->access->value.character.string, + "append", 6) == 0)) { warn_or_error ("POSITION specifier in OPEN statement at %C only allowed " "for stream or sequential ACCESS"); @@ -2939,9 +2948,12 @@ if (condition) \ if (dt->id) { - io_constraint (!dt->asynchronous - || strcmp (dt->asynchronous->value.character.string, - "yes"), + bool not_yes + = !dt->asynchronous + || gfc_wide_strlen (dt->asynchronous->value.character.string) != 3 + || gfc_wide_strncasecmp (dt->asynchronous->value.character.string, + "yes", 3) != 0; + io_constraint (not_yes, "ID= specifier at %L must be with ASYNCHRONOUS='yes' " "specifier", &dt->id->where); } @@ -3137,9 +3149,11 @@ if (condition) \ if (expr->expr_type == EXPR_CONSTANT && expr->ts.type == BT_CHARACTER) { - const char * advance = expr->value.character.string; - not_no = strcasecmp (advance, "no") != 0; - not_yes = strcasecmp (advance, "yes") != 0; + const gfc_char_t *advance = expr->value.character.string; + not_no = gfc_wide_strlen (advance) != 2 + || gfc_wide_strncasecmp (advance, "no", 2) != 0; + not_yes = gfc_wide_strlen (advance) != 3 + || gfc_wide_strncasecmp (advance, "yes", 3) != 0; } else { diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c index 832f68698b4..8d8b22a5704 100644 --- a/gcc/fortran/module.c +++ b/gcc/fortran/module.c @@ -2708,6 +2708,7 @@ mio_expr (gfc_expr **ep) { gfc_expr *e; atom_type t; + char *s; int flag; mio_lparen (); @@ -2832,8 +2833,10 @@ mio_expr (gfc_expr **ep) break; case EXPR_SUBSTRING: - e->value.character.string - = CONST_CAST (char *, mio_allocated_string (e->value.character.string)); + s = gfc_widechar_to_char (e->value.character.string, -1); + s = CONST_CAST (char *, mio_allocated_string (s)); + e->value.character.string = gfc_char_to_widechar (s); + gfc_free (s); mio_ref_list (&e->ref); break; @@ -2867,8 +2870,10 @@ mio_expr (gfc_expr **ep) case BT_CHARACTER: mio_integer (&e->value.character.length); - e->value.character.string - = CONST_CAST (char *, mio_allocated_string (e->value.character.string)); + s = gfc_widechar_to_char (e->value.character.string, -1); + s = CONST_CAST (char *, mio_allocated_string (s)); + e->value.character.string = gfc_char_to_widechar (s); + gfc_free (s); break; default: diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c index d7491c1a110..fbc26af76e9 100644 --- a/gcc/fortran/primary.c +++ b/gcc/fortran/primary.c @@ -278,11 +278,18 @@ match_hollerith_constant (gfc_expr **result) e->representation.string = gfc_getmem (num + 1); - /* FIXME -- determine what should be done for wide character - strings, and do it! */ for (i = 0; i < num; i++) - e->representation.string[i] - = (unsigned char) gfc_next_char_literal (1); + { + gfc_char_t c = gfc_next_char_literal (1); + if (! gfc_wide_fits_in_byte (c)) + { + gfc_error ("Invalid Hollerith constant at %L contains a " + "wide character", &old_loc); + goto cleanup; + } + + e->representation.string[i] = (unsigned char) c; + } e->representation.string[num] = '\0'; e->representation.length = num; @@ -844,14 +851,14 @@ match_charkind_name (char *name) static match match_string_constant (gfc_expr **result) { - char *p, name[GFC_MAX_SYMBOL_LEN + 1], peek; + char name[GFC_MAX_SYMBOL_LEN + 1], peek; int i, kind, length, warn_ampersand, ret; locus old_locus, start_locus; gfc_symbol *sym; gfc_expr *e; const char *q; match m; - gfc_char_t c, delimiter; + gfc_char_t c, delimiter, *p; old_locus = gfc_current_locus; @@ -970,7 +977,7 @@ got_delim: e->ts.is_iso_c = 0; e->where = start_locus; - e->value.character.string = p = gfc_getmem (length + 1); + e->value.character.string = p = gfc_get_wide_string (length + 1); e->value.character.length = length; gfc_current_locus = start_locus; @@ -992,7 +999,7 @@ got_delim: return MATCH_ERROR; } - *p++ = (unsigned char) c; + *p++ = c; } *p = '\0'; /* TODO: C-style string is for development/debug purposes. */ diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 424420583ed..6338b068ecc 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -6804,7 +6804,6 @@ build_default_init_expr (gfc_symbol *sym) int char_len; gfc_expr *init_expr; int i; - char *ch; /* These symbols should never have a default initialization. */ if ((sym->attr.dimension && !gfc_is_compile_time_shape (sym->as)) @@ -6922,10 +6921,10 @@ build_default_init_expr (gfc_symbol *sym) { char_len = mpz_get_si (sym->ts.cl->length->value.integer); init_expr->value.character.length = char_len; - init_expr->value.character.string = gfc_getmem (char_len+1); - ch = init_expr->value.character.string; + init_expr->value.character.string = gfc_get_wide_string (char_len+1); for (i = 0; i < char_len; i++) - *(ch++) = gfc_option.flag_init_character_value; + init_expr->value.character.string[i] + = (unsigned char) gfc_option.flag_init_character_value; } else { diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index 871739c2c1e..21b9311d12a 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -113,6 +113,12 @@ gfc_wide_tolower (gfc_char_t c) return (wide_is_ascii (c) ? (gfc_char_t) TOLOWER((unsigned char) c) : c); } +gfc_char_t +gfc_wide_toupper (gfc_char_t c) +{ + return (wide_is_ascii (c) ? (gfc_char_t) TOUPPER((unsigned char) c) : c); +} + int gfc_wide_is_digit (gfc_char_t c) { @@ -143,6 +149,17 @@ gfc_wide_strlen (const gfc_char_t *str) return i; } +gfc_char_t * +gfc_wide_memset (gfc_char_t *b, gfc_char_t c, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + b[i] = c; + + return b; +} + static gfc_char_t * wide_strcpy (gfc_char_t *dest, const gfc_char_t *src) { @@ -155,25 +172,55 @@ wide_strcpy (gfc_char_t *dest, const gfc_char_t *src) } static gfc_char_t * -wide_strchr (gfc_char_t *s, gfc_char_t c) +wide_strchr (const gfc_char_t *s, gfc_char_t c) { do { if (*s == c) { - return (gfc_char_t *) s; + return CONST_CAST(gfc_char_t *, s); } } while (*s++); return 0; } -static char * -widechar_to_char (gfc_char_t *s) +char * +gfc_widechar_to_char (const gfc_char_t *s, int length) +{ + size_t len, i; + char *res; + + if (s == NULL) + return NULL; + + /* Passing a negative length is used to indicate that length should be + calculated using gfc_wide_strlen(). */ + len = (length >= 0 ? (size_t) length : gfc_wide_strlen (s)); + res = gfc_getmem (len + 1); + + for (i = 0; i < len; i++) + { + gcc_assert (gfc_wide_fits_in_byte (s[i])); + res[i] = (unsigned char) s[i]; + } + + res[len] = '\0'; + return res; +} + +gfc_char_t * +gfc_char_to_widechar (const char *s) { - size_t len = gfc_wide_strlen (s), i; - char *res = gfc_getmem (len + 1); + size_t len, i; + gfc_char_t *res; + + if (s == NULL) + return NULL; + + len = strlen (s); + res = gfc_get_wide_string (len + 1); for (i = 0; i < len; i++) - res[i] = gfc_wide_fits_in_byte (s[i]) ? (unsigned char) s[i] : '?'; + res[i] = (unsigned char) s[i]; res[len] = '\0'; return res; @@ -196,8 +243,8 @@ wide_strncmp (const gfc_char_t *s1, const char *s2, size_t n) return 0; } -static int -wide_strncasecmp (const gfc_char_t *s1, const char *s2, size_t n) +int +gfc_wide_strncasecmp (const gfc_char_t *s1, const char *s2, size_t n) { gfc_char_t c1, c2; @@ -585,7 +632,7 @@ gfc_define_undef_line (void) if (wide_strncmp (gfc_current_locus.nextc, "#define ", 8) == 0) { - tmp = widechar_to_char (&gfc_current_locus.nextc[8]); + tmp = gfc_widechar_to_char (&gfc_current_locus.nextc[8], -1); (*debug_hooks->define) (gfc_linebuf_linenum (gfc_current_locus.lb), tmp); gfc_free (tmp); @@ -593,7 +640,7 @@ gfc_define_undef_line (void) if (wide_strncmp (gfc_current_locus.nextc, "#undef ", 7) == 0) { - tmp = widechar_to_char (&gfc_current_locus.nextc[7]); + tmp = gfc_widechar_to_char (&gfc_current_locus.nextc[7], -1); (*debug_hooks->undef) (gfc_linebuf_linenum (gfc_current_locus.lb), tmp); gfc_free (tmp); @@ -1294,7 +1341,7 @@ load_line (FILE *input, gfc_char_t **pbuf, int *pbuflen) else buflen = 132; - *pbuf = gfc_getmem ((buflen + 1) * sizeof (gfc_char_t)); + *pbuf = gfc_get_wide_string (buflen + 1); } i = 0; @@ -1556,7 +1603,7 @@ preprocessor_line (gfc_char_t *c) /* Convert the filename in wide characters into a filename in narrow characters. */ - filename = widechar_to_char (wide_filename); + filename = gfc_widechar_to_char (wide_filename, -1); /* Interpret flags. */ @@ -1647,7 +1694,7 @@ include_line (gfc_char_t *line) while (*c == ' ' || *c == '\t') c++; - if (wide_strncasecmp (c, "include", 7)) + if (gfc_wide_strncasecmp (c, "include", 7)) return false; c += 7; @@ -1681,7 +1728,7 @@ include_line (gfc_char_t *line) *stop = '\0'; /* It's ok to trash the buffer, as this line won't be read by anything else. */ - filename = widechar_to_char (begin); + filename = gfc_widechar_to_char (begin, -1); load_file (filename, false); gfc_free (filename); return true; @@ -1779,7 +1826,7 @@ load_file (const char *filename, bool initial) && line[2] == (unsigned char) '\xBF'))) { int n = line[1] == (unsigned char) '\xBB' ? 3 : 2; - gfc_char_t *new = gfc_getmem (line_len * sizeof (gfc_char_t)); + gfc_char_t *new = gfc_get_wide_string (line_len); wide_strcpy (new, &line[n]); gfc_free (line); @@ -1944,7 +1991,7 @@ gfc_read_orig_filename (const char *filename, const char **canon_source_file) if (wide_strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0) return NULL; - tmp = widechar_to_char (&gfc_src_preprocessor_lines[0][5]); + tmp = gfc_widechar_to_char (&gfc_src_preprocessor_lines[0][5], -1); filename = unescape_filename (tmp); gfc_free (tmp); if (filename == NULL) @@ -1962,7 +2009,7 @@ gfc_read_orig_filename (const char *filename, const char **canon_source_file) if (wide_strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0) return filename; - tmp = widechar_to_char (&gfc_src_preprocessor_lines[1][5]); + tmp = gfc_widechar_to_char (&gfc_src_preprocessor_lines[1][5], -1); dirname = unescape_filename (tmp); gfc_free (tmp); if (dirname == NULL) diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c index 5de686fb957..e87804cef0c 100644 --- a/gcc/fortran/simplify.c +++ b/gcc/fortran/simplify.c @@ -284,7 +284,7 @@ gfc_simplify_achar (gfc_expr *e, gfc_expr *k) result = gfc_constant_result (BT_CHARACTER, kind, &e->where); - result->value.character.string = gfc_getmem (2); + result->value.character.string = gfc_get_wide_string (2); result->value.character.length = 1; result->value.character.string[0] = c; @@ -343,7 +343,7 @@ gfc_simplify_adjustl (gfc_expr *e) { gfc_expr *result; int count, i, len; - char ch; + gfc_char_t ch; if (e->expr_type != EXPR_CONSTANT) return NULL; @@ -353,7 +353,7 @@ gfc_simplify_adjustl (gfc_expr *e) result = gfc_constant_result (BT_CHARACTER, e->ts.kind, &e->where); result->value.character.length = len; - result->value.character.string = gfc_getmem (len + 1); + result->value.character.string = gfc_get_wide_string (len + 1); for (count = 0, i = 0; i < len; ++i) { @@ -380,7 +380,7 @@ gfc_simplify_adjustr (gfc_expr *e) { gfc_expr *result; int count, i, len; - char ch; + gfc_char_t ch; if (e->expr_type != EXPR_CONSTANT) return NULL; @@ -390,7 +390,7 @@ gfc_simplify_adjustr (gfc_expr *e) result = gfc_constant_result (BT_CHARACTER, e->ts.kind, &e->where); result->value.character.length = len; - result->value.character.string = gfc_getmem (len + 1); + result->value.character.string = gfc_get_wide_string (len + 1); for (count = 0, i = len - 1; i >= 0; --i) { @@ -843,7 +843,7 @@ gfc_simplify_char (gfc_expr *e, gfc_expr *k) result = gfc_constant_result (BT_CHARACTER, kind, &e->where); result->value.character.length = 1; - result->value.character.string = gfc_getmem (2); + result->value.character.string = gfc_get_wide_string (2); result->value.character.string[0] = c; result->value.character.string[1] = '\0'; /* For debugger */ @@ -1460,7 +1460,7 @@ gfc_expr * gfc_simplify_iachar (gfc_expr *e, gfc_expr *kind) { gfc_expr *result; - int index; + gfc_char_t index; if (e->expr_type != EXPR_CONSTANT) return NULL; @@ -1471,7 +1471,7 @@ gfc_simplify_iachar (gfc_expr *e, gfc_expr *kind) return &gfc_bad_expr; } - index = (unsigned char) e->value.character.string[0]; + index = e->value.character.string[0]; if (gfc_option.warn_surprising && index > 127) gfc_warning ("Argument of IACHAR function at %L outside of range 0..127", @@ -1649,7 +1649,7 @@ gfc_expr * gfc_simplify_ichar (gfc_expr *e, gfc_expr *kind) { gfc_expr *result; - int index; + gfc_char_t index; if (e->expr_type != EXPR_CONSTANT) return NULL; @@ -1660,9 +1660,8 @@ gfc_simplify_ichar (gfc_expr *e, gfc_expr *kind) return &gfc_bad_expr; } - index = (unsigned char) e->value.character.string[0]; - - if (index < 0 || index > UCHAR_MAX) + index = e->value.character.string[0]; + if (index > UCHAR_MAX) gfc_internal_error("Argument of ICHAR at %L out of range", &e->where); if ((result = int_expr_with_kind (index, kind, "ICHAR")) == NULL) @@ -2687,12 +2686,13 @@ simplify_min_max (gfc_expr *expr, int sign) #define STRING(x) ((x)->expr->value.character.string) if (LENGTH(extremum) < LENGTH(arg)) { - char * tmp = STRING(extremum); + gfc_char_t *tmp = STRING(extremum); - STRING(extremum) = gfc_getmem (LENGTH(arg) + 1); - memcpy (STRING(extremum), tmp, LENGTH(extremum)); - memset (&STRING(extremum)[LENGTH(extremum)], ' ', - LENGTH(arg) - LENGTH(extremum)); + STRING(extremum) = gfc_get_wide_string (LENGTH(arg) + 1); + memcpy (STRING(extremum), tmp, + LENGTH(extremum) * sizeof (gfc_char_t)); + gfc_wide_memset (&STRING(extremum)[LENGTH(extremum)], ' ', + LENGTH(arg) - LENGTH(extremum)); STRING(extremum)[LENGTH(arg)] = '\0'; /* For debugger */ LENGTH(extremum) = LENGTH(arg); gfc_free (tmp); @@ -2701,10 +2701,11 @@ simplify_min_max (gfc_expr *expr, int sign) if (gfc_compare_string (arg->expr, extremum->expr) * sign > 0) { gfc_free (STRING(extremum)); - STRING(extremum) = gfc_getmem (LENGTH(extremum) + 1); - memcpy (STRING(extremum), STRING(arg), LENGTH(arg)); - memset (&STRING(extremum)[LENGTH(arg)], ' ', - LENGTH(extremum) - LENGTH(arg)); + STRING(extremum) = gfc_get_wide_string (LENGTH(extremum) + 1); + memcpy (STRING(extremum), STRING(arg), + LENGTH(arg) * sizeof (gfc_char_t)); + gfc_wide_memset (&STRING(extremum)[LENGTH(arg)], ' ', + LENGTH(extremum) - LENGTH(arg)); STRING(extremum)[LENGTH(extremum)] = '\0'; /* For debugger */ } #undef LENGTH @@ -3008,7 +3009,7 @@ gfc_simplify_new_line (gfc_expr *e) gfc_expr *result; result = gfc_constant_result (BT_CHARACTER, e->ts.kind, &e->where); - result->value.character.string = gfc_getmem (2); + result->value.character.string = gfc_get_wide_string (2); result->value.character.length = 1; result->value.character.string[0] = '\n'; result->value.character.string[1] = '\0'; /* For debugger */ @@ -3329,19 +3330,18 @@ gfc_simplify_repeat (gfc_expr *e, gfc_expr *n) if (ncop == 0) { - result->value.character.string = gfc_getmem (1); + result->value.character.string = gfc_get_wide_string (1); result->value.character.length = 0; result->value.character.string[0] = '\0'; return result; } result->value.character.length = nlen; - result->value.character.string = gfc_getmem (nlen + 1); + result->value.character.string = gfc_get_wide_string (nlen + 1); for (i = 0; i < ncop; i++) for (j = 0; j < len; j++) - result->value.character.string[j + i * len] - = e->value.character.string[j]; + result->value.character.string[j+i*len]= e->value.character.string[j]; result->value.character.string[nlen] = '\0'; /* For debugger */ return result; @@ -3696,6 +3696,51 @@ gfc_simplify_scale (gfc_expr *x, gfc_expr *i) } +/* Variants of strspn and strcspn that operate on wide characters. */ + +static size_t +wide_strspn (const gfc_char_t *s1, const gfc_char_t *s2) +{ + size_t i = 0; + const gfc_char_t *c; + + while (s1[i]) + { + for (c = s2; *c; c++) + { + if (s1[i] == *c) + break; + } + if (*c == '\0') + break; + i++; + } + + return i; +} + +static size_t +wide_strcspn (const gfc_char_t *s1, const gfc_char_t *s2) +{ + size_t i = 0; + const gfc_char_t *c; + + while (s1[i]) + { + for (c = s2; *c; c++) + { + if (s1[i] == *c) + break; + } + if (*c) + break; + i++; + } + + return i; +} + + gfc_expr * gfc_simplify_scan (gfc_expr *e, gfc_expr *c, gfc_expr *b, gfc_expr *kind) { @@ -3729,8 +3774,8 @@ gfc_simplify_scan (gfc_expr *e, gfc_expr *c, gfc_expr *b, gfc_expr *kind) { if (back == 0) { - indx = strcspn (e->value.character.string, c->value.character.string) - + 1; + indx = wide_strcspn (e->value.character.string, + c->value.character.string) + 1; if (indx > len) indx = 0; } @@ -4435,7 +4480,7 @@ gfc_simplify_trim (gfc_expr *e) lentrim = len - count; result->value.character.length = lentrim; - result->value.character.string = gfc_getmem (lentrim + 1); + result->value.character.string = gfc_get_wide_string (lentrim + 1); for (i = 0; i < lentrim; i++) result->value.character.string[i] = e->value.character.string[i]; @@ -4492,8 +4537,8 @@ gfc_simplify_verify (gfc_expr *s, gfc_expr *set, gfc_expr *b, gfc_expr *kind) return result; } - index = strspn (s->value.character.string, set->value.character.string) - + 1; + index = wide_strspn (s->value.character.string, + set->value.character.string) + 1; if (index > len) index = 0; diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c index 6e878817363..1d6867bd89c 100644 --- a/gcc/fortran/symbol.c +++ b/gcc/fortran/symbol.c @@ -3833,9 +3833,9 @@ generate_isocbinding_symbol (const char *mod_name, iso_c_binding_symbol s, tmp_sym->value->ts.is_c_interop = 1; tmp_sym->value->ts.is_iso_c = 1; tmp_sym->value->value.character.length = 1; - tmp_sym->value->value.character.string = gfc_getmem (2); + tmp_sym->value->value.character.string = gfc_get_wide_string (2); tmp_sym->value->value.character.string[0] - = (char) c_interop_kinds_table[s].value; + = (gfc_char_t) c_interop_kinds_table[s].value; tmp_sym->value->value.character.string[1] = '\0'; tmp_sym->ts.cl = gfc_get_charlen (); tmp_sym->ts.cl->length = gfc_int_expr (1); diff --git a/gcc/fortran/target-memory.c b/gcc/fortran/target-memory.c index e16c163ceaa..149afa1e8ca 100644 --- a/gcc/fortran/target-memory.c +++ b/gcc/fortran/target-memory.c @@ -73,9 +73,9 @@ size_logical (int kind) static size_t -size_character (int length) +size_character (int length, int kind) { - return length; + return length * kind; } @@ -100,7 +100,7 @@ gfc_target_expr_size (gfc_expr *e) case BT_LOGICAL: return size_logical (e->ts.kind); case BT_CHARACTER: - return size_character (e->value.character.length); + return size_character (e->value.character.length, e->ts.kind); case BT_HOLLERITH: return e->representation.length; case BT_DERIVED: @@ -174,11 +174,20 @@ encode_logical (int kind, int logical, unsigned char *buffer, size_t buffer_size static int -encode_character (int length, char *string, unsigned char *buffer, - size_t buffer_size) +encode_character (int kind, int length, gfc_char_t *string, + unsigned char *buffer, size_t buffer_size) { - gcc_assert (buffer_size >= size_character (length)); - memcpy (buffer, string, length); + char *s; + + gcc_assert (buffer_size >= size_character (length, kind)); + /* FIXME -- when we support wide character types, we'll need to go + via integers for them. For now, we keep the simple memcpy(). */ + gcc_assert (kind == gfc_default_character_kind); + + s = gfc_widechar_to_char (string, length); + memcpy (buffer, s, length); + gfc_free (s); + return length; } @@ -248,7 +257,7 @@ gfc_target_encode_expr (gfc_expr *source, unsigned char *buffer, return encode_logical (source->ts.kind, source->value.logical, buffer, buffer_size); case BT_CHARACTER: - return encode_character (source->value.character.length, + return encode_character (source->ts.kind, source->value.character.length, source->value.character.string, buffer, buffer_size); case BT_DERIVED: @@ -351,18 +360,24 @@ gfc_interpret_logical (int kind, unsigned char *buffer, size_t buffer_size, int -gfc_interpret_character (unsigned char *buffer, size_t buffer_size, gfc_expr *result) +gfc_interpret_character (unsigned char *buffer, size_t buffer_size, + gfc_expr *result) { + int i; + if (result->ts.cl && result->ts.cl->length) result->value.character.length = - (int)mpz_get_ui (result->ts.cl->length->value.integer); + (int) mpz_get_ui (result->ts.cl->length->value.integer); - gcc_assert (buffer_size >= size_character (result->value.character.length)); + gcc_assert (buffer_size >= size_character (result->value.character.length, + result->ts.kind)); result->value.character.string = - gfc_getmem (result->value.character.length + 1); - memcpy (result->value.character.string, buffer, - result->value.character.length); - result->value.character.string [result->value.character.length] = '\0'; + gfc_get_wide_string (result->value.character.length + 1); + + gcc_assert (result->ts.kind == gfc_default_character_kind); + for (i = 0; i < result->value.character.length; i++) + result->value.character.string[i] = (gfc_char_t) buffer[i]; + result->value.character.string[result->value.character.length] = '\0'; return result->value.character.length; } @@ -481,7 +496,9 @@ gfc_target_interpret_expr (unsigned char *buffer, size_t buffer_size, } if (result->ts.type == BT_CHARACTER) - result->representation.string = result->value.character.string; + result->representation.string + = gfc_widechar_to_char (result->value.character.string, + result->value.character.length); else { result->representation.string = diff --git a/gcc/fortran/trans-const.c b/gcc/fortran/trans-const.c index 37251eff6db..6c9032f972a 100644 --- a/gcc/fortran/trans-const.c +++ b/gcc/fortran/trans-const.c @@ -105,7 +105,8 @@ gfc_build_localized_cstring_const (const char *msgid) tree gfc_conv_string_init (tree length, gfc_expr * expr) { - char *s; + gfc_char_t *s; + char *c; HOST_WIDE_INT len; int slen; tree str; @@ -120,14 +121,21 @@ gfc_conv_string_init (tree length, gfc_expr * expr) if (len > slen) { - s = gfc_getmem (len); - memcpy (s, expr->value.character.string, slen); - memset (&s[slen], ' ', len - slen); - str = gfc_build_string_const (len, s); + s = gfc_get_wide_string (len); + memcpy (s, expr->value.character.string, slen * sizeof (gfc_char_t)); + gfc_wide_memset (&s[slen], ' ', len - slen); + + /* FIXME -- currently ignore wide character strings; see assert + above. */ + c = gfc_widechar_to_char (s, len); gfc_free (s); } else - str = gfc_build_string_const (len, expr->value.character.string); + c = gfc_widechar_to_char (expr->value.character.string, + expr->value.character.length); + + str = gfc_build_string_const (len, c); + gfc_free (c); return str; } @@ -214,6 +222,9 @@ gfc_conv_tree_to_mpfr (mpfr_ptr f, tree source) tree gfc_conv_constant_to_tree (gfc_expr * expr) { + tree res; + char *s; + gcc_assert (expr->expr_type == EXPR_CONSTANT); /* If it is has a prescribed memory representation, we build a string @@ -267,8 +278,12 @@ gfc_conv_constant_to_tree (gfc_expr * expr) } case BT_CHARACTER: - return gfc_build_string_const (expr->value.character.length, - expr->value.character.string); + gcc_assert (expr->ts.kind == 1); + s = gfc_widechar_to_char (expr->value.character.string, + expr->value.character.length); + res = gfc_build_string_const (expr->value.character.length, s); + gfc_free (s); + return res; case BT_HOLLERITH: return gfc_build_string_const (expr->representation.length, diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index aae1d72fe1f..08c25917355 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -3488,13 +3488,18 @@ static void gfc_conv_substring_expr (gfc_se * se, gfc_expr * expr) { gfc_ref *ref; + char *s; ref = expr->ref; gcc_assert (ref == NULL || ref->type == REF_SUBSTRING); - se->expr = gfc_build_string_const (expr->value.character.length, - expr->value.character.string); + gcc_assert (expr->ts.kind == gfc_default_character_kind); + s = gfc_widechar_to_char (expr->value.character.string, + expr->value.character.length); + se->expr = gfc_build_string_const (expr->value.character.length, s); + gfc_free (s); + se->string_length = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (se->expr))); TYPE_STRING_FLAG (TREE_TYPE (se->expr)) = 1; diff --git a/gcc/fortran/trans-io.c b/gcc/fortran/trans-io.c index 6316a426918..2f35002a5ac 100644 --- a/gcc/fortran/trans-io.c +++ b/gcc/fortran/trans-io.c @@ -1391,8 +1391,7 @@ gfc_new_nml_name_expr (const char * name) nml_name->ts.kind = gfc_default_character_kind; nml_name->ts.type = BT_CHARACTER; nml_name->value.character.length = strlen(name); - nml_name->value.character.string = gfc_getmem (strlen (name) + 1); - strcpy (nml_name->value.character.string, name); + nml_name->value.character.string = gfc_char_to_widechar (name); return nml_name; } diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 5660ae61811..9220315461c 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -119,11 +119,14 @@ gfc_trans_label_assign (gfc_code * code) } else { - label_str = code->label->format->value.character.string; label_len = code->label->format->value.character.length; + label_str + = gfc_widechar_to_char (code->label->format->value.character.string, + label_len); len_tree = build_int_cst (NULL_TREE, label_len); label_tree = gfc_build_string_const (label_len + 1, label_str); label_tree = gfc_build_addr_expr (pvoid_type_node, label_tree); + gfc_free (label_str); } gfc_add_modify_expr (&se.pre, len, len_tree); -- 2.30.2