From cea93abbe2d08a5a1df8cf6522d7d34aad3dca78 Mon Sep 17 00:00:00 2001 From: Jerry DeLisle Date: Fri, 13 Jun 2008 20:28:08 +0000 Subject: [PATCH] re PR libfortran/35863 ([F2003] Implement ENCODING="UTF-8") 2008-06-13 Jerry DeLisle PR fortran/35863 * libgfortran.h: Change l8_to_l4_offset to big_endian and add endian_off. * runtime/main.c: Fix error in comment. Change l8_to_l4_offset to big_endian. (determine_endianness): Add endian_off and set its value according to big_endian. * gfortran.map: Add symbol for new _gfortran_transfer_character_wide. * io/io.h: Add prototype declarations for new functions. * io/list_read.c (list_formatted_read_scalar): Modify to handle kind=4. (list_formatted_read): Calculate stride based on kind for character type and use it when calling list_formatted_read_scalar. * io/inquire.c (inquire_via_unit): Change l8_to_l4_offset to big_endian. * io/open.c (st_open): Change l8_to_l4_offset to big_endian. * io/read.c (read_a_char4): New function to handle formatted read. * io/write.c: Define GFC_CHAR4(x) to improve readability of code. (write_a_char4): New function to handle formatted write. (write_character): Modify to accept the kind parameter and adjust for endianess of the machine. (list_formatted_write): Calculate the stride resulting from the kind and adjust the list_formatted_write_scalar call accordingly. (nml_write_obj): Adjust calls to write_character. (namelist_write): Likewise. * io/transfer.c (formatted_transfer_scaler): Rename 'len' argument to 'kind' argument to better describe what it is. Add calls to new functions for kind == 4. (formatted_transfer): Modify to handle the case of type character and kind equals 4 to pass in the kind to the transfer routines. (transfer_character_wide): Add this new function. (transfer_array): Don't set kind to the character string length. Adjust strides bases on character kind. (unformatted_read): Adjust size based on kind for character types. (unformatted_write): Likewise. (data_transfer_init): Change l8_to_l4_offset to big_endian. From-SVN: r136763 --- libgfortran/ChangeLog | 33 ++++++ libgfortran/gfortran.map | 1 + libgfortran/io/fbuf.c | 2 +- libgfortran/io/inquire.c | 9 +- libgfortran/io/io.h | 6 + libgfortran/io/list_read.c | 31 +++++- libgfortran/io/open.c | 8 +- libgfortran/io/read.c | 37 +++++++ libgfortran/io/transfer.c | 182 +++++++++++++++++++----------- libgfortran/io/write.c | 222 ++++++++++++++++++++++++++++++------- libgfortran/libgfortran.h | 7 +- libgfortran/runtime/main.c | 11 +- 12 files changed, 420 insertions(+), 129 deletions(-) diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index 5c931e61cc0..044896764a9 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,36 @@ +2008-06-13 Jerry DeLisle + + PR fortran/35863 + * libgfortran.h: Change l8_to_l4_offset to big_endian and add endian_off. + * runtime/main.c: Fix error in comment. Change l8_to_l4_offset to + big_endian. (determine_endianness): Add endian_off and set its value + according to big_endian. + * gfortran.map: Add symbol for new _gfortran_transfer_character_wide. + * io/io.h: Add prototype declarations for new functions. + * io/list_read.c (list_formatted_read_scalar): Modify to handle kind=4. + (list_formatted_read): Calculate stride based on kind for character type + and use it when calling list_formatted_read_scalar. + * io/inquire.c (inquire_via_unit): Change l8_to_l4_offset to big_endian. + * io/open.c (st_open): Change l8_to_l4_offset to big_endian. + * io/read.c (read_a_char4): New function to handle formatted read. + * io/write.c: Define GFC_CHAR4(x) to improve readability of code. + (write_a_char4): New function to handle formatted write. + (write_character): Modify to accept the kind parameter and adjust for + endianess of the machine. (list_formatted_write): Calculate the stride + resulting from the kind and adjust the list_formatted_write_scalar call + accordingly. (nml_write_obj): Adjust calls to write_character. + (namelist_write): Likewise. + * io/transfer.c (formatted_transfer_scaler): Rename 'len' argument to + 'kind' argument to better describe what it is. Add calls to new + functions for kind == 4. (formatted_transfer): Modify to handle the case + of type character and kind equals 4 to pass in the kind to the transfer + routines. (transfer_character_wide): Add this new function. + (transfer_array): Don't set kind to the character string length. Adjust + strides bases on character kind. + (unformatted_read): Adjust size based on kind for character types. + (unformatted_write): Likewise. (data_transfer_init): Change + l8_to_l4_offset to big_endian. + 2008-06-13 Tobias Burnus * configure.ac (AM_CFLAGS): Remove -Werror again. diff --git a/libgfortran/gfortran.map b/libgfortran/gfortran.map index 60ef8532275..0671b60fb86 100644 --- a/libgfortran/gfortran.map +++ b/libgfortran/gfortran.map @@ -1083,6 +1083,7 @@ GFORTRAN_1.1 { _gfortran_string_trim_char4; _gfortran_string_verify_char4; _gfortran_st_wait; + _gfortran_transfer_character_wide; _gfortran_transpose_char4; _gfortran_unpack0_char4; _gfortran_unpack1_char4; diff --git a/libgfortran/io/fbuf.c b/libgfortran/io/fbuf.c index a0d033bf875..f2b1599ed87 100644 --- a/libgfortran/io/fbuf.c +++ b/libgfortran/io/fbuf.c @@ -157,7 +157,7 @@ fbuf_seek (gfc_unit * u, gfc_offset off) /* Moving to the left past the flushed marked would imply moving past the left tab limit, which is never allowed. So return error if that is attempted. */ - if (pos < u->fbuf->flushed) + if (pos < (gfc_offset) u->fbuf->flushed) return -1; u->fbuf->pos = pos; return 0; diff --git a/libgfortran/io/inquire.c b/libgfortran/io/inquire.c index 5e0cf3e646c..9eb63d7b4d7 100644 --- a/libgfortran/io/inquire.c +++ b/libgfortran/io/inquire.c @@ -268,10 +268,9 @@ inquire_via_unit (st_parameter_inquire *iqp, gfc_unit * u) case ENCODING_DEFAULT: p = "UNKNOWN"; break; - /* TODO: Enable UTF-8 case here when implemented. case ENCODING_UTF8: p = "UTF-8"; - break; */ + break; default: internal_error (&iqp->common, "inquire_via_unit(): Bad encoding"); } @@ -497,13 +496,13 @@ inquire_via_unit (st_parameter_inquire *iqp, gfc_unit * u) else switch (u->flags.convert) { - /* l8_to_l4_offset is 0 for little-endian, 1 for big-endian. */ + /* big_endian is 0 for little-endian, 1 for big-endian. */ case GFC_CONVERT_NATIVE: - p = l8_to_l4_offset ? "BIG_ENDIAN" : "LITTLE_ENDIAN"; + p = big_endian ? "BIG_ENDIAN" : "LITTLE_ENDIAN"; break; case GFC_CONVERT_SWAP: - p = l8_to_l4_offset ? "LITTLE_ENDIAN" : "BIG_ENDIAN"; + p = big_endian ? "LITTLE_ENDIAN" : "BIG_ENDIAN"; break; default: diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h index ea75bdbc405..cb7147db398 100644 --- a/libgfortran/io/io.h +++ b/libgfortran/io/io.h @@ -869,6 +869,9 @@ internal_proto(convert_real); extern void read_a (st_parameter_dt *, const fnode *, char *, int); internal_proto(read_a); +extern void read_a_char4 (st_parameter_dt *, const fnode *, char *, int); +internal_proto(read_a); + extern void read_f (st_parameter_dt *, const fnode *, char *, int); internal_proto(read_f); @@ -904,6 +907,9 @@ internal_proto(namelist_write); extern void write_a (st_parameter_dt *, const fnode *, const char *, int); internal_proto(write_a); +extern void write_a_char4 (st_parameter_dt *, const fnode *, const char *, int); +internal_proto(write_a_char4); + extern void write_b (st_parameter_dt *, const fnode *, const char *, int); internal_proto(write_b); diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c index 1aa84704d8a..c99e3a8c23b 100644 --- a/libgfortran/io/list_read.c +++ b/libgfortran/io/list_read.c @@ -1728,7 +1728,8 @@ list_formatted_read_scalar (st_parameter_dt *dtp, volatile bt type, void *p, int kind, size_t size) { char c; - int m; + gfc_char4_t *q; + int i, m; jmp_buf eof_jump; dtp->u.p.namelist_mode = 0; @@ -1831,17 +1832,33 @@ list_formatted_read_scalar (st_parameter_dt *dtp, volatile bt type, void *p, case BT_CHARACTER: if (dtp->u.p.saved_string) - { + { m = ((int) size < dtp->u.p.saved_used) ? (int) size : dtp->u.p.saved_used; - memcpy (p, dtp->u.p.saved_string, m); - } + if (kind == 1) + memcpy (p, dtp->u.p.saved_string, m); + else + { + q = (gfc_char4_t *) p; + for (i = 0; i < m; i++) + q[i] = (unsigned char) dtp->u.p.saved_string[i]; + } + } else /* Just delimiters encountered, nothing to copy but SPACE. */ m = 0; if (m < (int) size) - memset (((char *) p) + m, ' ', size - m); + { + if (kind == 1) + memset (((char *) p) + m, ' ', size - m); + else + { + q = (gfc_char4_t *) p; + for (i = m; i < (int) size; i++) + q[i] = (unsigned char) ' '; + } + } break; case BT_NULL: @@ -1862,6 +1879,8 @@ list_formatted_read (st_parameter_dt *dtp, bt type, void *p, int kind, { size_t elem; char *tmp; + size_t stride = type == BT_CHARACTER ? + size * GFC_SIZE_OF_CHAR_KIND(kind) : size; tmp = (char *) p; @@ -1869,7 +1888,7 @@ list_formatted_read (st_parameter_dt *dtp, bt type, void *p, int kind, for (elem = 0; elem < nelems; elem++) { dtp->u.p.item_count++; - list_formatted_read_scalar (dtp, type, tmp + size*elem, kind, size); + list_formatted_read_scalar (dtp, type, tmp + stride*elem, kind, size); } } diff --git a/libgfortran/io/open.c b/libgfortran/io/open.c index 84575f7bb01..4a78efa01fc 100644 --- a/libgfortran/io/open.c +++ b/libgfortran/io/open.c @@ -107,7 +107,7 @@ static const st_option decimal_opt[] = static const st_option encoding_opt[] = { - /* TODO { "utf-8", ENCODING_UTF8}, */ + { "utf-8", ENCODING_UTF8}, { "default", ENCODING_DEFAULT}, { NULL, 0} }; @@ -795,7 +795,7 @@ st_open (st_parameter_open *opp) conv = compile_options.convert; } - /* We use l8_to_l4_offset, which is 0 on little-endian machines + /* We use big_endian, which is 0 on little-endian machines and 1 on big-endian machines. */ switch (conv) { @@ -804,11 +804,11 @@ st_open (st_parameter_open *opp) break; case GFC_CONVERT_BIG: - conv = l8_to_l4_offset ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP; + conv = big_endian ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP; break; case GFC_CONVERT_LITTLE: - conv = l8_to_l4_offset ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE; + conv = big_endian ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE; break; default: diff --git a/libgfortran/io/read.c b/libgfortran/io/read.c index a09d663dc1c..11a1ac018f7 100644 --- a/libgfortran/io/read.c +++ b/libgfortran/io/read.c @@ -270,6 +270,43 @@ read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length) memset (p + m, ' ', n); } +void +read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length) +{ + char *s; + gfc_char4_t *dest; + int m, n, wi, status; + size_t w; + + wi = f->u.w; + if (wi == -1) /* '(A)' edit descriptor */ + wi = length; + + w = wi; + + s = gfc_alloca (w); + + /* Read in w bytes, treating comma as not a separator. */ + dtp->u.p.sf_read_comma = 0; + status = read_block_form (dtp, s, &w); + dtp->u.p.sf_read_comma = + dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1; + + if (status == FAILURE) + return; + if (w > (size_t) length) + s += (w - length); + + m = ((int) w > length) ? length : (int) w; + + dest = (gfc_char4_t *) p; + + for (n = 0; n < m; n++, dest++, s++) + *dest = (unsigned char ) *s; + + for (n = 0; n < length - (int) w; n++, dest++) + *dest = (unsigned char) ' '; +} /* eat_leading_spaces()-- Given a character pointer and a width, * ignore the leading spaces. */ diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c index 36181f6fc05..fd63139146e 100644 --- a/libgfortran/io/transfer.c +++ b/libgfortran/io/transfer.c @@ -54,6 +54,7 @@ Boston, MA 02110-1301, USA. */ transfer_integer transfer_logical transfer_character + transfer_character_wide transfer_real transfer_complex @@ -76,6 +77,9 @@ export_proto(transfer_logical); extern void transfer_character (st_parameter_dt *, void *, int); export_proto(transfer_character); +extern void transfer_character_wide (st_parameter_dt *, void *, int, int); +export_proto(transfer_character_wide); + extern void transfer_complex (st_parameter_dt *, void *, int); export_proto(transfer_complex); @@ -730,35 +734,43 @@ write_buf (st_parameter_dt *dtp, void *buf, size_t nbytes) static void unformatted_read (st_parameter_dt *dtp, bt type, - void *dest, int kind __attribute__((unused)), - size_t size, size_t nelems) + void *dest, int kind, size_t size, size_t nelems) { size_t i, sz; - /* Currently, character implies size=1. */ if (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_NATIVE - || size == 1 || type == BT_CHARACTER) + || size == 1) { sz = size * nelems; + if (type == BT_CHARACTER) + sz *= GFC_SIZE_OF_CHAR_KIND(kind); read_block_direct (dtp, dest, &sz); } else { char buffer[16]; char *p; - + + p = dest; + + /* Handle wide chracters. */ + if (type == BT_CHARACTER && kind != 1) + { + nelems *= size; + size = kind; + } + /* Break up complex into its constituent reals. */ if (type == BT_COMPLEX) { nelems *= 2; size /= 2; } - p = dest; /* By now, all complex variables have been split into their constituent reals. */ - for (i=0; iu.p.current_unit->flags.convert == GFC_CONVERT_NATIVE || - size == 1 || type == BT_CHARACTER) + size == 1) { - size *= nelems; - write_buf (dtp, source, size); + size_t stride = type == BT_CHARACTER ? + size * GFC_SIZE_OF_CHAR_KIND(kind) : size; + + write_buf (dtp, source, stride * nelems); } else { char buffer[16]; char *p; size_t i; + + p = source; + + /* Handle wide chracters. */ + if (type == BT_CHARACTER && kind != 1) + { + nelems *= size; + size = kind; + } /* Break up complex into its constituent reals. */ if (type == BT_COMPLEX) @@ -797,16 +819,13 @@ unformatted_write (st_parameter_dt *dtp, bt type, size /= 2; } - p = source; - /* By now, all complex variables have been split into their constituent reals. */ - - for (i=0; iu.p.mode == READING) - read_decimal (dtp, f, p, len); + read_decimal (dtp, f, p, kind); else - write_i (dtp, f, p, len); + write_i (dtp, f, p, kind); break; @@ -1019,9 +1038,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_radix (dtp, f, p, len, 2); + read_radix (dtp, f, p, kind, 2); else - write_b (dtp, f, p, len); + write_b (dtp, f, p, kind); break; @@ -1034,9 +1053,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_radix (dtp, f, p, len, 8); + read_radix (dtp, f, p, kind, 8); else - write_o (dtp, f, p, len); + write_o (dtp, f, p, kind); break; @@ -1049,9 +1068,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_radix (dtp, f, p, len, 16); + read_radix (dtp, f, p, kind, 16); else - write_z (dtp, f, p, len); + write_z (dtp, f, p, kind); break; @@ -1059,11 +1078,23 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, if (n == 0) goto need_data; + /* It is possible to have FMT_A with something not BT_CHARACTER such + as when writing out hollerith strings, so check both type + and kind before calling wide character routines. */ if (dtp->u.p.mode == READING) - read_a (dtp, f, p, len); + { + if (type == BT_CHARACTER && kind == 4) + read_a_char4 (dtp, f, p, size); + else + read_a (dtp, f, p, size); + } else - write_a (dtp, f, p, len); - + { + if (type == BT_CHARACTER && kind == 4) + write_a_char4 (dtp, f, p, size); + else + write_a (dtp, f, p, size); + } break; case FMT_L: @@ -1071,9 +1102,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, goto need_data; if (dtp->u.p.mode == READING) - read_l (dtp, f, p, len); + read_l (dtp, f, p, kind); else - write_l (dtp, f, p, len); + write_l (dtp, f, p, kind); break; @@ -1084,9 +1115,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); else - write_d (dtp, f, p, len); + write_d (dtp, f, p, kind); break; @@ -1097,9 +1128,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); else - write_e (dtp, f, p, len); + write_e (dtp, f, p, kind); break; case FMT_EN: @@ -1109,9 +1140,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); else - write_en (dtp, f, p, len); + write_en (dtp, f, p, kind); break; @@ -1122,9 +1153,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); else - write_es (dtp, f, p, len); + write_es (dtp, f, p, kind); break; @@ -1135,9 +1166,9 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, return; if (dtp->u.p.mode == READING) - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); else - write_f (dtp, f, p, len); + write_f (dtp, f, p, kind); break; @@ -1148,16 +1179,19 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, switch (type) { case BT_INTEGER: - read_decimal (dtp, f, p, len); + read_decimal (dtp, f, p, kind); break; case BT_LOGICAL: - read_l (dtp, f, p, len); + read_l (dtp, f, p, kind); break; case BT_CHARACTER: - read_a (dtp, f, p, len); + if (kind == 4) + read_a_char4 (dtp, f, p, size); + else + read_a (dtp, f, p, size); break; case BT_REAL: - read_f (dtp, f, p, len); + read_f (dtp, f, p, kind); break; default: goto bad_type; @@ -1166,19 +1200,22 @@ formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len, switch (type) { case BT_INTEGER: - write_i (dtp, f, p, len); + write_i (dtp, f, p, kind); break; case BT_LOGICAL: - write_l (dtp, f, p, len); + write_l (dtp, f, p, kind); break; case BT_CHARACTER: - write_a (dtp, f, p, len); + if (kind == 4) + write_a_char4 (dtp, f, p, size); + else + write_a (dtp, f, p, size); break; case BT_REAL: if (f->u.real.w == 0) - write_real (dtp, p, len); + write_real (dtp, p, kind); else - write_d (dtp, f, p, len); + write_d (dtp, f, p, kind); break; default: bad_type: @@ -1407,12 +1444,13 @@ formatted_transfer (st_parameter_dt *dtp, bt type, void *p, int kind, char *tmp; tmp = (char *) p; - + size_t stride = type == BT_CHARACTER ? + size * GFC_SIZE_OF_CHAR_KIND(kind) : size; /* Big loop over all the elements. */ for (elem = 0; elem < nelems; elem++) { dtp->u.p.item_count++; - formatted_transfer_scalar (dtp, type, tmp + size*elem, kind, size); + formatted_transfer_scalar (dtp, type, tmp + stride*elem, kind, size); } } @@ -1465,10 +1503,26 @@ transfer_character (st_parameter_dt *dtp, void *p, int len) if (len == 0 && p == NULL) p = empty_string; - /* Currently we support only 1 byte chars, and the library is a bit - confused of character kind vs. length, so we kludge it by setting - kind = length. */ - dtp->u.p.transfer (dtp, BT_CHARACTER, p, len, len, 1); + /* Set kind here to 1. */ + dtp->u.p.transfer (dtp, BT_CHARACTER, p, 1, len, 1); +} + +void +transfer_character_wide (st_parameter_dt *dtp, void *p, int len, int kind) +{ + static char *empty_string[0]; + + if ((dtp->common.flags & IOPARM_LIBRETURN_MASK) != IOPARM_LIBRETURN_OK) + return; + + /* Strings of zero length can have p == NULL, which confuses the + transfer routines into thinking we need more data elements. To avoid + this, we give them a nice pointer. */ + if (len == 0 && p == NULL) + p = empty_string; + + /* Here we pass the actual kind value. */ + dtp->u.p.transfer (dtp, BT_CHARACTER, p, kind, len, 1); } @@ -1522,13 +1576,7 @@ transfer_array (st_parameter_dt *dtp, gfc_array_char *desc, int kind, break; case GFC_DTYPE_CHARACTER: iotype = BT_CHARACTER; - /* FIXME: Currently dtype contains the charlen, which is - clobbered if charlen > 2**24. That's why we use a separate - argument for the charlen. However, if we want to support - non-8-bit charsets we need to fix dtype to contain - sizeof(chartype) and fix the code below. */ size = charlen; - kind = charlen; break; case GFC_DTYPE_DERIVED: internal_error (&dtp->common, @@ -1542,7 +1590,9 @@ transfer_array (st_parameter_dt *dtp, gfc_array_char *desc, int kind, for (n = 0; n < rank; n++) { count[n] = 0; - stride[n] = desc->dim[n].stride; + stride[n] = iotype == BT_CHARACTER ? + desc->dim[n].stride * GFC_SIZE_OF_CHAR_KIND(kind) : + desc->dim[n].stride; extent[n] = desc->dim[n].ubound + 1 - desc->dim[n].lbound; /* If the extent of even one dimension is zero, then the entire @@ -1815,7 +1865,7 @@ data_transfer_init (st_parameter_dt *dtp, int read_flag) if (conv == GFC_CONVERT_NONE) conv = compile_options.convert; - /* We use l8_to_l4_offset, which is 0 on little-endian machines + /* We use big_endian, which is 0 on little-endian machines and 1 on big-endian machines. */ switch (conv) { @@ -1824,11 +1874,11 @@ data_transfer_init (st_parameter_dt *dtp, int read_flag) break; case GFC_CONVERT_BIG: - conv = l8_to_l4_offset ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP; + conv = big_endian ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP; break; case GFC_CONVERT_LITTLE: - conv = l8_to_l4_offset ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE; + conv = big_endian ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE; break; default: diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c index 6135d60fe5d..ed50e0d5705 100644 --- a/libgfortran/io/write.c +++ b/libgfortran/io/write.c @@ -124,6 +124,108 @@ write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len) #endif } + +/* The primary difference between write_a_char4 and write_a is that we have to + deal with writing from the first byte of the 4-byte character and take care + of endianess. This currently implements encoding="default" which means we + write the lowest significant byte. If the 3 most significant bytes are + not representable emit a '?'. TODO: Implement encoding="UTF-8" + which will process all 4 bytes and translate to the encoded output. */ + +void +write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len) +{ + int wlen; + char *p; + gfc_char4_t *q; + + wlen = f->u.string.length < 0 + || (f->format == FMT_G && f->u.string.length == 0) + ? len : f->u.string.length; + + q = (gfc_char4_t *) source; +#ifdef HAVE_CRLF + /* If this is formatted STREAM IO convert any embedded line feed characters + to CR_LF on systems that use that sequence for newlines. See F2003 + Standard sections 10.6.3 and 9.9 for further information. */ + if (is_stream_io (dtp)) + { + const char crlf[] = "\r\n"; + int i, j, bytes; + gfc_char4_t *qq; + bytes = 0; + + /* Write out any padding if needed. */ + if (len < wlen) + { + p = write_block (dtp, wlen - len); + if (p == NULL) + return; + memset (p, ' ', wlen - len); + } + + /* Scan the source string looking for '\n' and convert it if found. */ + qq = (gfc_char4_t *) source; + for (i = 0; i < wlen; i++) + { + if (qq[i] == '\n') + { + /* Write out the previously scanned characters in the string. */ + if (bytes > 0) + { + p = write_block (dtp, bytes); + if (p == NULL) + return; + for (j = 0; j < bytes; j++) + p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + bytes = 0; + } + + /* Write out the CR_LF sequence. */ + p = write_block (dtp, 2); + if (p == NULL) + return; + memcpy (p, crlf, 2); + } + else + bytes++; + } + + /* Write out any remaining bytes if no LF was found. */ + if (bytes > 0) + { + p = write_block (dtp, bytes); + if (p == NULL) + return; + for (j = 0; j < bytes; j++) + p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + } + } + else + { +#endif + int j; + p = write_block (dtp, wlen); + if (p == NULL) + return; + + if (wlen < len) + { + for (j = 0; j < wlen; j++) + p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + } + else + { + memset (p, ' ', wlen - len); + for (j = wlen - len; j < wlen; j++) + p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + } +#ifdef HAVE_CRLF + } +#endif +} + + static GFC_INTEGER_LARGEST extract_int (const void *p, int len) { @@ -639,10 +741,12 @@ write_integer (st_parameter_dt *dtp, const char *source, int length) the strings if the file has been opened in that mode. */ static void -write_character (st_parameter_dt *dtp, const char *source, int length) +write_character (st_parameter_dt *dtp, const char *source, int kind, int length) { int i, extra; char *p, d; + gfc_char4_t *q; + switch (dtp->u.p.delim_status) { @@ -657,35 +761,77 @@ write_character (st_parameter_dt *dtp, const char *source, int length) break; } - if (d == ' ') - extra = 0; - else + if (kind == 1) { - extra = 2; + if (d == ' ') + extra = 0; + else + { + extra = 2; - for (i = 0; i < length; i++) - if (source[i] == d) - extra++; - } + for (i = 0; i < length; i++) + if (source[i] == d) + extra++; + } - p = write_block (dtp, length + extra); - if (p == NULL) - return; + p = write_block (dtp, length + extra); + if (p == NULL) + return; + + if (d == ' ') + memcpy (p, source, length); + else + { + *p++ = d; - if (d == ' ') - memcpy (p, source, length); + for (i = 0; i < length; i++) + { + *p++ = source[i]; + if (source[i] == d) + *p++ = d; + } + + *p = d; + } + } else { - *p++ = d; - - for (i = 0; i < length; i++) + /* We have to scan the source string looking for delimiters to determine + how large the write block needs to be. */ + if (d == ' ') + extra = 0; + else { - *p++ = source[i]; - if (source[i] == d) - *p++ = d; + extra = 2; + + q = (gfc_char4_t *) source; + for (i = 0; i < length; i++, q++) + if (*q == (gfc_char4_t) d) + extra++; } - *p = d; + p = write_block (dtp, length + extra); + if (p == NULL) + return; + + if (d == ' ') + { + q = (gfc_char4_t *) source; + for (i = 0; i < length; i++, q++) + p[i] = *q > 255 ? '?' : (unsigned char) *q; + } + else + { + *p++ = d; + q = (gfc_char4_t *) source; + for (i = 0; i < length; i++, q++) + { + *p++ = *q > 255 ? '?' : (unsigned char) *q; + if (*q == (gfc_char4_t) d) + *p++ = d; + } + *p = d; + } } } @@ -796,7 +942,7 @@ list_formatted_write_scalar (st_parameter_dt *dtp, bt type, void *p, int kind, write_logical (dtp, p, kind); break; case BT_CHARACTER: - write_character (dtp, p, kind); + write_character (dtp, p, kind, size); break; case BT_REAL: write_real (dtp, p, kind); @@ -818,6 +964,8 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind, { size_t elem; char *tmp; + size_t stride = type == BT_CHARACTER ? + size * GFC_SIZE_OF_CHAR_KIND(kind) : size; tmp = (char *) p; @@ -825,7 +973,7 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind, for (elem = 0; elem < nelems; elem++) { dtp->u.p.item_count++; - list_formatted_write_scalar (dtp, type, tmp + size*elem, kind, size); + list_formatted_write_scalar (dtp, type, tmp + elem * stride, kind, size); } } @@ -889,9 +1037,9 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, if (obj->type != GFC_DTYPE_DERIVED) { #ifdef HAVE_CRLF - write_character (dtp, "\r\n ", 3); + write_character (dtp, "\r\n ", 1, 3); #else - write_character (dtp, "\n ", 2); + write_character (dtp, "\n ", 1, 2); #endif len = 0; if (base) @@ -900,15 +1048,15 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, for (dim_i = 0; dim_i < (index_type) strlen (base_name); dim_i++) { cup = toupper (base_name[dim_i]); - write_character (dtp, &cup, 1); + write_character (dtp, &cup, 1, 1); } } for (dim_i =len; dim_i < (index_type) strlen (obj->var_name); dim_i++) { cup = toupper (obj->var_name[dim_i]); - write_character (dtp, &cup, 1); + write_character (dtp, &cup, 1, 1); } - write_character (dtp, "=", 1); + write_character (dtp, "=", 1, 1); } /* Counts the number of data output on a line, including names. */ @@ -978,7 +1126,7 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, if (rep_ctr > 1) { sprintf(rep_buff, " %d*", rep_ctr); - write_character (dtp, rep_buff, strlen (rep_buff)); + write_character (dtp, rep_buff, 1, strlen (rep_buff)); dtp->u.p.no_leading_blank = 1; } num++; @@ -1003,7 +1151,7 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, dtp->u.p.delim_status = DELIM_QUOTE; if (dtp->u.p.nml_delim == '\'') dtp->u.p.delim_status = DELIM_APOSTROPHE; - write_character (dtp, p, obj->string_length); + write_character (dtp, p, 1, obj->string_length); dtp->u.p.delim_status = tmp_delim; break; @@ -1093,14 +1241,14 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, to column 2. Reset the repeat counter. */ dtp->u.p.no_leading_blank = 0; - write_character (dtp, &semi_comma, 1); + write_character (dtp, &semi_comma, 1, 1); if (num > 5) { num = 0; #ifdef HAVE_CRLF - write_character (dtp, "\r\n ", 3); + write_character (dtp, "\r\n ", 1, 3); #else - write_character (dtp, "\n ", 2); + write_character (dtp, "\n ", 1, 2); #endif } rep_ctr = 1; @@ -1164,13 +1312,13 @@ namelist_write (st_parameter_dt *dtp) /* Temporarily disable namelist delimters. */ dtp->u.p.delim_status = DELIM_NONE; - write_character (dtp, "&", 1); + write_character (dtp, "&", 1, 1); /* Write namelist name in upper case - f95 std. */ for (i = 0 ;i < dtp->namelist_name_len ;i++ ) { c = toupper (dtp->namelist_name[i]); - write_character (dtp, &c ,1); + write_character (dtp, &c, 1 ,1); } if (dtp->u.p.ionml != NULL) @@ -1184,9 +1332,9 @@ namelist_write (st_parameter_dt *dtp) } #ifdef HAVE_CRLF - write_character (dtp, " /\r\n", 5); + write_character (dtp, " /\r\n", 1, 5); #else - write_character (dtp, " /\n", 4); + write_character (dtp, " /\n", 1, 4); #endif /* Restore the original delimiter. */ diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h index 6ff9f4fd072..8c0f1b4a796 100644 --- a/libgfortran/libgfortran.h +++ b/libgfortran/libgfortran.h @@ -272,13 +272,12 @@ typedef GFC_UINTEGER_4 gfc_char4_t; simply equal to the kind parameter itself. */ #define GFC_SIZE_OF_CHAR_KIND(kind) (kind) - /* This will be 0 on little-endian machines and one on big-endian machines. */ -extern int l8_to_l4_offset; -internal_proto(l8_to_l4_offset); +extern int big_endian; +internal_proto(big_endian); #define GFOR_POINTER_TO_L1(p, kind) \ - (l8_to_l4_offset * (kind - 1) + (GFC_LOGICAL_1 *)(p)) + (big_endian * (kind - 1) + (GFC_LOGICAL_1 *)(p)) #define GFC_INTEGER_1_HUGE \ (GFC_INTEGER_1)((((GFC_UINTEGER_1)1) << 7) - 1) diff --git a/libgfortran/runtime/main.c b/libgfortran/runtime/main.c index 8632f152c95..71b481a7deb 100644 --- a/libgfortran/runtime/main.c +++ b/libgfortran/runtime/main.c @@ -45,10 +45,9 @@ stupid_function_name_for_static_linking (void) return; } -/* This is the offset (in bytes) required to cast from logical(8)* to - logical(4)*. and still get the same result. Will be 0 for little-endian - machines and 4 for big-endian machines. */ -int l8_to_l4_offset = 0; +/* This will be 0 for little-endian + machines and 1 for big-endian machines. */ +int big_endian = 0; /* Figure out endianness for this machine. */ @@ -64,9 +63,9 @@ determine_endianness (void) u.l8 = 1; if (u.l4[0]) - l8_to_l4_offset = 0; + big_endian = 0; else if (u.l4[1]) - l8_to_l4_offset = 1; + big_endian = 1; else runtime_error ("Unable to determine machine endianness"); } -- 2.30.2