From: Francois-Xavier Coudert Date: Sun, 29 Apr 2007 10:45:57 +0000 (+0000) Subject: re PR fortran/31645 (Error on reading Byte Order Mark) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=caef7872f0dc4a62dfc449785974eaa179b0a449;p=gcc.git re PR fortran/31645 (Error on reading Byte Order Mark) PR fortran/31645 * scanner.c (load_file): Discard the byte order mark if one is found on the first non-preprocessor line of a file. * testsuite/gfortran.dg/bom_error.f90: New test. * testsuite/gfortran.dg/bom_include.f90: New test. * testsuite/gfortran.dg/bom_UTF16-LE.f90: New test. * testsuite/gfortran.dg/bom_UTF16-BE.f90: New test. * testsuite/gfortran.dg/bom_UTF-8.f90: New test. * testsuite/gfortran.dg/bom_UTF-32.f90: New test. * testsuite/gfortran.dg/bom_UTF-8.F90: New test. * testsuite/gfortran.dg/bom_include.inc: New file. From-SVN: r124274 --- diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 6bba251dc48..4d481c378a7 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2007-04-29 Francois-Xavier Coudert + + PR fortran/31645 + * scanner.c (load_file): Discard the byte order mark if one is + found on the first non-preprocessor line of a file. + 2007-04-29 Paul Thomas PR fortran/31711 diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index c3d3e6216f6..5eaa34fd294 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -1404,6 +1404,7 @@ load_file (const char *filename, bool initial) gfc_file *f; FILE *input; int len, line_len; + bool first_line; for (f = current_file; f; f = f->up) if (strcmp (filename, f->filename) == 0) @@ -1445,6 +1446,7 @@ load_file (const char *filename, bool initial) current_file->line = 1; line = NULL; line_len = 0; + first_line = true; if (initial && gfc_src_preprocessor_lines[0]) { @@ -1467,6 +1469,26 @@ load_file (const char *filename, bool initial) if (feof (input) && len == 0) break; + /* If this is the first line of the file, it can contain a byte + order mark (BOM), which we will ignore: + FF FE is UTF-16 little endian, + FE FF is UTF-16 big endian, + EF BB BF is UTF-8. */ + if (first_line + && ((line_len >= 2 && line[0] == '\xFF' && line[1] == '\xFE') + || (line_len >= 2 && line[0] == '\xFE' && line[1] == '\xFF') + || (line_len >= 3 && line[0] == '\xEF' && line[1] == '\xBB' + && line[2] == '\xBF'))) + { + int n = line[1] == '\xBB' ? 3 : 2; + char * new = gfc_getmem (line_len); + + strcpy (new, line + n); + gfc_free (line); + line = new; + len -= n; + } + /* There are three things this line can be: a line of Fortran source, an include line or a C preprocessor directive. */ @@ -1476,6 +1498,11 @@ load_file (const char *filename, bool initial) continue; } + /* Preprocessed files have preprocessor lines added before the byte + order mark, so first_line is not about the first line of the file + but the first line that's not a preprocessor line. */ + first_line = false; + if (include_line (line)) { current_file->line++; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3113b303265..f7283eccab4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2007-04-29 Francois-Xavier Coudert + + PR fortran/31645 + * testsuite/gfortran.dg/bom_error.f90: New test. + * testsuite/gfortran.dg/bom_include.f90: New test. + * testsuite/gfortran.dg/bom_UTF16-LE.f90: New test. + * testsuite/gfortran.dg/bom_UTF16-BE.f90: New test. + * testsuite/gfortran.dg/bom_UTF-8.f90: New test. + * testsuite/gfortran.dg/bom_UTF-32.f90: New test. + * testsuite/gfortran.dg/bom_UTF-8.F90: New test. + * testsuite/gfortran.dg/bom_include.inc: New file. + 2007-04-28 Andrew Pinski PR C++/30221 diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-32.f90 b/gcc/testsuite/gfortran.dg/bom_UTF-32.f90 new file mode 100644 index 00000000000..d4243031366 Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF-32.f90 differ diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-8.F90 b/gcc/testsuite/gfortran.dg/bom_UTF-8.F90 new file mode 100644 index 00000000000..f9d9e88d478 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bom_UTF-8.F90 @@ -0,0 +1,3 @@ +print *, "Hello world" +end +! { dg-do compile } diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-8.f90 b/gcc/testsuite/gfortran.dg/bom_UTF-8.f90 new file mode 100644 index 00000000000..f9d9e88d478 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bom_UTF-8.f90 @@ -0,0 +1,3 @@ +print *, "Hello world" +end +! { dg-do compile } diff --git a/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90 b/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90 new file mode 100644 index 00000000000..f590e71f0d8 Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90 differ diff --git a/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90 b/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90 new file mode 100644 index 00000000000..29e7ca68271 Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90 differ diff --git a/gcc/testsuite/gfortran.dg/bom_error.f90 b/gcc/testsuite/gfortran.dg/bom_error.f90 new file mode 100644 index 00000000000..7c2c86d1811 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bom_error.f90 @@ -0,0 +1,4 @@ +ÿþprint *, "Hello world!" +ÿþend ! { dg-error "Invalid character" } +! { dg-do compile } +! { dg-excess-errors "Unexpected end of file" } diff --git a/gcc/testsuite/gfortran.dg/bom_include.f90 b/gcc/testsuite/gfortran.dg/bom_include.f90 new file mode 100644 index 00000000000..65a2898036e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bom_include.f90 @@ -0,0 +1,2 @@ +! { dg-do compile } +include "bom_include.inc" diff --git a/gcc/testsuite/gfortran.dg/bom_include.inc b/gcc/testsuite/gfortran.dg/bom_include.inc new file mode 100644 index 00000000000..b30290103da --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bom_include.inc @@ -0,0 +1,2 @@ +print *, "Hello world!" +end