From: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
Date: Sun, 29 Apr 2007 10:45:57 +0000 (+0000)
Subject: re PR fortran/31645 (Error on reading Byte Order Mark)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=caef7872f0dc4a62dfc449785974eaa179b0a449;p=gcc.git

re PR fortran/31645 (Error on reading Byte Order Mark)

	PR fortran/31645

	* scanner.c (load_file): Discard the byte order mark if one is
	found on the first non-preprocessor line of a file.

	* testsuite/gfortran.dg/bom_error.f90: New test.
	* testsuite/gfortran.dg/bom_include.f90: New test.
	* testsuite/gfortran.dg/bom_UTF16-LE.f90: New test.
	* testsuite/gfortran.dg/bom_UTF16-BE.f90: New test.
	* testsuite/gfortran.dg/bom_UTF-8.f90: New test.
	* testsuite/gfortran.dg/bom_UTF-32.f90: New test.
	* testsuite/gfortran.dg/bom_UTF-8.F90: New test.
	* testsuite/gfortran.dg/bom_include.inc: New file.

From-SVN: r124274
---

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 6bba251dc48..4d481c378a7 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,9 @@
+2007-04-29  Francois-Xavier Coudert  <fxcoudert@gcc.gnu.org>
+
+	PR fortran/31645
+	* scanner.c (load_file): Discard the byte order mark if one is
+	found on the first non-preprocessor line of a file.
+
 2007-04-29  Paul Thomas  <pault@gcc.gnu.org>
 
 	PR fortran/31711
diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
index c3d3e6216f6..5eaa34fd294 100644
--- a/gcc/fortran/scanner.c
+++ b/gcc/fortran/scanner.c
@@ -1404,6 +1404,7 @@ load_file (const char *filename, bool initial)
   gfc_file *f;
   FILE *input;
   int len, line_len;
+  bool first_line;
 
   for (f = current_file; f; f = f->up)
     if (strcmp (filename, f->filename) == 0)
@@ -1445,6 +1446,7 @@ load_file (const char *filename, bool initial)
   current_file->line = 1;
   line = NULL;
   line_len = 0;
+  first_line = true;
 
   if (initial && gfc_src_preprocessor_lines[0])
     {
@@ -1467,6 +1469,26 @@ load_file (const char *filename, bool initial)
       if (feof (input) && len == 0)
 	break;
 
+      /* If this is the first line of the file, it can contain a byte
+	 order mark (BOM), which we will ignore:
+	   FF FE is UTF-16 little endian,
+	   FE FF is UTF-16 big endian,
+	   EF BB BF is UTF-8.  */
+      if (first_line
+	  && ((line_len >= 2 && line[0] == '\xFF' && line[1] == '\xFE')
+	      || (line_len >= 2 && line[0] == '\xFE' && line[1] == '\xFF')
+	      || (line_len >= 3 && line[0] == '\xEF' && line[1] == '\xBB'
+		  && line[2] == '\xBF')))
+	{
+	  int n = line[1] == '\xBB' ? 3 : 2;
+	  char * new = gfc_getmem (line_len);
+
+	  strcpy (new, line + n);
+	  gfc_free (line);
+	  line = new;
+	  len -= n;
+	}
+
       /* There are three things this line can be: a line of Fortran
 	 source, an include line or a C preprocessor directive.  */
 
@@ -1476,6 +1498,11 @@ load_file (const char *filename, bool initial)
 	  continue;
 	}
 
+      /* Preprocessed files have preprocessor lines added before the byte
+         order mark, so first_line is not about the first line of the file
+	 but the first line that's not a preprocessor line.  */
+      first_line = false;
+
       if (include_line (line))
 	{
 	  current_file->line++;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3113b303265..f7283eccab4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2007-04-29  Francois-Xavier Coudert  <fxcoudert@gcc.gnu.org>
+
+	PR fortran/31645
+	* gfortran.dg/bom_error.f90: New test.
+	* gfortran.dg/bom_include.f90: New test.
+	* gfortran.dg/bom_UTF16-LE.f90: New test.
+	* gfortran.dg/bom_UTF16-BE.f90: New test.
+	* gfortran.dg/bom_UTF-8.f90: New test.
+	* gfortran.dg/bom_UTF-32.f90: New test.
+	* gfortran.dg/bom_UTF-8.F90: New test.
+	* gfortran.dg/bom_include.inc: New file.
+
 2007-04-28  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
 	PR C++/30221
diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-32.f90 b/gcc/testsuite/gfortran.dg/bom_UTF-32.f90
new file mode 100644
index 00000000000..d4243031366
Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF-32.f90 differ
diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-8.F90 b/gcc/testsuite/gfortran.dg/bom_UTF-8.F90
new file mode 100644
index 00000000000..f9d9e88d478
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/bom_UTF-8.F90
@@ -0,0 +1,3 @@
+ï»¿print *, "Hello world"
+end
+! { dg-do compile }
diff --git a/gcc/testsuite/gfortran.dg/bom_UTF-8.f90 b/gcc/testsuite/gfortran.dg/bom_UTF-8.f90
new file mode 100644
index 00000000000..f9d9e88d478
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/bom_UTF-8.f90
@@ -0,0 +1,3 @@
+ï»¿print *, "Hello world"
+end
+! { dg-do compile }
diff --git a/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90 b/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90
new file mode 100644
index 00000000000..f590e71f0d8
Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF16-BE.f90 differ
diff --git a/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90 b/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90
new file mode 100644
index 00000000000..29e7ca68271
Binary files /dev/null and b/gcc/testsuite/gfortran.dg/bom_UTF16-LE.f90 differ
diff --git a/gcc/testsuite/gfortran.dg/bom_error.f90 b/gcc/testsuite/gfortran.dg/bom_error.f90
new file mode 100644
index 00000000000..7c2c86d1811
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/bom_error.f90
@@ -0,0 +1,4 @@
+ÿþprint *, "Hello world!"
+ÿþend ! { dg-error "Invalid character" }
+! { dg-do compile }
+! { dg-excess-errors "Unexpected end of file" }
diff --git a/gcc/testsuite/gfortran.dg/bom_include.f90 b/gcc/testsuite/gfortran.dg/bom_include.f90
new file mode 100644
index 00000000000..65a2898036e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/bom_include.f90
@@ -0,0 +1,2 @@
+! { dg-do compile }
+include "bom_include.inc"
diff --git a/gcc/testsuite/gfortran.dg/bom_include.inc b/gcc/testsuite/gfortran.dg/bom_include.inc
new file mode 100644
index 00000000000..b30290103da
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/bom_include.inc
@@ -0,0 +1,2 @@
+ï»¿print *, "Hello world!"
+end