re PR preprocessor/53690 ([C++11] \u0000 and \U00000000 are wrongly encoded as U+0001)

author Paolo Carlini <paolo@gcc.gnu.org>

Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)

committer Paolo Carlini <paolo@gcc.gnu.org>

Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)
author Paolo Carlini <paolo@gcc.gnu.org>
Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)
committer Paolo Carlini <paolo@gcc.gnu.org>
Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 8d7005f4077675fcaf68f6af69c024b31c16167f..530f8fc0138a77fd5d5a78877f58ebf90745c12f 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>
+
+       PR c++/53690
+       * g++.dg/cpp/pr53690.C: New.
+
  2015-07-02  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
  
         * gcc.target/powerpc/vec-cmp.c: New test.
diff --git a/gcc/testsuite/g++.dg/cpp/pr53690.C b/gcc/testsuite/g++.dg/cpp/pr53690.C

new file mode 100644 (file)

index 0000000..ea91359
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/pr53690.C
@@ -0,0 +1,7 @@
+// PR c++/53690
+// { dg-do compile { target c++11 } }
+
+int array1[U'\U00000000' == 0 ? 1 : -1];
+int array2[U'\u0000' == 0 ? 1 : -1];
+int array3[u'\U00000000' == 0 ? 1 : -1];
+int array4[u'\u0000' == 0 ? 1 : -1];
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog

index 4e2980222b357048458f73569c86aa943e8aea36..ab259c583d71614484818ad977256137aecc04fd 100644 (file)
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,12 @@
+2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>
+
+       PR c++/53690
+       * charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
+       return type to bool.  Fix encoding of \u0000 and \U00000000 in C++.
+       (convert_ucn): Adjust call.
+       * lex.c (forms_identifier_p): Likewise.
+       * internal.h (_cpp_valid_ucn): Adjust declaration.
+
  2015-06-30  Edward Smith-Rowland  <3dw4rd@verizon.net>
  
         Implement N4197 - Adding u8 character literals
@@ -5,7 +14,7 @@
         (struct cpp_options): Add utf8_char_literals.
         * init.c (struct lang_flags): Add utf8_char_literals;
         (struct lang_flags lang_defaults): Add column for utf8_char_literals.
-       * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; 
+       * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
         * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
         Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
         (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
diff --git a/libcpp/charset.c b/libcpp/charset.c

index 8e92bc65f90097cb9770bca0c4539087fc94d2d7..5a1c929d83516f2c131a151a10ba7e1855b07fdd 100644 (file)
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
     or 0060 (`), nor one in the range D800 through DFFF inclusive.
  
     *PSTR must be preceded by "\u" or "\U"; it is assumed that the
-   buffer end is delimited by a non-hex digit.  Returns zero if the
-   UCN has not been consumed.
+   buffer end is delimited by a non-hex digit.  Returns false if the
+   UCN has not been consumed, true otherwise.
  
-   Otherwise the nonzero value of the UCN, whether valid or invalid,
-   is returned.  Diagnostics are emitted for invalid values.  PSTR
-   is updated to point one beyond the UCN, or to the syntactically
-   invalid character.
+   The value of the UCN, whether valid or invalid, is returned in *CP.
+   Diagnostics are emitted for invalid values.  PSTR is updated to point
+   one beyond the UCN, or to the syntactically invalid character.
  
     IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
     an identifier, or 2 otherwise.  */
  
-cppchar_t
+bool
  _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
                 const uchar *limit, int identifier_pos,
-               struct normalize_state *nst)
+               struct normalize_state *nst, cppchar_t *cp)
  {
    cppchar_t result, c;
    unsigned int length;
@@ -1030,8 +1029,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
       multiple tokens in identifiers, so we can't give a helpful
       error message in that case.  */
    if (length && identifier_pos)
-    return 0;
-  
+    {
+      *cp = 0;
+      return false;
+    }
+
    *pstr = str;
    if (length)
      {
@@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
                    (int) (str - base), base);
      }
  
-  if (result == 0)
-    result = 1;
-
-  return result;
+  *cp = result;
+  return true;
  }
  
  /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
@@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
    struct normalize_state nst = INITIAL_NORMALIZE_STATE;
  
    from++;  /* Skip u/U.  */
-  ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);
+  _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
  
    rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
    if (rval)
diff --git a/libcpp/internal.h b/libcpp/internal.h

index 95cf9c2bfd0a500bba5dec4c65b5498cd51a1e3d..abd464ff422c10cdb212810d89fbbd4f3cc9264d 100644 (file)
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -744,9 +744,10 @@ struct normalize_state
  #define NORMALIZE_STATE_UPDATE_IDNUM(st, c)    \
    ((st)->previous = (c), (st)->prev_class = 0)
  
-extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
-                                const unsigned char *, int,
-                                struct normalize_state *state);
+extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
+                           const unsigned char *, int,
+                           struct normalize_state *state,
+                           cppchar_t *);
  extern void _cpp_destroy_iconv (cpp_reader *);
  extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
                                           unsigned char *, size_t, size_t,
diff --git a/libcpp/lex.c b/libcpp/lex.c

index 8f2bdc80e117214adf733200434f8e906bf92da7..0aa109057523be250d02ce4e4b95ff3ed816fd2d 100644 (file)
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first,
        && *buffer->cur == '\\'
        && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
      {
+      cppchar_t s;
        buffer->cur += 2;
        if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-                         state))
+                         state, &s))
         return true;
        buffer->cur -= 2;
      }
author	Paolo Carlini <paolo@gcc.gnu.org>
	Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)
committer	Paolo Carlini <paolo@gcc.gnu.org>
	Thu, 2 Jul 2015 18:54:41 +0000 (18:54 +0000)
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/g++.dg/cpp/pr53690.C	[new file with mode: 0644]	patch \| blob
libcpp/ChangeLog		patch \| blob \| history
libcpp/charset.c		patch \| blob \| history
libcpp/internal.h		patch \| blob \| history
libcpp/lex.c		patch \| blob \| history