From fbb22910cfa4e4567b46fc8b74ccfad92fa745d8 Mon Sep 17 00:00:00 2001 From: Paolo Carlini Date: Thu, 2 Jul 2015 18:54:41 +0000 Subject: [PATCH] re PR preprocessor/53690 ([C++11] \u0000 and \U00000000 are wrongly encoded as U+0001.) /libcpp 2015-07-02 Paolo Carlini PR c++/53690 * charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change return type to bool. Fix encoding of \u0000 and \U00000000 in C++. (convert_ucn): Adjust call. * lex.c (forms_identifier_p): Likewise. * internal.h (_cpp_valid_ucn): Adjust declaration. /gcc/testsuite 2015-07-02 Paolo Carlini PR c++/53690 * g++.dg/cpp/pr53690.C: New. From-SVN: r225353 --- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/g++.dg/cpp/pr53690.C | 7 +++++++ libcpp/ChangeLog | 11 ++++++++++- libcpp/charset.c | 30 +++++++++++++++--------------- libcpp/internal.h | 7 ++++--- libcpp/lex.c | 3 ++- 6 files changed, 43 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp/pr53690.C diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8d7005f4077..530f8fc0138 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-07-02 Paolo Carlini + + PR c++/53690 + * g++.dg/cpp/pr53690.C: New. + 2015-07-02 Bill Schmidt * gcc.target/powerpc/vec-cmp.c: New test. diff --git a/gcc/testsuite/g++.dg/cpp/pr53690.C b/gcc/testsuite/g++.dg/cpp/pr53690.C new file mode 100644 index 00000000000..ea913597513 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/pr53690.C @@ -0,0 +1,7 @@ +// PR c++/53690 +// { dg-do compile { target c++11 } } + +int array1[U'\U00000000' == 0 ? 1 : -1]; +int array2[U'\u0000' == 0 ? 1 : -1]; +int array3[u'\U00000000' == 0 ? 1 : -1]; +int array4[u'\u0000' == 0 ? 1 : -1]; diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 4e2980222b3..ab259c583d7 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,12 @@ +2015-07-02 Paolo Carlini + + PR c++/53690 + * charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change + return type to bool. Fix encoding of \u0000 and \U00000000 in C++. + (convert_ucn): Adjust call. + * lex.c (forms_identifier_p): Likewise. + * internal.h (_cpp_valid_ucn): Adjust declaration. + 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> Implement N4197 - Adding u8 character literals @@ -5,7 +14,7 @@ (struct cpp_options): Add utf8_char_literals. * init.c (struct lang_flags): Add utf8_char_literals; (struct lang_flags lang_defaults): Add column for utf8_char_literals. - * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; + * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()): Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens; (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token; diff --git a/libcpp/charset.c b/libcpp/charset.c index 8e92bc65f90..5a1c929d835 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, or 0060 (`), nor one in the range D800 through DFFF inclusive. *PSTR must be preceded by "\u" or "\U"; it is assumed that the - buffer end is delimited by a non-hex digit. Returns zero if the - UCN has not been consumed. + buffer end is delimited by a non-hex digit. Returns false if the + UCN has not been consumed, true otherwise. - Otherwise the nonzero value of the UCN, whether valid or invalid, - is returned. Diagnostics are emitted for invalid values. PSTR - is updated to point one beyond the UCN, or to the syntactically - invalid character. + The value of the UCN, whether valid or invalid, is returned in *CP. + Diagnostics are emitted for invalid values. PSTR is updated to point + one beyond the UCN, or to the syntactically invalid character. IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of an identifier, or 2 otherwise. */ -cppchar_t +bool _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, const uchar *limit, int identifier_pos, - struct normalize_state *nst) + struct normalize_state *nst, cppchar_t *cp) { cppchar_t result, c; unsigned int length; @@ -1030,8 +1029,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, multiple tokens in identifiers, so we can't give a helpful error message in that case. */ if (length && identifier_pos) - return 0; - + { + *cp = 0; + return false; + } + *pstr = str; if (length) { @@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, (int) (str - base), base); } - if (result == 0) - result = 1; - - return result; + *cp = result; + return true; } /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate @@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, struct normalize_state nst = INITIAL_NORMALIZE_STATE; from++; /* Skip u/U. */ - ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst); + _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn); rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft); if (rval) diff --git a/libcpp/internal.h b/libcpp/internal.h index 95cf9c2bfd0..abd464ff422 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -744,9 +744,10 @@ struct normalize_state #define NORMALIZE_STATE_UPDATE_IDNUM(st, c) \ ((st)->previous = (c), (st)->prev_class = 0) -extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **, - const unsigned char *, int, - struct normalize_state *state); +extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **, + const unsigned char *, int, + struct normalize_state *state, + cppchar_t *); extern void _cpp_destroy_iconv (cpp_reader *); extern unsigned char *_cpp_convert_input (cpp_reader *, const char *, unsigned char *, size_t, size_t, diff --git a/libcpp/lex.c b/libcpp/lex.c index 8f2bdc80e11..0aa10905752 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first, && *buffer->cur == '\\' && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { + cppchar_t s; buffer->cur += 2; if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, - state)) + state, &s)) return true; buffer->cur -= 2; } -- 2.30.2