From 0900e29cdbc533fecf2a311447bbde17f101bbd6 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Thu, 26 Sep 2019 21:43:51 +0000 Subject: [PATCH] charset.c (UCS_LIMIT): New macro. * charset.c (UCS_LIMIT): New macro. (ucn_valid_in_identifier): Use it instead of a hardcoded constant. (_cpp_valid_ucn): Issue a pedantic warning for UCNs larger than UCS_LIMIT outside of identifiers in C and in C++2a or later. From-SVN: r276167 --- gcc/testsuite/ChangeLog | 8 ++++++++ gcc/testsuite/g++.dg/cpp/ucn-1.C | 2 ++ gcc/testsuite/g++.dg/cpp2a/ucn1.C | 7 +++++++ gcc/testsuite/gcc.dg/attr-alias-5.c | 2 +- gcc/testsuite/gcc.dg/cpp/ucs.c | 6 ++++-- gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c | 2 +- libcpp/ChangeLog | 7 +++++++ libcpp/charset.c | 15 ++++++++++++++- 8 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/ucn1.C diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7cdc8f0d107..82607de67fe 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2019-09-26 Eric Botcazou + + * gcc.dg/cpp/ucs.c: Add test for new warning and adjust. + * gcc.dg/cpp/utf8-5byte-1.c: Add -w to the options. + * gcc.dg/attr-alias-5.c: Likewise. + * g++.dg/cpp/ucn-1.C: Add test for new warning. + * g++.dg/cpp2a/ucn1.C: New test. + 2019-09-26 Max Filippov * gcc.target/xtensa/pr91880.c: New test case. diff --git a/gcc/testsuite/g++.dg/cpp/ucn-1.C b/gcc/testsuite/g++.dg/cpp/ucn-1.C index d929078d899..9596a429650 100644 --- a/gcc/testsuite/g++.dg/cpp/ucn-1.C +++ b/gcc/testsuite/g++.dg/cpp/ucn-1.C @@ -12,4 +12,6 @@ int main() int c\u0024c; // { dg-error "not valid in an identifier" "" { target { powerpc-ibm-aix* } } } U"\uD800"; // { dg-error "not a valid universal character" } + + U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" { target c++2a } } } diff --git a/gcc/testsuite/g++.dg/cpp2a/ucn1.C b/gcc/testsuite/g++.dg/cpp2a/ucn1.C new file mode 100644 index 00000000000..e73c77db12e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/ucn1.C @@ -0,0 +1,7 @@ +// { dg-do compile } +// { dg-options "-std=c++2a" } + +int main() +{ + U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" } +} diff --git a/gcc/testsuite/gcc.dg/attr-alias-5.c b/gcc/testsuite/gcc.dg/attr-alias-5.c index 91e63f89825..a65fe0b9cda 100644 --- a/gcc/testsuite/gcc.dg/attr-alias-5.c +++ b/gcc/testsuite/gcc.dg/attr-alias-5.c @@ -1,7 +1,7 @@ /* Verify diagnostics for aliases to strings containing extended identifiers or bad characters. */ /* { dg-do compile } */ -/* { dg-options "-std=gnu99" } */ +/* { dg-options "-std=gnu99 -w" } */ /* { dg-require-alias "" } */ /* { dg-require-ascii-locale "" } */ /* { dg-skip-if "" { powerpc*-*-aix* } } */ diff --git a/gcc/testsuite/gcc.dg/cpp/ucs.c b/gcc/testsuite/gcc.dg/cpp/ucs.c index 4f76fa99bf5..cac83f3cf14 100644 --- a/gcc/testsuite/gcc.dg/cpp/ucs.c +++ b/gcc/testsuite/gcc.dg/cpp/ucs.c @@ -39,7 +39,7 @@ #endif #if WCHAR_MAX >= 0x7ffffff -# if L'\U1234abcd' != 0x1234abcd +# if L'\U1234abcd' != 0x1234abcd /* { dg-warning "outside" "" } */ # error bad long ucs /* { dg-bogus "bad" "bad U1234abcd evaluation" } */ # endif #endif @@ -49,7 +49,7 @@ void foo () int c; c = L'\ubad'; /* { dg-error "incomplete" "incomplete UCN 1" } */ - c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */ + c = L"\U1234"[0]; /* { dg-error "incomplete" "incomplete UCN 2" } */ c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */ /* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character @@ -64,4 +64,6 @@ void foo () c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */ c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */ c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */ + + c = L'\U00110000'; /* { dg-warning "outside" "110000 outside UCS" } */ } diff --git a/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c index 7f96a56841c..50e6c05575c 100644 --- a/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c +++ b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c @@ -1,7 +1,7 @@ /* Test for bug in conversions from 5-byte UTF-8 sequences in cpplib. */ /* { dg-do run { target { 4byte_wchar_t } } } */ -/* { dg-options "-std=gnu99" } */ +/* { dg-options "-std=gnu99 -w" } */ extern void abort (void); extern void exit (int); diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 0c851952b55..1ca622df6fd 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,10 @@ +2019-09-26 Eric Botcazou + + * charset.c (UCS_LIMIT): New macro. + (ucn_valid_in_identifier): Use it instead of a hardcoded constant. + (_cpp_valid_ucn): Issue a pedantic warning for UCNs larger than + UCS_LIMIT outside of identifiers in C and in C++2a or later. + 2019-09-19 Lewis Hyatt PR c/67224 diff --git a/libcpp/charset.c b/libcpp/charset.c index 10286219bd6..39af77a554a 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -901,6 +901,9 @@ struct ucnrange { }; #include "ucnid.h" +/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */ +#define UCS_LIMIT 0x10FFFF + /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an identifier. We assume C has already gone through the checks of @@ -915,7 +918,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, int mn, mx, md; unsigned short valid_flags, invalid_start_flags; - if (c > 0x10FFFF) + if (c > UCS_LIMIT) return 0; mn = 0; @@ -1016,6 +1019,10 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, whose short identifier is less than 00A0 other than 0024 ($), 0040 (@), or 0060 (`), nor one in the range D800 through DFFF inclusive. + If the hexadecimal value is larger than the upper bound of the UCS + codespace specified in ISO/IEC 10646, a pedantic warning is issued + in all versions of C and in the C++2a or later versions of C++. + *PSTR must be preceded by "\u" or "\U"; it is assumed that the buffer end is delimited by a non-hex digit. Returns false if the UCN has not been consumed, true otherwise. @@ -1135,6 +1142,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, "universal character %.*s is not valid at the start of an identifier", (int) (str - base), base); } + else if (result > UCS_LIMIT + && (!CPP_OPTION (pfile, cplusplus) + || CPP_OPTION (pfile, lang) > CLK_CXX17)) + cpp_error (pfile, CPP_DL_PEDWARN, + "%.*s is outside the UCS codespace", + (int) (str - base), base); *cp = result; return true; -- 2.30.2