From fe95b0366a02855988d4c626de2045dae6e11502 Mon Sep 17 00:00:00 2001 From: Edward Smith-Rowland <3dw4rd@verizon.net> Date: Tue, 30 Jun 2015 12:58:48 +0000 Subject: [PATCH] Implement N4197 - Adding u8 character literals libcpp: 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> Implement N4197 - Adding u8 character literals * include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens; (struct cpp_options): Add utf8_char_literals. * init.c (struct lang_flags): Add utf8_char_literals; (struct lang_flags lang_defaults): Add column for utf8_char_literals. * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()): Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens; (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token; (eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token. * lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens. * charset.c (converter_for_type(), cpp_interpret_charconst()): Treat CPP_UTF8CHAR token. gcc/c-family: 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> Implement N4197 - Adding u8 character literals * c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR like CPP_CHAR. * c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR and CPP_UTF8CHAR_USERDEF tokens. * c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token. gcc/cp: 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> Implement N4197 - Adding u8 character literals * parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR and CPP_UTF8CHAR_USERDEF tokens; (cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token. gcc/testsuite: 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> Implement N4197 - Adding u8 character literals * g++.dg/cpp1z/utf8.C: New. * g++.dg/cpp1z/utf8-neg.C: New. * g++.dg/cpp1z/udlit-utf8char.C: New. 
From-SVN: r225185 --- gcc/c-family/ChangeLog | 10 ++++++ gcc/c-family/c-ada-spec.c | 1 + gcc/c-family/c-common.c | 9 ++++-- gcc/c-family/c-lex.c | 4 +++ gcc/cp/ChangeLog | 7 ++++ gcc/cp/parser.c | 3 ++ gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C | 8 +++++ gcc/testsuite/g++.dg/cpp1z/utf8-neg.C | 7 ++++ gcc/testsuite/g++.dg/cpp1z/utf8.C | 15 +++++++++ libcpp/ChangeLog | 16 +++++++++ libcpp/charset.c | 8 +++-- libcpp/expr.c | 9 +++++- libcpp/include/cpplib.h | 5 +++ libcpp/init.c | 36 +++++++++++---------- libcpp/lex.c | 6 ++-- libcpp/macro.c | 2 +- 17 files changed, 127 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/utf8-neg.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/utf8.C diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 0e03e7e87d0..66ae6a8ab45 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,13 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR + like CPP_CHAR. + * c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR + and CPP_UTF8CHAR_USERDEF tokens. + * c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF + and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token. 
+ 2015-06-29 Manuel López-Ibáñez PR fortran/66605 diff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c index ef3c5e3ae2b..41d612fc467 100644 --- a/gcc/c-family/c-ada-spec.c +++ b/gcc/c-family/c-ada-spec.c @@ -249,6 +249,7 @@ print_ada_macros (pretty_printer *pp, cpp_hashnode **macros, int max_ada_macros) case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_NAME: case CPP_STRING: case CPP_NUMBER: diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index ac42e4aea9a..835fc2ce190 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -10256,7 +10256,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, else if (token_type == CPP_CHAR || token_type == CPP_WCHAR || token_type == CPP_CHAR16 - || token_type == CPP_CHAR32) + || token_type == CPP_CHAR32 + || token_type == CPP_UTF8CHAR) { unsigned int val = TREE_INT_CST_LOW (value); const char *prefix; @@ -10275,6 +10276,9 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, case CPP_CHAR32: prefix = "U"; break; + case CPP_UTF8CHAR: + prefix = "u8"; + break; } if (val <= UCHAR_MAX && ISGRAPH (val)) @@ -10289,7 +10293,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, else if (token_type == CPP_CHAR_USERDEF || token_type == CPP_WCHAR_USERDEF || token_type == CPP_CHAR16_USERDEF - || token_type == CPP_CHAR32_USERDEF) + || token_type == CPP_CHAR32_USERDEF + || token_type == CPP_UTF8CHAR_USERDEF) message = catenate_messages (gmsgid, " before user-defined character literal"); else if (token_type == CPP_STRING_USERDEF diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c index 9c56832219b..aa5d6e2313f 100644 --- a/gcc/c-family/c-lex.c +++ b/gcc/c-family/c-lex.c @@ -536,6 +536,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WCHAR_USERDEF: case CPP_CHAR16_USERDEF: case CPP_CHAR32_USERDEF: + case CPP_UTF8CHAR_USERDEF: { tree literal; cpp_token temp_tok = *tok; @@ -553,6 +554,7 @@ 
c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: *value = lex_charconst (tok); break; @@ -1250,6 +1252,8 @@ lex_charconst (const cpp_token *token) type = char32_type_node; else if (token->type == CPP_CHAR16) type = char16_type_node; + else if (token->type == CPP_UTF8CHAR) + type = char_type_node; /* In C, a character constant has type 'int'. In C++ 'char', but multi-char charconsts have type 'int'. */ else if (!c_dialect_cxx () || chars_seen > 1) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 843b7eda176..04fb007f8e7 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,10 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR + and CPP_UTF8CHAR_USERDEF tokens; + (cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token. + 2015-06-29 Paolo Carlini PR c++/65977 diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index acf147c0e5c..bb3d636eee1 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -4284,6 +4284,7 @@ cp_parser_primary_expression (cp_parser *parser, case CPP_CHAR16: case CPP_CHAR32: case CPP_WCHAR: + case CPP_UTF8CHAR: case CPP_NUMBER: case CPP_PREPARSED_EXPR: if (TREE_CODE (token->u.value) == USERDEF_LITERAL) @@ -4345,6 +4346,7 @@ cp_parser_primary_expression (cp_parser *parser, case CPP_CHAR16_USERDEF: case CPP_CHAR32_USERDEF: case CPP_WCHAR_USERDEF: + case CPP_UTF8CHAR_USERDEF: return cp_parser_userdef_char_literal (parser); case CPP_STRING: @@ -6887,6 +6889,7 @@ cp_parser_parenthesized_expression_list (cp_parser* parser, case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: /* If a parameter is literal zero alone, remember it for -Wmemset-transposed-args warning. 
*/ if (integer_zerop (tok->u.value) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f14bbb11387..f766b0dcbb6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * g++.dg/cpp1z/utf8.C: New. + * g++.dg/cpp1z/utf8-neg.C: New. + * g++.dg/cpp1z/udlit-utf8char.C: New. + 2015-06-30 Marek Polacek * gcc.dg/fold-ior-2.c (fn4): Swap operands. diff --git a/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C new file mode 100644 index 00000000000..fb9cdf18fd2 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C @@ -0,0 +1,8 @@ +// { dg-do compile } +// { dg-options "-std=c++1z" } + +constexpr int +operator""_foo(char c) +{ return c * 100; } + +auto cc = u8'8'_foo; diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C new file mode 100644 index 00000000000..339f0e3c029 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++1z" } */ + +const static char c0 = u8''; // { dg-error "empty character" } +const static char c1 = u8'ab'; // { dg-warning "multi-character character constant" } +const static char c2 = u8'\u0124'; // { dg-warning "multi-character character constant" } +const static char c3 = u8'\U00064321'; // { dg-warning "multi-character character constant" } diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8.C b/gcc/testsuite/g++.dg/cpp1z/utf8.C new file mode 100644 index 00000000000..52816f85918 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/utf8.C @@ -0,0 +1,15 @@ +// { dg-do compile } +// { dg-options "-std=c++1z" } + +#include <cassert> +#include <experimental/type_traits> + +auto c = 'c'; +auto u8c = u8'c'; + +static_assert(std::experimental::is_same_v<decltype(u8c), decltype(c)>, ""); + +auto u8s = u8"c"; +auto x = u8s[0]; + +static_assert(std::experimental::is_same_v<decltype(u8c), decltype(x)>, ""); diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog 
index c7ac1e3aec0..4e2980222b3 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,19 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens; + (struct cpp_options): Add utf8_char_literals. + * init.c (struct lang_flags): Add utf8_char_literals; + (struct lang_flags lang_defaults): Add column for utf8_char_literals. + * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; + * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()): + Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens; + (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token; + (eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token. + * lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens. + * charset.c (converter_for_type(), cpp_interpret_charconst()): + Treat CPP_UTF8CHAR token. + 2015-06-30 Uros Bizjak * lex.c (search_line_sse42) [__GCC_ASM_FLAG_OUTPUTS__]: New main diff --git a/libcpp/charset.c b/libcpp/charset.c index b4276380658..8e92bc65f90 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1355,6 +1355,7 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type) { default: return pfile->narrow_cset_desc; + case CPP_UTF8CHAR: case CPP_UTF8STRING: return pfile->utf8_cset_desc; case CPP_CHAR16: @@ -1611,11 +1612,12 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, unsigned int *pchars_seen, int *unsignedp) { cpp_string str = { 0, 0 }; - bool wide = (token->type != CPP_CHAR); + bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR); + int u8 = 2 * int(token->type == CPP_UTF8CHAR); cppchar_t result; - /* an empty constant will appear as L'', u'', U'' or '' */ - if (token->val.str.len == (size_t) (2 + wide)) + /* An empty constant will appear as L'', u'', U'', u8'', or '' */ + if (token->val.str.len == (size_t) (2 + wide + u8)) { cpp_error (pfile, CPP_DL_ERROR, "empty character constant"); 
return 0; diff --git a/libcpp/expr.c b/libcpp/expr.c index b8e88c50f33..3dc5c0bf238 100644 --- a/libcpp/expr.c +++ b/libcpp/expr.c @@ -307,6 +307,8 @@ cpp_userdef_char_remove_type (enum cpp_ttype type) return CPP_CHAR16; else if (type == CPP_CHAR32_USERDEF) return CPP_CHAR32; + else if (type == CPP_UTF8CHAR_USERDEF) + return CPP_UTF8CHAR; else return type; } @@ -325,6 +327,8 @@ cpp_userdef_char_add_type (enum cpp_ttype type) return CPP_CHAR16_USERDEF; else if (type == CPP_CHAR32) return CPP_CHAR32_USERDEF; + else if (type == CPP_UTF8CHAR) + return CPP_UTF8CHAR_USERDEF; else return type; } @@ -350,7 +354,8 @@ cpp_userdef_char_p (enum cpp_ttype type) if (type == CPP_CHAR_USERDEF || type == CPP_WCHAR_USERDEF || type == CPP_CHAR16_USERDEF - || type == CPP_CHAR32_USERDEF) + || type == CPP_CHAR32_USERDEF + || type == CPP_UTF8CHAR_USERDEF) return true; else return false; @@ -1029,6 +1034,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token, case CPP_CHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: { cppchar_t cc = cpp_interpret_charconst (pfile, token, &temp, &unsignedp); @@ -1214,6 +1220,7 @@ _cpp_parse_expr (cpp_reader *pfile, bool is_if) case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_NAME: case CPP_HASH: if (!want_value) diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 1b1a53ce599..5eaea6b60d7 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -119,6 +119,7 @@ struct _cpp_file; TK(WCHAR, LITERAL) /* L'char' */ \ TK(CHAR16, LITERAL) /* u'char' */ \ TK(CHAR32, LITERAL) /* U'char' */ \ + TK(UTF8CHAR, LITERAL) /* u8'char' */ \ TK(OTHER, LITERAL) /* stray punctuation */ \ \ TK(STRING, LITERAL) /* "string" */ \ @@ -133,6 +134,7 @@ struct _cpp_file; TK(WCHAR_USERDEF, LITERAL) /* L'char'_suffix - C++-0x */ \ TK(CHAR16_USERDEF, LITERAL) /* u'char'_suffix - C++-0x */ \ TK(CHAR32_USERDEF, LITERAL) /* U'char'_suffix - C++-0x */ \ + TK(UTF8CHAR_USERDEF, LITERAL) /* u8'char'_suffix - C++-0x */ 
\ TK(STRING_USERDEF, LITERAL) /* "string"_suffix - C++-0x */ \ TK(WSTRING_USERDEF, LITERAL) /* L"string"_suffix - C++-0x */ \ TK(STRING16_USERDEF, LITERAL) /* u"string"_suffix - C++-0x */ \ @@ -339,6 +341,9 @@ struct cpp_options /* Nonzero means process u/U prefix literals (UTF-16/32). */ unsigned char uliterals; + /* Nonzero means process u8 prefixed character literals (UTF-8). */ + unsigned char utf8_char_literals; + /* Nonzero means process r/R raw strings. If this is set, uliterals must be set as well. */ unsigned char rliterals; diff --git a/libcpp/init.c b/libcpp/init.c index 1ebd709af2c..2d5626fd9ab 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -90,26 +90,27 @@ struct lang_flags char binary_constants; char digit_separators; char trigraphs; + char utf8_char_literals; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 }, - /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 }, - /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* GNUC99 */ { 
1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 }, + /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0 }, + /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }, + /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1 }, + /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Sets internal flags correctly for a given language. */ @@ -133,6 +134,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, binary_constants) = l->binary_constants; CPP_OPTION (pfile, digit_separators) = l->digit_separators; CPP_OPTION (pfile, trigraphs) = l->trigraphs; + CPP_OPTION (pfile, utf8_char_literals) = l->utf8_char_literals; } /* Initialize library global state. */ diff --git a/libcpp/lex.c b/libcpp/lex.c index 5758e580c2b..8f2bdc80e11 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1858,7 +1858,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) else if (terminator == '\'') type = (*base == 'L' ? CPP_WCHAR : *base == 'U' ? CPP_CHAR32 : - *base == 'u' ? CPP_CHAR16 : CPP_CHAR); + *base == 'u' ? (base[1] == '8' ? 
CPP_UTF8CHAR : CPP_CHAR16) + : CPP_CHAR); else terminator = '>', type = CPP_HEADER_NAME; @@ -2403,7 +2404,8 @@ _cpp_lex_direct (cpp_reader *pfile) && CPP_OPTION (pfile, rliterals)) || (*buffer->cur == '8' && c == 'u' - && (buffer->cur[1] == '"' + && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\'' + && CPP_OPTION (pfile, utf8_char_literals))) || (buffer->cur[1] == 'R' && buffer->cur[2] == '"' && CPP_OPTION (pfile, rliterals))))) { diff --git a/libcpp/macro.c b/libcpp/macro.c index f76e10b0b2e..786c21beba3 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -531,7 +531,7 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg) || token->type == CPP_WSTRING || token->type == CPP_WCHAR || token->type == CPP_STRING32 || token->type == CPP_CHAR32 || token->type == CPP_STRING16 || token->type == CPP_CHAR16 - || token->type == CPP_UTF8STRING + || token->type == CPP_UTF8STRING || token->type == CPP_UTF8CHAR || cpp_userdef_string_p (token->type) || cpp_userdef_char_p (token->type)); -- 2.30.2