Implement N4197 - Adding u8 character literals

author Edward Smith-Rowland <3dw4rd@verizon.net>

Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)

committer Edward Smith-Rowland <emsr@gcc.gnu.org>

Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)
author Edward Smith-Rowland <3dw4rd@verizon.net>
Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)
committer Edward Smith-Rowland <emsr@gcc.gnu.org>
Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog

index 0e03e7e87d0ad9b55c1dae9c152ce7577ec0970c..66ae6a8ab45a44955f7078787bb3df3fde7d968e 100644 (file)
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,13 @@
+2015-06-30  Edward Smith-Rowland  <3dw4rd@verizon.net>
+
+       Implement N4197 - Adding u8 character literals
+       * c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
+       like CPP_CHAR.
+       * c-common.c (c_parse_error()): Print CPP_UTF8CHAR
+       and CPP_UTF8CHAR_USERDEF tokens.
+       * c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
+       and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token.
+
  2015-06-29  Manuel López-Ibáñez  <manu@gcc.gnu.org>
  
         PR fortran/66605
diff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c

index ef3c5e3ae2bfa23cacee7bb6ecfcba4d5eff6f7c..41d612fc46754c577257b2f6cb779d7b2cc927c8 100644 (file)
--- a/gcc/c-family/c-ada-spec.c
+++ b/gcc/c-family/c-ada-spec.c
@@ -249,6 +249,7 @@ print_ada_macros (pretty_printer *pp, cpp_hashnode **macros, int max_ada_macros)
                   case CPP_WCHAR:
                   case CPP_CHAR16:
                   case CPP_CHAR32:
+                 case CPP_UTF8CHAR:
                   case CPP_NAME:
                   case CPP_STRING:
                   case CPP_NUMBER:
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c

index ac42e4aea9aa0ac01ceb278c91be71c0534d4c04..835fc2ce19023ac8b87abd52c3a5d2bbe4cfacc5 100644 (file)
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10256,7 +10256,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
    else if (token_type == CPP_CHAR
            || token_type == CPP_WCHAR
            || token_type == CPP_CHAR16
-          || token_type == CPP_CHAR32)
+          || token_type == CPP_CHAR32
+          || token_type == CPP_UTF8CHAR)
      {
        unsigned int val = TREE_INT_CST_LOW (value);
        const char *prefix;
@@ -10275,6 +10276,9 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
         case CPP_CHAR32:
           prefix = "U";
           break;
+       case CPP_UTF8CHAR:
+         prefix = "u8";
+         break;
          }
  
        if (val <= UCHAR_MAX && ISGRAPH (val))
@@ -10289,7 +10293,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
    else if (token_type == CPP_CHAR_USERDEF
            || token_type == CPP_WCHAR_USERDEF
            || token_type == CPP_CHAR16_USERDEF
-          || token_type == CPP_CHAR32_USERDEF)
+          || token_type == CPP_CHAR32_USERDEF
+          || token_type == CPP_UTF8CHAR_USERDEF)
      message = catenate_messages (gmsgid,
                                  " before user-defined character literal");
    else if (token_type == CPP_STRING_USERDEF
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c

index 9c56832219b273ce4aaf98d072b686850b77a4de..aa5d6e2313f600ed624dbcbdbe288c1577ff6e9f 100644 (file)
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -536,6 +536,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
      case CPP_WCHAR_USERDEF:
      case CPP_CHAR16_USERDEF:
      case CPP_CHAR32_USERDEF:
+    case CPP_UTF8CHAR_USERDEF:
        {
         tree literal;
         cpp_token temp_tok = *tok;
@@ -553,6 +554,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
      case CPP_WCHAR:
      case CPP_CHAR16:
      case CPP_CHAR32:
+    case CPP_UTF8CHAR:
        *value = lex_charconst (tok);
        break;
  
@@ -1250,6 +1252,8 @@ lex_charconst (const cpp_token *token)
      type = char32_type_node;
    else if (token->type == CPP_CHAR16)
      type = char16_type_node;
+  else if (token->type == CPP_UTF8CHAR)
+    type = char_type_node;
    /* In C, a character constant has type 'int'.
       In C++ 'char', but multi-char charconsts have type 'int'.  */
    else if (!c_dialect_cxx () || chars_seen > 1)
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog

index 843b7eda1767909dd4269b060166c00785267228..04fb007f8e705aaf50e12e5f9308939a7497e6cd 100644 (file)
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,10 @@
+2015-06-30  Edward Smith-Rowland  <3dw4rd@verizon.net>
+
+       Implement N4197 - Adding u8 character literals
+       * parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
+       and CPP_UTF8CHAR_USERDEF tokens;
+       (cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.
+
  2015-06-29  Paolo Carlini  <paolo.carlini@oracle.com>
  
         PR c++/65977
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c

index acf147c0e5cac35294eb193ed044d3c281ae1f6e..bb3d636eee1327d052d30dd75a510de77455ad35 100644 (file)
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -4284,6 +4284,7 @@ cp_parser_primary_expression (cp_parser *parser,
      case CPP_CHAR16:
      case CPP_CHAR32:
      case CPP_WCHAR:
+    case CPP_UTF8CHAR:
      case CPP_NUMBER:
      case CPP_PREPARSED_EXPR:
        if (TREE_CODE (token->u.value) == USERDEF_LITERAL)
@@ -4345,6 +4346,7 @@ cp_parser_primary_expression (cp_parser *parser,
      case CPP_CHAR16_USERDEF:
      case CPP_CHAR32_USERDEF:
      case CPP_WCHAR_USERDEF:
+    case CPP_UTF8CHAR_USERDEF:
        return cp_parser_userdef_char_literal (parser);
  
      case CPP_STRING:
@@ -6887,6 +6889,7 @@ cp_parser_parenthesized_expression_list (cp_parser* parser,
                   case CPP_WCHAR:
                   case CPP_CHAR16:
                   case CPP_CHAR32:
+                 case CPP_UTF8CHAR:
                     /* If a parameter is literal zero alone, remember it
                        for -Wmemset-transposed-args warning.  */
                     if (integer_zerop (tok->u.value)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index f14bbb11387b0095262e60da6655f344027d262a..f766b0dcbb63cb0be192dfdbba6e45f8232c27bd 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2015-06-30  Edward Smith-Rowland  <3dw4rd@verizon.net>
+
+       Implement N4197 - Adding u8 character literals
+       * g++.dg/cpp1z/utf8.C: New.
+       * g++.dg/cpp1z/utf8-neg.C: New.
+       * g++.dg/cpp1z/udlit-utf8char.C: New.
+
  2015-06-30  Marek Polacek  <polacek@redhat.com>
  
         * gcc.dg/fold-ior-2.c (fn4): Swap operands.
diff --git a/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C

new file mode 100644 (file)

index 0000000..fb9cdf1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+// { dg-options "-std=c++1z" }
+
+constexpr int
+operator""_foo(char c)
+{ return c * 100; }
+
+auto cc = u8'8'_foo;
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C

new file mode 100644 (file)

index 0000000..339f0e3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++1z" } */
+
+const static char c0 = u8'';           // { dg-error "empty character" }
+const static char c1 = u8'ab';         // { dg-warning "multi-character character constant" }
+const static char c2 = u8'\u0124';     // { dg-warning "multi-character character constant" }
+const static char c3 = u8'\U00064321';  // { dg-warning "multi-character character constant" }
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8.C b/gcc/testsuite/g++.dg/cpp1z/utf8.C

new file mode 100644 (file)

index 0000000..52816f8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8.C
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-options "-std=c++1z" }
+
+#include <cassert>
+#include <experimental/type_traits>
+
+auto c = 'c';
+auto u8c = u8'c';
+
+static_assert(std::experimental::is_same_v<decltype(u8c), decltype(c)>, "");
+
+auto u8s = u8"c";
+auto x = u8s[0];
+
+static_assert(std::experimental::is_same_v<decltype(u8c), decltype(x)>, "");
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog

index c7ac1e3aec021da7bbc247baa44c67fbd5f92815..4e2980222b357048458f73569c86aa943e8aea36 100644 (file)
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,19 @@
+2015-06-30  Edward Smith-Rowland  <3dw4rd@verizon.net>
+
+       Implement N4197 - Adding u8 character literals
+       * include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
+       (struct cpp_options): Add utf8_char_literals.
+       * init.c (struct lang_flags): Add utf8_char_literals;
+       (struct lang_flags lang_defaults): Add column for utf8_char_literals.
+       * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token.
+       * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
+       Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
+       (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
+       (eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
+       * lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
+       * charset.c (converter_for_type(), cpp_interpret_charconst()):
+       Treat CPP_UTF8CHAR token.
+
  2015-06-30  Uros Bizjak  <ubizjak@gmail.com>
  
         * lex.c (search_line_sse42) [__GCC_ASM_FLAG_OUTPUTS__]: New main
diff --git a/libcpp/charset.c b/libcpp/charset.c

index b42763806584cc84fa24aa81eefe279e5f033ef1..8e92bc65f90097cb9770bca0c4539087fc94d2d7 100644 (file)
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1355,6 +1355,7 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
      {
      default:
         return pfile->narrow_cset_desc;
+    case CPP_UTF8CHAR:
      case CPP_UTF8STRING:
         return pfile->utf8_cset_desc;
      case CPP_CHAR16:
@@ -1611,11 +1612,12 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
                          unsigned int *pchars_seen, int *unsignedp)
  {
    cpp_string str = { 0, 0 };
-  bool wide = (token->type != CPP_CHAR);
+  bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR);
+  int u8 = 2 * int(token->type == CPP_UTF8CHAR);
    cppchar_t result;
  
-  /* an empty constant will appear as L'', u'', U'' or '' */
-  if (token->val.str.len == (size_t) (2 + wide))
+  /* An empty constant will appear as L'', u'', U'', u8'', or ''.  */
+  if (token->val.str.len == (size_t) (2 + wide + u8))
      {
        cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
        return 0;
diff --git a/libcpp/expr.c b/libcpp/expr.c

index b8e88c50f330ff9fab1c91e73cb7e876512798cb..3dc5c0bf238907b8164754b3ad4d0d815ff0ab71 100644 (file)
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -307,6 +307,8 @@ cpp_userdef_char_remove_type (enum cpp_ttype type)
      return CPP_CHAR16;
    else if (type == CPP_CHAR32_USERDEF)
      return CPP_CHAR32;
+  else if (type == CPP_UTF8CHAR_USERDEF)
+    return CPP_UTF8CHAR;
    else
      return type;
  }
@@ -325,6 +327,8 @@ cpp_userdef_char_add_type (enum cpp_ttype type)
      return CPP_CHAR16_USERDEF;
    else if (type == CPP_CHAR32)
      return CPP_CHAR32_USERDEF;
+  else if (type == CPP_UTF8CHAR)
+    return CPP_UTF8CHAR_USERDEF;
    else
      return type;
  }
@@ -350,7 +354,8 @@ cpp_userdef_char_p (enum cpp_ttype type)
    if (type == CPP_CHAR_USERDEF
     || type == CPP_WCHAR_USERDEF
     || type == CPP_CHAR16_USERDEF
-   || type == CPP_CHAR32_USERDEF)
+   || type == CPP_CHAR32_USERDEF
+   || type == CPP_UTF8CHAR_USERDEF)
      return true;
    else
      return false;
@@ -1029,6 +1034,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token,
      case CPP_CHAR:
      case CPP_CHAR16:
      case CPP_CHAR32:
+    case CPP_UTF8CHAR:
        {
         cppchar_t cc = cpp_interpret_charconst (pfile, token,
                                                 &temp, &unsignedp);
@@ -1214,6 +1220,7 @@ _cpp_parse_expr (cpp_reader *pfile, bool is_if)
         case CPP_WCHAR:
         case CPP_CHAR16:
         case CPP_CHAR32:
+       case CPP_UTF8CHAR:
         case CPP_NAME:
         case CPP_HASH:
           if (!want_value)
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h

index 1b1a53ce599a3041c14bf992ce57ca99f1e5e890..5eaea6b60d79c6d6dcbee76d63b00e916cfdf21f 100644 (file)
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -119,6 +119,7 @@ struct _cpp_file;
    TK(WCHAR,            LITERAL) /* L'char' */                          \
    TK(CHAR16,           LITERAL) /* u'char' */                          \
    TK(CHAR32,           LITERAL) /* U'char' */                          \
+  TK(UTF8CHAR,         LITERAL) /* u8'char' */                         \
    TK(OTHER,            LITERAL) /* stray punctuation */                \
                                                                         \
    TK(STRING,           LITERAL) /* "string" */                         \
@@ -133,6 +134,7 @@ struct _cpp_file;
    TK(WCHAR_USERDEF,    LITERAL) /* L'char'_suffix - C++-0x */          \
    TK(CHAR16_USERDEF,   LITERAL) /* u'char'_suffix - C++-0x */          \
    TK(CHAR32_USERDEF,   LITERAL) /* U'char'_suffix - C++-0x */          \
+  TK(UTF8CHAR_USERDEF, LITERAL) /* u8'char'_suffix - C++-0x */         \
    TK(STRING_USERDEF,   LITERAL) /* "string"_suffix - C++-0x */         \
    TK(WSTRING_USERDEF,  LITERAL) /* L"string"_suffix - C++-0x */        \
    TK(STRING16_USERDEF, LITERAL) /* u"string"_suffix - C++-0x */        \
@@ -339,6 +341,9 @@ struct cpp_options
    /* Nonzero means process u/U prefix literals (UTF-16/32).  */
    unsigned char uliterals;
  
+  /* Nonzero means process u8 prefixed character literals (UTF-8).  */
+  unsigned char utf8_char_literals;
+
    /* Nonzero means process r/R raw strings.  If this is set, uliterals
       must be set as well.  */
    unsigned char rliterals;
diff --git a/libcpp/init.c b/libcpp/init.c

index 1ebd709af2c7a09ebd76c91b702744daf0011bac..2d5626fd9ab770e22a6f5e87155a19d6978e2489 100644 (file)
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -90,26 +90,27 @@ struct lang_flags
    char binary_constants;
    char digit_separators;
    char trigraphs;
+  char utf8_char_literals;
  };
  
  static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig */
-  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0 },
-  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0 },
-  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0 },
-  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1 },
-  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1 },
-  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1 },
-  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1 },
-  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0 },
-  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1 },
-  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0 },
-  /* CXX11    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1 },
-  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0 },
-  /* CXX14    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1 },
-  /* GNUCXX1Z */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0 },
-  /* CXX1Z    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0 },
-  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0 }
+{ /*              c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit */
+  /* GNUC89   */  { 0,  0,  1,  0,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0 },
+  /* GNUC99   */  { 1,  0,  1,  1,  0,  0,  1,   1,   1,   0,    0,     0,     0,   0 },
+  /* GNUC11   */  { 1,  0,  1,  1,  1,  0,  1,   1,   1,   0,    0,     0,     0,   0 },
+  /* STDC89   */  { 0,  0,  0,  0,  0,  1,  0,   0,   0,   0,    0,     0,     1,   0 },
+  /* STDC94   */  { 0,  0,  0,  0,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0 },
+  /* STDC99   */  { 1,  0,  1,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0 },
+  /* STDC11   */  { 1,  0,  1,  1,  1,  1,  1,   1,   0,   0,    0,     0,     1,   0 },
+  /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,    0,     0,     0,   0 },
+  /* CXX98    */  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,    0,     0,     1,   0 },
+  /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    0,     0,     0,   0 },
+  /* CXX11    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    0,     0,     1,   0 },
+  /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   0 },
+  /* CXX14    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     1,   0 },
+  /* GNUCXX1Z */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,    1,     1,     0,   1 },
+  /* CXX1Z    */  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,    1,     1,     0,   1 },
+  /* ASM      */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,    0,     0,     0,   0 }
  };
  
  /* Sets internal flags correctly for a given language.  */
@@ -133,6 +134,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
    CPP_OPTION (pfile, binary_constants)          = l->binary_constants;
    CPP_OPTION (pfile, digit_separators)          = l->digit_separators;
    CPP_OPTION (pfile, trigraphs)                         = l->trigraphs;
+  CPP_OPTION (pfile, utf8_char_literals)        = l->utf8_char_literals;
  }
  
  /* Initialize library global state.  */
diff --git a/libcpp/lex.c b/libcpp/lex.c

index 5758e580c2ba09729a0e5874c838691c44ee6102..8f2bdc80e117214adf733200434f8e906bf92da7 100644 (file)
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1858,7 +1858,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
    else if (terminator == '\'')
      type = (*base == 'L' ? CPP_WCHAR :
             *base == 'U' ? CPP_CHAR32 :
-           *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
+           *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
+                        : CPP_CHAR);
    else
      terminator = '>', type = CPP_HEADER_NAME;
  
@@ -2403,7 +2404,8 @@ _cpp_lex_direct (cpp_reader *pfile)
                   && CPP_OPTION (pfile, rliterals))
               || (*buffer->cur == '8'
                   && c == 'u'
-                 && (buffer->cur[1] == '"'
+                 && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
+                               && CPP_OPTION (pfile, utf8_char_literals)))
                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
                           && CPP_OPTION (pfile, rliterals)))))
             {
diff --git a/libcpp/macro.c b/libcpp/macro.c

index f76e10b0b2e1153c93472b769c7b47b3b70889da..786c21beba37279f40cbd7161991d624adde68fd 100644 (file)
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -531,7 +531,7 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
                    || token->type == CPP_WSTRING || token->type == CPP_WCHAR
                    || token->type == CPP_STRING32 || token->type == CPP_CHAR32
                    || token->type == CPP_STRING16 || token->type == CPP_CHAR16
-                  || token->type == CPP_UTF8STRING
+                  || token->type == CPP_UTF8STRING || token->type == CPP_UTF8CHAR
                    || cpp_userdef_string_p (token->type)
                    || cpp_userdef_char_p (token->type));
author	Edward Smith-Rowland <3dw4rd@verizon.net>
	Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)
committer	Edward Smith-Rowland <emsr@gcc.gnu.org>
	Tue, 30 Jun 2015 12:58:48 +0000 (12:58 +0000)
gcc/c-family/ChangeLog		patch \| blob \| history
gcc/c-family/c-ada-spec.c		patch \| blob \| history
gcc/c-family/c-common.c		patch \| blob \| history
gcc/c-family/c-lex.c		patch \| blob \| history
gcc/cp/ChangeLog		patch \| blob \| history
gcc/cp/parser.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C	[new file with mode: 0644]	patch \| blob
gcc/testsuite/g++.dg/cpp1z/utf8-neg.C	[new file with mode: 0644]	patch \| blob
gcc/testsuite/g++.dg/cpp1z/utf8.C	[new file with mode: 0644]	patch \| blob
libcpp/ChangeLog		patch \| blob \| history
libcpp/charset.c		patch \| blob \| history
libcpp/expr.c		patch \| blob \| history
libcpp/include/cpplib.h		patch \| blob \| history
libcpp/init.c		patch \| blob \| history
libcpp/lex.c		patch \| blob \| history
libcpp/macro.c		patch \| blob \| history