From 2c6e3f5540bc82374b7f71333f550cfccca93a74 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Mon, 19 Oct 2009 23:41:15 +0200
Subject: [PATCH] charset.c (cpp_init_iconv): Initialize utf8_cset_desc.

	* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
	(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
	and char32_cset_desc.
	(converter_for_type): Handle CPP_UTF8STRING.
	(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
	* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
	(parse_include): Reject raw strings.
	* include/cpplib.h (CPP_UTF8STRING): New token type.
	* internal.h (struct cpp_reader): Add utf8_cset_desc field.
	* lex.c (lex_raw_string): New function.
	(lex_string): Handle u8 string literals, call lex_raw_string
	for raw string literals.
	(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
	sequences.
	* macro.c (stringify_arg): Handle CPP_UTF8STRING.

	* c-common.c (c_parse_error): Handle CPP_UTF8STRING.
	* c-lex.c (c_lex_with_flags): Likewise.  Test C_LEX_STRING_NO_JOIN
	instead of C_LEX_RAW_STRINGS.
	(lex_string): Handle CPP_UTF8STRING.
	* c-parser.c (c_parser_postfix_expression): Likewise.
	* c-pragma.h (C_LEX_RAW_STRINGS): Rename to ...
	(C_LEX_STRING_NO_JOIN): ... this.

	* parser.c (cp_lexer_print_token, cp_parser_is_string_literal,
	cp_parser_string_literal, cp_parser_primary_expression): Handle
	CPP_UTF8STRING.
	(cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead
	of C_LEX_RAW_STRINGS.

	* gcc.dg/raw-string-1.c: New test.
	* gcc.dg/raw-string-2.c: New test.
	* gcc.dg/raw-string-3.c: New test.
	* gcc.dg/raw-string-4.c: New test.
	* gcc.dg/raw-string-5.c: New test.
	* gcc.dg/raw-string-6.c: New test.
	* gcc.dg/raw-string-7.c: New test.
	* gcc.dg/utf8-1.c: New test.
	* gcc.dg/utf8-2.c: New test.
	* gcc.dg/utf-badconcat2.c: New test.
	* gcc.dg/utf-dflt2.c: New test.
	* gcc.dg/cpp/include6.c: New test.
	* g++.dg/ext/raw-string-1.C: New test.
	* g++.dg/ext/raw-string-2.C: New test.
	* g++.dg/ext/raw-string-3.C: New test.
	* g++.dg/ext/raw-string-4.C: New test.
	* g++.dg/ext/raw-string-5.C: New test.
	* g++.dg/ext/raw-string-6.C: New test.
	* g++.dg/ext/raw-string-7.C: New test.
	* g++.dg/ext/utf8-1.C: New test.
	* g++.dg/ext/utf8-2.C: New test.
	* g++.dg/ext/utf-badconcat2.C: New test.
	* g++.dg/ext/utf-dflt2.C: New test.

From-SVN: r152995
---
 gcc/ChangeLog                             |  10 +
 gcc/c-common.c                            |   3 +-
 gcc/c-lex.c                               |  18 +-
 gcc/c-parser.c                            |   1 +
 gcc/c-pragma.h                            |   6 +-
 gcc/cp/ChangeLog                          |   7 +
 gcc/cp/parser.c                           |   8 +-
 gcc/testsuite/ChangeLog                   |  24 +++
 gcc/testsuite/g++.dg/ext/raw-string-1.C   |  96 ++++++++++
 gcc/testsuite/g++.dg/ext/raw-string-2.C   | 104 ++++++++++
 gcc/testsuite/g++.dg/ext/raw-string-3.C   |  58 ++++++
 gcc/testsuite/g++.dg/ext/raw-string-4.C   |  28 +++
 gcc/testsuite/g++.dg/ext/raw-string-5.C   |  23 +++
 gcc/testsuite/g++.dg/ext/raw-string-6.C   |   5 +
 gcc/testsuite/g++.dg/ext/raw-string-7.C   |  23 +++
 gcc/testsuite/g++.dg/ext/utf-badconcat2.C |  15 ++
 gcc/testsuite/g++.dg/ext/utf-dflt2.C      |  12 ++
 gcc/testsuite/g++.dg/ext/utf8-1.C         |  45 +++++
 gcc/testsuite/g++.dg/ext/utf8-2.C         |  21 ++
 gcc/testsuite/gcc.dg/cpp/include6.c       |  14 ++
 gcc/testsuite/gcc.dg/raw-string-1.c       | 101 ++++++++++
 gcc/testsuite/gcc.dg/raw-string-2.c       | 109 +++++++++++
 gcc/testsuite/gcc.dg/raw-string-3.c       |  53 +++++
 gcc/testsuite/gcc.dg/raw-string-4.c       |  28 +++
 gcc/testsuite/gcc.dg/raw-string-5.c       |  23 +++
 gcc/testsuite/gcc.dg/raw-string-6.c       |   5 +
 gcc/testsuite/gcc.dg/raw-string-7.c       |  23 +++
 gcc/testsuite/gcc.dg/utf-badconcat2.c     |  15 ++
 gcc/testsuite/gcc.dg/utf-dflt2.c          |  12 ++
 gcc/testsuite/gcc.dg/utf8-1.c             |  45 +++++
 gcc/testsuite/gcc.dg/utf8-2.c             |  26 +++
 libcpp/ChangeLog                          |  18 ++
 libcpp/charset.c                          |  52 ++++-
 libcpp/directives.c                       |   6 +-
 libcpp/include/cpplib.h                   |   5 +-
 libcpp/internal.h                         |   4 +
 libcpp/lex.c                              | 223 +++++++++++++++++++++-
 libcpp/macro.c                            |   3 +-
 38 files changed, 1244 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-3.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-4.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-5.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-6.C
 create mode 100644 gcc/testsuite/g++.dg/ext/raw-string-7.C
 create mode 100644 gcc/testsuite/g++.dg/ext/utf-badconcat2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/utf-dflt2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/utf8-1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/utf8-2.C
 create mode 100644 gcc/testsuite/gcc.dg/cpp/include6.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-1.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-2.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-3.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-4.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-5.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-6.c
 create mode 100644 gcc/testsuite/gcc.dg/raw-string-7.c
 create mode 100644 gcc/testsuite/gcc.dg/utf-badconcat2.c
 create mode 100644 gcc/testsuite/gcc.dg/utf-dflt2.c
 create mode 100644 gcc/testsuite/gcc.dg/utf8-1.c
 create mode 100644 gcc/testsuite/gcc.dg/utf8-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 83b507adb28..548cc0b36ed 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2009-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* c-common.c (c_parse_error): Handle CPP_UTF8STRING.
+	* c-lex.c (c_lex_with_flags): Likewise.  Test C_LEX_STRING_NO_JOIN
+	instead of C_LEX_RAW_STRINGS.
+	(lex_string): Handle CPP_UTF8STRING.
+	* c-parser.c (c_parser_postfix_expression): Likewise.
+	* c-pragma.h (C_LEX_RAW_STRINGS): Rename to ...
+	(C_LEX_STRING_NO_JOIN): ... this.
+
 2009-10-19  Anatoly Sokolov  <aesok@post.ru>
 
 	* config/cris/cris.c (cris_function_value, cris_libcall_value,
diff --git a/gcc/c-common.c b/gcc/c-common.c
index fb2a84ebbf4..16e17b39d1d 100644
--- a/gcc/c-common.c
+++ b/gcc/c-common.c
@@ -8181,7 +8181,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
   else if (token_type == CPP_STRING 
 	   || token_type == CPP_WSTRING 
 	   || token_type == CPP_STRING16
-	   || token_type == CPP_STRING32)
+	   || token_type == CPP_STRING32
+	   || token_type == CPP_UTF8STRING)
     message = catenate_messages (gmsgid, " before string constant");
   else if (token_type == CPP_NUMBER)
     message = catenate_messages (gmsgid, " before numeric constant");
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index 0c6cdab9dff..fd3df8c0a48 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -365,6 +365,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
 	    case CPP_WSTRING:
 	    case CPP_STRING16:
 	    case CPP_STRING32:
+	    case CPP_UTF8STRING:
 	      type = lex_string (tok, value, true, true);
 	      break;
 
@@ -423,7 +424,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
     case CPP_WSTRING:
     case CPP_STRING16:
     case CPP_STRING32:
-      if ((lex_flags & C_LEX_RAW_STRINGS) == 0)
+    case CPP_UTF8STRING:
+      if ((lex_flags & C_LEX_STRING_NO_JOIN) == 0)
 	{
 	  type = lex_string (tok, value, false,
 			     (lex_flags & C_LEX_STRING_NO_TRANSLATE) == 0);
@@ -871,12 +873,13 @@ interpret_fixed (const cpp_token *token, unsigned int flags)
   return value;
 }
 
-/* Convert a series of STRING, WSTRING, STRING16 and/or STRING32 tokens
-   into a tree, performing string constant concatenation.  TOK is the
-   first of these.  VALP is the location to write the string into.
-   OBJC_STRING indicates whether an '@' token preceded the incoming token.
+/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
+   UTF8STRING tokens into a tree, performing string constant
+   concatenation.  TOK is the first of these.  VALP is the location
+   to write the string into.  OBJC_STRING indicates whether an '@' token
+   preceded the incoming token.
    Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
-   CPP_STRING32, CPP_STRING16, or CPP_OBJC_STRING).
+   CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING).
 
    This is unfortunately more work than it should be.  If any of the
    strings in the series has an L prefix, the result is a wide string
@@ -921,6 +924,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
     case CPP_WSTRING:
     case CPP_STRING16:
     case CPP_STRING32:
+    case CPP_UTF8STRING:
       if (type != tok->type)
 	{
 	  if (type == CPP_STRING)
@@ -966,6 +970,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
 	{
 	default:
 	case CPP_STRING:
+	case CPP_UTF8STRING:
 	  value = build_string (1, "");
 	  break;
 	case CPP_STRING16:
@@ -991,6 +996,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
     {
     default:
     case CPP_STRING:
+    case CPP_UTF8STRING:
       TREE_TYPE (value) = char_array_type_node;
       break;
     case CPP_STRING16:
diff --git a/gcc/c-parser.c b/gcc/c-parser.c
index 3f6e949fe8e..767d97fbe58 100644
--- a/gcc/c-parser.c
+++ b/gcc/c-parser.c
@@ -5349,6 +5349,7 @@ c_parser_postfix_expression (c_parser *parser)
     case CPP_STRING16:
     case CPP_STRING32:
     case CPP_WSTRING:
+    case CPP_UTF8STRING:
       expr.value = c_parser_peek_token (parser)->value;
       expr.original_code = STRING_CST;
       c_parser_consume_token (parser);
diff --git a/gcc/c-pragma.h b/gcc/c-pragma.h
index 188afb8dbaa..be085ee1115 100644
--- a/gcc/c-pragma.h
+++ b/gcc/c-pragma.h
@@ -118,9 +118,9 @@ extern enum cpp_ttype pragma_lex (tree *);
    so that 0 means to translate and join strings.  */
 #define C_LEX_STRING_NO_TRANSLATE 1 /* Do not lex strings into
 				       execution character set.  */
-#define C_LEX_RAW_STRINGS         2 /* Return raw strings -- no
-				       concatenation, no
-				       translation.  */
+#define C_LEX_STRING_NO_JOIN	  2 /* Do not concatenate strings
+				       nor translate them into execution
+				       character set.  */
 
 /* This is not actually available to pragma parsers.  It's merely a
    convenient location to declare this function for c-lex, after
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 279ba9e6419..a068a432b1f 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,10 @@
+2009-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* parser.c (cp_lexer_print_token, cp_parser_is_string_literal,
+	cp_parser_string_literal, cp_parser_primary_expression): Handle CPP_UTF8STRING.
+	(cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead
+	of C_LEX_RAW_STRINGS.
+
 2009-10-15  Jason Merrill  <jason@redhat.com>
 
 	PR c++/38888
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index b9b53e5ebaf..7fd995f9e83 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -402,7 +402,7 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token *token)
    /* Get a new token from the preprocessor.  */
   token->type
     = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
-			lexer == NULL ? 0 : C_LEX_RAW_STRINGS);
+			lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
   token->keyword = RID_MAX;
   token->pragma_kind = PRAGMA_NONE;
 
@@ -792,6 +792,7 @@ cp_lexer_print_token (FILE * stream, cp_token *token)
     case CPP_STRING16:
     case CPP_STRING32:
     case CPP_WSTRING:
+    case CPP_UTF8STRING:
       fprintf (stream, " \"%s\"", TREE_STRING_POINTER (token->u.value));
       break;
 
@@ -2065,7 +2066,8 @@ cp_parser_is_string_literal (cp_token* token)
   return (token->type == CPP_STRING ||
 	  token->type == CPP_STRING16 ||
 	  token->type == CPP_STRING32 ||
-	  token->type == CPP_WSTRING);
+	  token->type == CPP_WSTRING ||
+	  token->type == CPP_UTF8STRING);
 }
 
 /* Returns nonzero if TOKEN is the indicated KEYWORD.  */
@@ -3004,6 +3006,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
 	{
 	default:
 	case CPP_STRING:
+	case CPP_UTF8STRING:
 	  TREE_TYPE (value) = char_array_type_node;
 	  break;
 	case CPP_STRING16:
@@ -3233,6 +3236,7 @@ cp_parser_primary_expression (cp_parser *parser,
     case CPP_STRING16:
     case CPP_STRING32:
     case CPP_WSTRING:
+    case CPP_UTF8STRING:
       /* ??? Should wide strings be allowed when parser->translate_strings_p
 	 is false (i.e. in attributes)?  If not, we can kill the third
 	 argument to cp_parser_string_literal.  */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8fce05cd9ed..c91c4d427c3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,29 @@
 2009-10-19  Jakub Jelinek  <jakub@redhat.com>
 
+	* gcc.dg/raw-string-1.c: New test.
+	* gcc.dg/raw-string-2.c: New test.
+	* gcc.dg/raw-string-3.c: New test.
+	* gcc.dg/raw-string-4.c: New test.
+	* gcc.dg/raw-string-5.c: New test.
+	* gcc.dg/raw-string-6.c: New test.
+	* gcc.dg/raw-string-7.c: New test.
+	* gcc.dg/utf8-1.c: New test.
+	* gcc.dg/utf8-2.c: New test.
+	* gcc.dg/utf-badconcat2.c: New test.
+	* gcc.dg/utf-dflt2.c: New test.
+	* gcc.dg/cpp/include6.c: New test.
+	* g++.dg/ext/raw-string-1.C: New test.
+	* g++.dg/ext/raw-string-2.C: New test.
+	* g++.dg/ext/raw-string-3.C: New test.
+	* g++.dg/ext/raw-string-4.C: New test.
+	* g++.dg/ext/raw-string-5.C: New test.
+	* g++.dg/ext/raw-string-6.C: New test.
+	* g++.dg/ext/raw-string-7.C: New test.
+	* g++.dg/ext/utf8-1.C: New test.
+	* g++.dg/ext/utf8-2.C: New test.
+	* g++.dg/ext/utf-badconcat2.C: New test.
+	* g++.dg/ext/utf-dflt2.C: New test.
+
 	* gcc.dg/cleanup-13.c: New test.
 
 2009-10-19  Janus Weil  <janus@gcc.gnu.org>
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-1.C b/gcc/testsuite/g++.dg/ext/raw-string-1.C
new file mode 100644
index 00000000000..0e8abf8b498
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-1.C
@@ -0,0 +1,96 @@
+// { dg-do run }
+// { dg-options "-std=c++0x" }
+
+const char s0[] = R"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char s2[] = R"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char s3[] = "ab\nc]\"\nc]*|\"\nc";
+
+const char t0[] = u8R"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char t2[] = u8R"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char t3[] = u8"ab\nc]\"\nc]*|\"\nc";
+
+const char16_t u0[] = uR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char16_t u2[] = uR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char16_t u3[] = u"ab\nc]\"\nc]*|\"\nc";
+
+const char32_t U0[] = UR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char32_t U2[] = UR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char32_t U3[] = U"ab\nc]\"\nc]*|\"\nc";
+
+const wchar_t L0[] = LR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const wchar_t L2[] = LR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const wchar_t L3[] = L"ab\nc]\"\nc]*|\"\nc";
+
+int
+main (void)
+{
+  if (sizeof (s0) != sizeof (s1)
+      || __builtin_memcmp (s0, s1, sizeof (s0)) != 0)
+    __builtin_abort ();
+  if (sizeof (s2) != sizeof (s3)
+      || __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
+    __builtin_abort ();
+  if (sizeof (t0) != sizeof (t1)
+      || __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
+    __builtin_abort ();
+  if (sizeof (t2) != sizeof (t3)
+      || __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
+    __builtin_abort ();
+  if (sizeof (u0) != sizeof (u1)
+      || __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
+    __builtin_abort ();
+  if (sizeof (u2) != sizeof (u3)
+      || __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
+    __builtin_abort ();
+  if (sizeof (U0) != sizeof (U1)
+      || __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
+    __builtin_abort ();
+  if (sizeof (U2) != sizeof (U3)
+      || __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
+    __builtin_abort ();
+  if (sizeof (L0) != sizeof (L1)
+      || __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
+    __builtin_abort ();
+  if (sizeof (L2) != sizeof (L3)
+      || __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
+    __builtin_abort ();
+  if (sizeof (R"*[]*") != 1
+      || __builtin_memcmp (R"*[]*", "", 1) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-2.C b/gcc/testsuite/g++.dg/ext/raw-string-2.C
new file mode 100644
index 00000000000..c7eb602afec
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-2.C
@@ -0,0 +1,104 @@
+// { dg-do run }
+// { dg-options "-std=c++0x" }
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"[a]" "[b]";
+const char s01[] = "[a]" R"*[b]*";
+const char s02[] = R"[a]" R"[b]";
+const char s03[] = R"-[a]-" u8"[b]";
+const char s04[] = "[a]" u8R"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char s05[] = R"[a]" u8R"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char s06[] = u8R";([a];(" "[b]";
+const char s07[] = u8"[a]" R"[b]";
+const char s08[] = u8R"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char s09[] = u8R"/^&|~!=,"'\[a]/^&|~!=,"'\" u8"[b]";
+const char s10[] = u8"[a]" u8R"0123456789abcdef[b]0123456789abcdef";
+const char s11[] = u8R"ghijklmnopqrstuv[a]ghijklmnopqrstuv" u8R"w[b]w";
+
+const char16_t u03[] = R"-[a]-" u"[b]";
+const char16_t u04[] = "[a]" uR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char16_t u05[] = R"[a]" uR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char16_t u06[] = uR";([a];(" "[b]";
+const char16_t u07[] = u"[a]" R"[b]";
+const char16_t u08[] = uR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char16_t u09[] = uR"/^&|~!=,"'\[a]/^&|~!=,"'\" u"[b]";
+const char16_t u10[] = u"[a]" uR"0123456789abcdef[b]0123456789abcdef";
+const char16_t u11[] = uR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" uR"w[b]w";
+
+const char32_t U03[] = R"-[a]-" U"[b]";
+const char32_t U04[] = "[a]" UR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char32_t U05[] = R"[a]" UR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char32_t U06[] = UR";([a];(" "[b]";
+const char32_t U07[] = U"[a]" R"[b]";
+const char32_t U08[] = UR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char32_t U09[] = UR"/^&|~!=,"'\[a]/^&|~!=,"'\" U"[b]";
+const char32_t U10[] = U"[a]" UR"0123456789abcdef[b]0123456789abcdef";
+const char32_t U11[] = UR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" UR"w[b]w";
+
+const wchar_t L03[] = R"-[a]-" L"[b]";
+const wchar_t L04[] = "[a]" LR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const wchar_t L05[] = R"[a]" LR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const wchar_t L06[] = LR";([a];(" "[b]";
+const wchar_t L07[] = L"[a]" R"[b]";
+const wchar_t L08[] = LR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const wchar_t L09[] = LR"/^&|~!=,"'\[a]/^&|~!=,"'\" L"[b]";
+const wchar_t L10[] = L"[a]" LR"0123456789abcdef[b]0123456789abcdef";
+const wchar_t L11[] = LR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" LR"w[b]w";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "a[b]");
+  TEST (s01, "[a]b");
+  TEST (s02, "ab");
+  TEST (s03, "a[b]");
+  TEST (s04, "[a]b");
+  TEST (s05, "ab");
+  TEST (s06, "a[b]");
+  TEST (s07, "[a]b");
+  TEST (s08, "ab");
+  TEST (s09, "a[b]");
+  TEST (s10, "[a]b");
+  TEST (s11, "ab");
+  TEST (u03, u"a[b]");
+  TEST (u04, u"[a]b");
+  TEST (u05, u"ab");
+  TEST (u06, u"a[b]");
+  TEST (u07, u"[a]b");
+  TEST (u08, u"ab");
+  TEST (u09, u"a[b]");
+  TEST (u10, u"[a]b");
+  TEST (u11, u"ab");
+  TEST (U03, U"a[b]");
+  TEST (U04, U"[a]b");
+  TEST (U05, U"ab");
+  TEST (U06, U"a[b]");
+  TEST (U07, U"[a]b");
+  TEST (U08, U"ab");
+  TEST (U09, U"a[b]");
+  TEST (U10, U"[a]b");
+  TEST (U11, U"ab");
+  TEST (L03, L"a[b]");
+  TEST (L04, L"[a]b");
+  TEST (L05, L"ab");
+  TEST (L06, L"a[b]");
+  TEST (L07, L"[a]b");
+  TEST (L08, L"ab");
+  TEST (L09, L"a[b]");
+  TEST (L10, L"[a]b");
+  TEST (L11, L"ab");
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-3.C b/gcc/testsuite/g++.dg/ext/raw-string-3.C
new file mode 100644
index 00000000000..46c48620905
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-3.C
@@ -0,0 +1,58 @@
+// In C++98, the {,u,u8,U,L}R prefix should be parsed as a separate
+// token.
+// { dg-do compile }
+// { dg-options "-std=c++98" }
+
+const void	*s0	= R"[a]";	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 6 }
+const void	*s1	= uR"[a]";	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 8 }
+const void	*s2	= UR"[a]";	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 10 }
+const void	*s3	= u8R"[a]";	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 12 }
+const void	*s4	= LR"[a]";	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 14 }
+
+const int	i0	= R'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 17 }
+const int	i1	= uR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 19 }
+const int	i2	= UR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 21 }
+const int	i3	= u8R'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 23 }
+const int	i4	= LR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 25 }
+
+#define R	"a"
+#define uR	"b"
+#define UR	"c"
+#define u8R	"d"
+#define LR	"e"
+
+const void	*s5	= R"[a]";
+const void	*s6	= uR"[a]";
+const void	*s7	= UR"[a]";
+const void	*s8	= u8R"[a]";
+const void	*s9	= LR"[a]";
+
+#undef R
+#undef uR
+#undef UR
+#undef u8R
+#undef LR
+
+#define R	1 +
+#define uR	2 +
+#define UR	3 +
+#define u8R	4 +
+#define LR	5 +
+
+const int	i5	= R'a';
+const int	i6	= uR'a';
+const int	i7	= UR'a';
+const int	i8	= u8R'a';
+const int	i9	= LR'a';
+
+int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-4.C b/gcc/testsuite/g++.dg/ext/raw-string-4.C
new file mode 100644
index 00000000000..03179befac8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-4.C
@@ -0,0 +1,28 @@
+// R is not applicable for character literals.
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+const int	i0	= R'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 5 }
+const int	i1	= uR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 7 }
+const int	i2	= UR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 9 }
+const int	i3	= u8R'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 11 }
+const int	i4	= LR'a';	// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 13 }
+
+#define R	1 +
+#define uR	2 +
+#define UR	3 +
+#define u8R	4 +
+#define LR	5 +
+
+const int	i5	= R'a';
+const int	i6	= uR'a';
+const int	i7	= UR'a';
+const int	i8	= u8R'a';
+const int	i9	= LR'a';
+
+int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-5.C b/gcc/testsuite/g++.dg/ext/raw-string-5.C
new file mode 100644
index 00000000000..b815eb23b16
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-5.C
@@ -0,0 +1,23 @@
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+const void *s0 = R"0123456789abcdefg[]0123456789abcdefg";
+	// { dg-error "raw string delimiter longer" "" { target *-*-* } 4 }
+	// { dg-error "stray" "" { target *-*-* } 4 }
+const void *s1 = R" [] ";
+	// { dg-error "invalid character" "" { target *-*-* } 7 }
+	// { dg-error "stray" "" { target *-*-* } 7 }
+const void *s2 = R"	[]	";
+	// { dg-error "invalid character" "" { target *-*-* } 10 }
+	// { dg-error "stray" "" { target *-*-* } 10 }
+const void *s3 = R"][]]";
+	// { dg-error "invalid character" "" { target *-*-* } 13 }
+	// { dg-error "stray" "" { target *-*-* } 13 }
+const void *s4 = R"@[]@";
+	// { dg-error "invalid character" "" { target *-*-* } 16 }
+	// { dg-error "stray" "" { target *-*-* } 16 }
+const void *s5 = R"$[]$";
+	// { dg-error "invalid character" "" { target *-*-* } 19 }
+	// { dg-error "stray" "" { target *-*-* } 19 }
+
+int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-6.C b/gcc/testsuite/g++.dg/ext/raw-string-6.C
new file mode 100644
index 00000000000..a97d95d5693
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-6.C
@@ -0,0 +1,5 @@
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+const void *s0 = R"ouch[]ouCh";	// { dg-error "at end of input" }
+	// { dg-error "unterminated raw string" "" { target *-*-* } 4 }
diff --git a/gcc/testsuite/g++.dg/ext/raw-string-7.C b/gcc/testsuite/g++.dg/ext/raw-string-7.C
new file mode 100644
index 00000000000..0f479193433
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/raw-string-7.C
@@ -0,0 +1,23 @@
+// The trailing whitespace after \ and before newline extension
+// breaks full compliance for raw strings.
+// { dg-do run { xfail *-*-* } }
+// { dg-options "-std=c++0x" }
+
+// Note, there is a single space after \ on the following line.
+const char *s0 = R"[\ 
+]";
+// { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 7 }
+
+// Note, there is a single tab after \ on the following line.
+const char *s1 = R"[\	
+]";
+// { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 12 }
+
+int
+main (void)
+{
+  if (__builtin_strcmp (s0, "\\ \n") != 0
+      || __builtin_strcmp (s1, "\\\t\n") != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/ext/utf-badconcat2.C b/gcc/testsuite/g++.dg/ext/utf-badconcat2.C
new file mode 100644
index 00000000000..499b323fc19
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/utf-badconcat2.C
@@ -0,0 +1,15 @@
+// Test unsupported concatenation of UTF-8 string literals.
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+const void *s0	= u8"a"   "b";
+const void *s1	=   "a" u8"b";
+const void *s2	= u8"a" u8"b";
+const void *s3	= u8"a"  u"b";	// { dg-error "non-standard concatenation" }
+const void *s4	=  u"a" u8"b";	// { dg-error "non-standard concatenation" }
+const void *s5	= u8"a"  U"b";	// { dg-error "non-standard concatenation" }
+const void *s6	=  U"a" u8"b";	// { dg-error "non-standard concatenation" }
+const void *s7	= u8"a"  L"b";	// { dg-error "non-standard concatenation" }
+const void *s8	=  L"a" u8"b";	// { dg-error "non-standard concatenation" }
+
+int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/utf-dflt2.C b/gcc/testsuite/g++.dg/ext/utf-dflt2.C
new file mode 100644
index 00000000000..fd2222f672e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/utf-dflt2.C
@@ -0,0 +1,12 @@
+// In C++98, the u8 prefix should be parsed as separate tokens.
+// { dg-do compile }
+// { dg-options "-std=c++98" }
+
+const void	*s0 = u8"a";		// { dg-error "was not declared" }
+		// { dg-error "expected ',' or ';'" "" { target *-*-* } 5 }
+
+#define u8	"a"
+
+const void	*s1 = u8"a";
+
+int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/utf8-1.C b/gcc/testsuite/g++.dg/ext/utf8-1.C
new file mode 100644
index 00000000000..203b326af79
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/utf8-1.C
@@ -0,0 +1,45 @@
+// { dg-do run }
+// { dg-require-iconv "ISO-8859-2" }
+// { dg-options "-std=c++0x -fexec-charset=ISO-8859-2" }
+
+const char *str1 = "h\u00e1\U0000010Dky ";
+const char *str2 = "\u010d\u00E1rky\n";
+const char *str3 = u8"h\u00e1\U0000010Dky ";
+const char *str4 = u8"\u010d\u00E1rky\n";
+const char *str5 = "h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str6 = u8"h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str7 = "h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+#define u8
+const char *str8 = u8"h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+
+const char latin2_1[] = "\x68\xe1\xe8\x6b\x79\x20";
+const char latin2_2[] = "\xe8\xe1\x72\x6b\x79\n";
+const char utf8_1[] = "\x68\xc3\xa1\xc4\x8d\x6b\x79\x20";
+const char utf8_2[] = "\xc4\x8d\xc3\xa1\x72\x6b\x79\n";
+
+int
+main (void)
+{
+  if (__builtin_strcmp (str1, latin2_1) != 0
+      || __builtin_strcmp (str2, latin2_2) != 0
+      || __builtin_strcmp (str3, utf8_1) != 0
+      || __builtin_strcmp (str4, utf8_2) != 0
+      || __builtin_strncmp (str5, latin2_1, sizeof (latin2_1) - 1) != 0
+      || __builtin_strcmp (str5 + sizeof (latin2_1) - 1, latin2_2) != 0
+      || __builtin_strncmp (str6, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str6 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str7, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str7 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str8, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str8 + sizeof (utf8_1) - 1, utf8_2) != 0)
+    __builtin_abort ();
+  if (sizeof ("a" u8"b"[0]) != 1
+      || sizeof (u8"a" "b"[0]) != 1
+      || sizeof (u8"a" u8"b"[0]) != 1
+      || sizeof ("a" "\u010d") != 3
+      || sizeof ("a" u8"\u010d") != 4
+      || sizeof (u8"a" "\u010d") != 4
+      || sizeof (u8"a" "\u010d") != 4)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/ext/utf8-2.C b/gcc/testsuite/g++.dg/ext/utf8-2.C
new file mode 100644
index 00000000000..417a8bfdc1e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/utf8-2.C
@@ -0,0 +1,21 @@
+// { dg-do compile }
+// { dg-options "-std=c++0x" }
+
+const char	s0[]	= u8"ab";
+const char16_t	s1[]	= u8"ab";	// { dg-error "from non-wide" }
+const char32_t  s2[]    = u8"ab";	// { dg-error "from non-wide" }
+const wchar_t   s3[]    = u8"ab";	// { dg-error "from non-wide" }
+
+const char      t0[0]   = u8"ab";	// { dg-error "chars is too long" }
+const char      t1[1]   = u8"ab";	// { dg-error "chars is too long" }
+const char      t2[2]   = u8"ab";	// { dg-error "chars is too long" }
+const char      t3[3]   = u8"ab";
+const char      t4[4]   = u8"ab";
+
+const char      u0[0]   = u8"\u2160.";	// { dg-error "chars is too long" }
+const char      u1[1]   = u8"\u2160.";	// { dg-error "chars is too long" }
+const char      u2[2]   = u8"\u2160.";	// { dg-error "chars is too long" }
+const char      u3[3]   = u8"\u2160.";	// { dg-error "chars is too long" }
+const char      u4[4]   = u8"\u2160.";	// { dg-error "chars is too long" }
+const char      u5[5]   = u8"\u2160.";
+const char      u6[6]   = u8"\u2160.";
diff --git a/gcc/testsuite/gcc.dg/cpp/include6.c b/gcc/testsuite/gcc.dg/cpp/include6.c
new file mode 100644
index 00000000000..2bb1320daa2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/include6.c
@@ -0,0 +1,14 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=gnu99" } */
+
+#include <stddef.h>
+#include "stddef.h"
+#include L"stddef.h"		/* { dg-error "include expects" } */
+#include u"stddef.h"		/* { dg-error "include expects" } */
+#include U"stddef.h"		/* { dg-error "include expects" } */
+#include u8"stddef.h"		/* { dg-error "include expects" } */
+#include R"[stddef.h]"		/* { dg-error "include expects" } */
+#include LR"[stddef.h]"		/* { dg-error "include expects" } */
+#include uR"[stddef.h]"		/* { dg-error "include expects" } */
+#include UR"[stddef.h]"		/* { dg-error "include expects" } */
+#include u8R"[stddef.h]"	/* { dg-error "include expects" } */
diff --git a/gcc/testsuite/gcc.dg/raw-string-1.c b/gcc/testsuite/gcc.dg/raw-string-1.c
new file mode 100644
index 00000000000..b499e5cce69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-1.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+
+const char s0[] = R"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char s2[] = R"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char s3[] = "ab\nc]\"\nc]*|\"\nc";
+
+const char t0[] = u8R"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char t2[] = u8R"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char t3[] = u8"ab\nc]\"\nc]*|\"\nc";
+
+const char16_t u0[] = uR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char16_t u2[] = uR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char16_t u3[] = u"ab\nc]\"\nc]*|\"\nc";
+
+const char32_t U0[] = UR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char32_t U2[] = UR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const char32_t U3[] = U"ab\nc]\"\nc]*|\"\nc";
+
+const wchar_t L0[] = LR"[a\
+\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
+c]";
+const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const wchar_t L2[] = LR"*|*[a\
+b
+c]"
+c]*|"
+c]*|*";
+const wchar_t L3[] = L"ab\nc]\"\nc]*|\"\nc";
+
+int
+main (void)
+{
+  if (sizeof (s0) != sizeof (s1)
+      || __builtin_memcmp (s0, s1, sizeof (s0)) != 0)
+    __builtin_abort ();
+  if (sizeof (s2) != sizeof (s3)
+      || __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
+    __builtin_abort ();
+  if (sizeof (t0) != sizeof (t1)
+      || __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
+    __builtin_abort ();
+  if (sizeof (t2) != sizeof (t3)
+      || __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
+    __builtin_abort ();
+  if (sizeof (u0) != sizeof (u1)
+      || __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
+    __builtin_abort ();
+  if (sizeof (u2) != sizeof (u3)
+      || __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
+    __builtin_abort ();
+  if (sizeof (U0) != sizeof (U1)
+      || __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
+    __builtin_abort ();
+  if (sizeof (U2) != sizeof (U3)
+      || __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
+    __builtin_abort ();
+  if (sizeof (L0) != sizeof (L1)
+      || __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
+    __builtin_abort ();
+  if (sizeof (L2) != sizeof (L3)
+      || __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
+    __builtin_abort ();
+  if (sizeof (R"*[]*") != 1
+      || __builtin_memcmp (R"*[]*", "", 1) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/raw-string-2.c b/gcc/testsuite/gcc.dg/raw-string-2.c
new file mode 100644
index 00000000000..533abe2bc89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-2.c
@@ -0,0 +1,109 @@
+/* { dg-do run } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+
+#define R
+#define u
+#define uR
+#define U
+#define UR
+#define u8
+#define u8R
+#define L
+#define LR
+
+const char s00[] = R"[a]" "[b]";
+const char s01[] = "[a]" R"*[b]*";
+const char s02[] = R"[a]" R"[b]";
+const char s03[] = R"-[a]-" u8"[b]";
+const char s04[] = "[a]" u8R"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char s05[] = R"[a]" u8R"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char s06[] = u8R";([a];(" "[b]";
+const char s07[] = u8"[a]" R"[b]";
+const char s08[] = u8R"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char s09[] = u8R"/^&|~!=,"'\[a]/^&|~!=,"'\" u8"[b]";
+const char s10[] = u8"[a]" u8R"0123456789abcdef[b]0123456789abcdef";
+const char s11[] = u8R"ghijklmnopqrstuv[a]ghijklmnopqrstuv" u8R"w[b]w";
+
+const char16_t u03[] = R"-[a]-" u"[b]";
+const char16_t u04[] = "[a]" uR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char16_t u05[] = R"[a]" uR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char16_t u06[] = uR";([a];(" "[b]";
+const char16_t u07[] = u"[a]" R"[b]";
+const char16_t u08[] = uR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char16_t u09[] = uR"/^&|~!=,"'\[a]/^&|~!=,"'\" u"[b]";
+const char16_t u10[] = u"[a]" uR"0123456789abcdef[b]0123456789abcdef";
+const char16_t u11[] = uR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" uR"w[b]w";
+
+const char32_t U03[] = R"-[a]-" U"[b]";
+const char32_t U04[] = "[a]" UR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const char32_t U05[] = R"[a]" UR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const char32_t U06[] = UR";([a];(" "[b]";
+const char32_t U07[] = U"[a]" R"[b]";
+const char32_t U08[] = UR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const char32_t U09[] = UR"/^&|~!=,"'\[a]/^&|~!=,"'\" U"[b]";
+const char32_t U10[] = U"[a]" UR"0123456789abcdef[b]0123456789abcdef";
+const char32_t U11[] = UR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" UR"w[b]w";
+
+const wchar_t L03[] = R"-[a]-" L"[b]";
+const wchar_t L04[] = "[a]" LR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
+const wchar_t L05[] = R"[a]" LR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
+const wchar_t L06[] = LR";([a];(" "[b]";
+const wchar_t L07[] = L"[a]" R"[b]";
+const wchar_t L08[] = LR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
+const wchar_t L09[] = LR"/^&|~!=,"'\[a]/^&|~!=,"'\" L"[b]";
+const wchar_t L10[] = L"[a]" LR"0123456789abcdef[b]0123456789abcdef";
+const wchar_t L11[] = LR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" LR"w[b]w";
+
+int
+main (void)
+{
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+  TEST (s00, "a[b]");
+  TEST (s01, "[a]b");
+  TEST (s02, "ab");
+  TEST (s03, "a[b]");
+  TEST (s04, "[a]b");
+  TEST (s05, "ab");
+  TEST (s06, "a[b]");
+  TEST (s07, "[a]b");
+  TEST (s08, "ab");
+  TEST (s09, "a[b]");
+  TEST (s10, "[a]b");
+  TEST (s11, "ab");
+  TEST (u03, u"a[b]");
+  TEST (u04, u"[a]b");
+  TEST (u05, u"ab");
+  TEST (u06, u"a[b]");
+  TEST (u07, u"[a]b");
+  TEST (u08, u"ab");
+  TEST (u09, u"a[b]");
+  TEST (u10, u"[a]b");
+  TEST (u11, u"ab");
+  TEST (U03, U"a[b]");
+  TEST (U04, U"[a]b");
+  TEST (U05, U"ab");
+  TEST (U06, U"a[b]");
+  TEST (U07, U"[a]b");
+  TEST (U08, U"ab");
+  TEST (U09, U"a[b]");
+  TEST (U10, U"[a]b");
+  TEST (U11, U"ab");
+  TEST (L03, L"a[b]");
+  TEST (L04, L"[a]b");
+  TEST (L05, L"ab");
+  TEST (L06, L"a[b]");
+  TEST (L07, L"[a]b");
+  TEST (L08, L"ab");
+  TEST (L09, L"a[b]");
+  TEST (L10, L"[a]b");
+  TEST (L11, L"ab");
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/raw-string-3.c b/gcc/testsuite/gcc.dg/raw-string-3.c
new file mode 100644
index 00000000000..9226411d2e0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-3.c
@@ -0,0 +1,53 @@
+/* If not gnu99, the {,u,u8,U,L}R prefix should be parsed as a separate
+   token. */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+const void	*s0	= R"[a]";	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 6 } */
+const void	*s1	= uR"[a]";	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 8 } */
+const void	*s2	= UR"[a]";	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 10 } */
+const void	*s3	= u8R"[a]";	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 12 } */
+const void	*s4	= LR"[a]";	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 14 } */
+
+const int	i0	= R'a';		/* { dg-error "expected ',' or ';'" } */
+const int	i1	= uR'a';	/* { dg-error "expected ',' or ';'" } */
+const int	i2	= UR'a';	/* { dg-error "expected ',' or ';'" } */
+const int	i3	= u8R'a';	/* { dg-error "expected ',' or ';'" } */
+const int	i4	= LR'a';	/* { dg-error "expected ',' or ';'" } */
+
+#define R	"a"
+#define uR	"b"
+#define UR	"c"
+#define u8R	"d"
+#define LR	"e"
+
+const void	*s5	= R"[a]";
+const void	*s6	= uR"[a]";
+const void	*s7	= UR"[a]";
+const void	*s8	= u8R"[a]";
+const void	*s9	= LR"[a]";
+
+#undef R
+#undef uR
+#undef UR
+#undef u8R
+#undef LR
+
+#define R	1 +
+#define uR	2 +
+#define UR	3 +
+#define u8R	4 +
+#define LR	5 +
+
+const int	i5	= R'a';
+const int	i6	= uR'a';
+const int	i7	= UR'a';
+const int	i8	= u8R'a';
+const int	i9	= LR'a';
+
+int main () {}
diff --git a/gcc/testsuite/gcc.dg/raw-string-4.c b/gcc/testsuite/gcc.dg/raw-string-4.c
new file mode 100644
index 00000000000..24d56ed7464
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-4.c
@@ -0,0 +1,28 @@
+/* R is not applicable for character literals.  */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+const int	i0	= R'a';	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 5 } */
+const int	i1	= uR'a';	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 7 } */
+const int	i2	= UR'a';	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 9 } */
+const int	i3	= u8R'a';	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 11 } */
+const int	i4	= LR'a';	/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 13 } */
+
+#define R	1 +
+#define uR	2 +
+#define UR	3 +
+#define u8R	4 +
+#define LR	5 +
+
+const int	i5	= R'a';
+const int	i6	= uR'a';
+const int	i7	= UR'a';
+const int	i8	= u8R'a';
+const int	i9	= LR'a';
+
+int main () {}
diff --git a/gcc/testsuite/gcc.dg/raw-string-5.c b/gcc/testsuite/gcc.dg/raw-string-5.c
new file mode 100644
index 00000000000..3dbf97ded90
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-5.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+const void *s0 = R"0123456789abcdefg[]0123456789abcdefg";
+	/* { dg-error "raw string delimiter longer" "" { target *-*-* } 4 } */
+	/* { dg-error "stray" "" { target *-*-* } 4 } */
+const void *s1 = R" [] ";
+	/* { dg-error "invalid character" "" { target *-*-* } 7 } */
+	/* { dg-error "stray" "" { target *-*-* } 7 } */
+const void *s2 = R"	[]	";
+	/* { dg-error "invalid character" "" { target *-*-* } 10 } */
+	/* { dg-error "stray" "" { target *-*-* } 10 } */
+const void *s3 = R"][]]";
+	/* { dg-error "invalid character" "" { target *-*-* } 13 } */
+	/* { dg-error "stray" "" { target *-*-* } 13 } */
+const void *s4 = R"@[]@";
+	/* { dg-error "invalid character" "" { target *-*-* } 16 } */
+	/* { dg-error "stray" "" { target *-*-* } 16 } */
+const void *s5 = R"$[]$";
+	/* { dg-error "invalid character" "" { target *-*-* } 19 } */
+	/* { dg-error "stray" "" { target *-*-* } 19 } */
+
+int main () {}
diff --git a/gcc/testsuite/gcc.dg/raw-string-6.c b/gcc/testsuite/gcc.dg/raw-string-6.c
new file mode 100644
index 00000000000..338087a25fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-6.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+const void *s0 = R"ouch[]ouCh";	/* { dg-error "expected expression at end of input" } */
+	/* { dg-error "unterminated raw string" "" { target *-*-* } 4 } */
diff --git a/gcc/testsuite/gcc.dg/raw-string-7.c b/gcc/testsuite/gcc.dg/raw-string-7.c
new file mode 100644
index 00000000000..5bb4bec2085
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/raw-string-7.c
@@ -0,0 +1,23 @@
+/* The trailing whitespace after \ and before newline extension
+   breaks full compliance for raw strings.  */
+/* { dg-do run { xfail *-*-* } } */
+/* { dg-options "-std=gnu99" } */
+
+/* Note, there is a single space after \ on the following line.  */
+const void *s0 = R"[\ 
+]";
+/* { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 7 } */
+
+/* Note, there is a single tab after \ on the following line.  */
+const void *s1 = R"[\	
+]";
+/* { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 12 } */
+
+int
+main (void)
+{
+  if (__builtin_strcmp (s0, "\\ \n") != 0
+      || __builtin_strcmp (s1, "\\\t\n") != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/utf-badconcat2.c b/gcc/testsuite/gcc.dg/utf-badconcat2.c
new file mode 100644
index 00000000000..ea71a7f6857
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/utf-badconcat2.c
@@ -0,0 +1,15 @@
+/* Test unsupported concatenation of UTF-8 string literals. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+void	*s0	= u8"a"   "b";
+void	*s1	=   "a" u8"b";
+void	*s2	= u8"a" u8"b";
+void	*s3	= u8"a"  u"b";	/* { dg-error "non-standard concatenation" } */
+void	*s4	=  u"a" u8"b";	/* { dg-error "non-standard concatenation" } */
+void	*s5	= u8"a"  U"b";	/* { dg-error "non-standard concatenation" } */
+void	*s6	=  U"a" u8"b";	/* { dg-error "non-standard concatenation" } */
+void	*s7	= u8"a"  L"b";	/* { dg-error "non-standard concatenation" } */
+void	*s8	=  L"a" u8"b";	/* { dg-error "non-standard concatenation" } */
+
+int main () {}
diff --git a/gcc/testsuite/gcc.dg/utf-dflt2.c b/gcc/testsuite/gcc.dg/utf-dflt2.c
new file mode 100644
index 00000000000..ab101f45b78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/utf-dflt2.c
@@ -0,0 +1,12 @@
+/* If not gnu99, the u8 prefix should be parsed as separate tokens. */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+const void	*s0 = u8"a";		/* { dg-error "undeclared" } */
+		/* { dg-error "expected ',' or ';'" "" { target *-*-* } 5 } */
+
+#define u8	"a"
+
+const void	*s1 = u8"a";
+
+int main () {}
diff --git a/gcc/testsuite/gcc.dg/utf8-1.c b/gcc/testsuite/gcc.dg/utf8-1.c
new file mode 100644
index 00000000000..e287e065688
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/utf8-1.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-iconv "ISO-8859-2" } */
+/* { dg-options "-std=gnu99 -fexec-charset=ISO-8859-2" } */
+
+const char *str1 = "h\u00e1\U0000010Dky ";
+const char *str2 = "\u010d\u00E1rky\n";
+const char *str3 = u8"h\u00e1\U0000010Dky ";
+const char *str4 = u8"\u010d\u00E1rky\n";
+const char *str5 = "h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str6 = u8"h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str7 = "h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+#define u8
+const char *str8 = u8"h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+
+const char latin2_1[] = "\x68\xe1\xe8\x6b\x79\x20";
+const char latin2_2[] = "\xe8\xe1\x72\x6b\x79\n";
+const char utf8_1[] = "\x68\xc3\xa1\xc4\x8d\x6b\x79\x20";
+const char utf8_2[] = "\xc4\x8d\xc3\xa1\x72\x6b\x79\n";
+
+int
+main (void)
+{
+  if (__builtin_strcmp (str1, latin2_1) != 0  /* plain: -fexec-charset bytes */
+      || __builtin_strcmp (str2, latin2_2) != 0
+      || __builtin_strcmp (str3, utf8_1) != 0  /* u8: UTF-8 bytes */
+      || __builtin_strcmp (str4, utf8_2) != 0
+      || __builtin_strncmp (str5, latin2_1, sizeof (latin2_1) - 1) != 0
+      || __builtin_strcmp (str5 + sizeof (latin2_1) - 1, latin2_2) != 0
+      || __builtin_strncmp (str6, utf8_1, sizeof (utf8_1) - 1) != 0  /* any u8 piece: all UTF-8 */
+      || __builtin_strcmp (str6 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str7, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str7 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str8, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str8 + sizeof (utf8_1) - 1, utf8_2) != 0)
+    __builtin_abort ();
+  if (sizeof ("a" u8"b"[0]) != 1  /* elements are char-sized */
+      || sizeof (u8"a" "b"[0]) != 1
+      || sizeof (u8"a" u8"b"[0]) != 1
+      || sizeof ("a" "\u010d") != 3  /* 'a' + 1 Latin-2 byte + NUL */
+      || sizeof ("a" u8"\u010d") != 4  /* 'a' + 2 UTF-8 bytes + NUL */
+      || sizeof (u8"a" "\u010d") != 4
+      || sizeof (u8"a" u8"\u010d") != 4)  /* both pieces u8 */
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/utf8-2.c b/gcc/testsuite/gcc.dg/utf8-2.c
new file mode 100644
index 00000000000..9c0442fde3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/utf8-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+#include <wchar.h>
+
+typedef __CHAR16_TYPE__	char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+
+const char	s0[]	= u8"ab";
+const char16_t	s1[]	= u8"ab";	/* { dg-error "from non-wide" } */
+const char32_t  s2[]    = u8"ab";	/* { dg-error "from non-wide" } */
+const wchar_t   s3[]    = u8"ab";	/* { dg-error "from non-wide" } */
+
+const char      t0[0]   = u8"ab";	/* { dg-warning "chars is too long" } */
+const char      t1[1]   = u8"ab";	/* { dg-warning "chars is too long" } */
+const char      t2[2]   = u8"ab";
+const char      t3[3]   = u8"ab";
+const char      t4[4]   = u8"ab";
+
+const char      u0[0]   = u8"\u2160.";	/* { dg-warning "chars is too long" } */
+const char      u1[1]   = u8"\u2160.";	/* { dg-warning "chars is too long" } */
+const char      u2[2]   = u8"\u2160.";	/* { dg-warning "chars is too long" } */
+const char      u3[3]   = u8"\u2160.";	/* { dg-warning "chars is too long" } */
+const char      u4[4]   = u8"\u2160.";
+const char      u5[5]   = u8"\u2160.";
+const char      u6[6]   = u8"\u2160.";
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 3259c56f46e..5946b29dc56 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,21 @@
+2009-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
+	(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
+	and char32_cset_desc.
+	(converter_for_type): Handle CPP_UTF8STRING.
+	(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
+	* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
+	(parse_include): Reject raw strings.
+	* include/cpplib.h (CPP_UTF8STRING): New token type.
+	* internal.h (struct cpp_reader): Add utf8_cset_desc field.
+	* lex.c (lex_raw_string): New function.
+	(lex_string): Handle u8 string literals, call lex_raw_string
+	for raw string literals.
+	(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
+	sequences.
+	* macro.c (stringify_arg): Handle CPP_UTF8STRING.
+
 2009-10-14  Jakub Jelinek  <jakub@redhat.com>
 
 	PR preprocessor/41543
diff --git a/libcpp/charset.c b/libcpp/charset.c
index bd24ec2490d..837ccd77aab 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)
 
   pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
   pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+  pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
+  pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
   pfile->char16_cset_desc = init_iconv_desc (pfile,
 					     be ? "UTF-16BE" : "UTF-16LE",
 					     SOURCE_CHARSET);
@@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)
     {
       if (pfile->narrow_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->narrow_cset_desc.cd);
+      if (pfile->utf8_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->utf8_cset_desc.cd);
+      if (pfile->char16_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char16_cset_desc.cd);
+      if (pfile->char32_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char32_cset_desc.cd);
       if (pfile->wide_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->wide_cset_desc.cd);
     }
@@ -1339,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
     {
     default:
 	return pfile->narrow_cset_desc;
+    case CPP_UTF8STRING:
+	return pfile->utf8_cset_desc;
     case CPP_CHAR16:
     case CPP_STRING16:
 	return pfile->char16_cset_desc;
@@ -1373,7 +1383,47 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
   for (i = 0; i < count; i++)
     {
       p = from[i].text;
-      if (*p == 'L' || *p == 'u' || *p == 'U') p++;
+      if (*p == 'u')
+	{
+	  if (*++p == '8')
+	    p++;
+	}
+      else if (*p == 'L' || *p == 'U') p++;
+      if (*p == 'R')
+	{
+	  const uchar *prefix;
+
+	  /* Skip over 'R"'.  */
+	  p += 2;
+	  prefix = p;
+	  while (*p != '[')
+	    p++;
+	  p++;
+	  limit = from[i].text + from[i].len;
+	  if (limit >= p + (p - prefix) + 1)
+	    limit -= (p - prefix) + 1;
+
+	  for (;;)
+	    {
+	      base = p;
+	      while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
+		p++;
+	      if (p > base)
+		{
+		  /* We have a run of normal characters; these can be fed
+		     directly to convert_cset.  */
+		  if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
+		    goto fail;
+		}
+	      if (p == limit)
+		break;
+
+	      p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
+	    }
+
+	  continue;
+	}
+
       p++; /* Skip leading quote.  */
       limit = from[i].text + from[i].len - 1; /* Skip trailing quote.  */
 
diff --git a/libcpp/directives.c b/libcpp/directives.c
index f9dba539ea2..01bb599e266 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -697,7 +697,8 @@ parse_include (cpp_reader *pfile, int *pangle_brackets,
   /* Allow macro expansion.  */
   header = get_token_no_padding (pfile);
   *location = header->src_loc;
-  if (header->type == CPP_STRING || header->type == CPP_HEADER_NAME)
+  if ((header->type == CPP_STRING && header->val.str.text[0] != 'R')
+      || header->type == CPP_HEADER_NAME)
     {
       fname = XNEWVEC (char, header->val.str.len - 1);
       memcpy (fname, header->val.str.text + 1, header->val.str.len - 2);
@@ -1537,7 +1538,8 @@ get__Pragma_string (cpp_reader *pfile)
   if (string->type == CPP_EOF)
     _cpp_backup_tokens (pfile, 1);
   if (string->type != CPP_STRING && string->type != CPP_WSTRING
-      && string->type != CPP_STRING32 && string->type != CPP_STRING16)
+      && string->type != CPP_STRING32 && string->type != CPP_STRING16
+      && string->type != CPP_UTF8STRING)
     return NULL;
 
   paren = get_token_no_padding (pfile);
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index df04668dda0..e95f01a412a 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -127,6 +127,7 @@ struct _cpp_file;
   TK(WSTRING,		LITERAL) /* L"string" */			\
   TK(STRING16,		LITERAL) /* u"string" */			\
   TK(STRING32,		LITERAL) /* U"string" */			\
+  TK(UTF8STRING,	LITERAL) /* u8"string" */			\
   TK(OBJC_STRING,	LITERAL) /* @"string" - Objective-C */		\
   TK(HEADER_NAME,	LITERAL) /* <stdio.h> in #include */		\
 									\
@@ -728,10 +729,10 @@ extern const unsigned char *cpp_macro_definition (cpp_reader *,
 extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
 extern const cpp_token *cpp_peek_token (cpp_reader *, int);
 
-/* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
+/* Evaluate a CPP_*CHAR* token.  */
 extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
 					  unsigned int *, int *);
-/* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens.  */
+/* Evaluate a vector of CPP_*STRING* tokens.  */
 extern bool cpp_interpret_string (cpp_reader *,
 				  const cpp_string *, size_t,
 				  cpp_string *, enum cpp_ttype);
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 21e51c6553c..aaa231c2ab1 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -396,6 +396,10 @@ struct cpp_reader
      execution character set.  */
   struct cset_converter narrow_cset_desc;
 
+  /* Descriptor for converting from the source character set to the
+     UTF-8 execution character set.  */
+  struct cset_converter utf8_cset_desc;
+
   /* Descriptor for converting from the source character set to the
      UTF-16 execution character set.  */
   struct cset_converter char16_cset_desc;
diff --git a/libcpp/lex.c b/libcpp/lex.c
index bab14a4baa3..55bffa9a326 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -617,12 +617,192 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
   token->val.str.text = dest;
 }
 
+/* Lexes a raw string.  The stored string contains the spelling, including
+   double quotes, delimiter string, '[' and ']', any leading
+   'L', 'u', 'U' or 'u8' and 'R' modifier.  The type of the literal is
+   stored in TOKEN, or CPP_OTHER if it was not properly terminated.
+
+   The spelling is NUL-terminated, but it is not guaranteed that this
+   is the first NUL since embedded NULs are preserved.  */
+
+static void
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+		const uchar *cur)
+{
+  source_location saw_NUL = 0;
+  const uchar *raw_prefix;
+  unsigned int raw_prefix_len = 0;
+  enum cpp_ttype type;
+  size_t total_len = 0;
+  _cpp_buff *first_buff = NULL, *last_buff = NULL;
+
+  type = (*base == 'L' ? CPP_WSTRING :
+	  *base == 'U' ? CPP_STRING32 :
+	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+	  : CPP_STRING);
+
+  raw_prefix = cur + 1;
+  while (raw_prefix_len < 16)
+    {
+      switch (raw_prefix[raw_prefix_len])
+	{
+	case ' ': case '[': case ']': case '\t':
+	case '\v': case '\f': case '\n': default:
+	  break;
+	/* Basic source charset except the above chars.  */
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	case 'y': case 'z':
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	case 'Y': case 'Z':
+	case '0': case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	case '_': case '{': case '}': case '#': case '(': case ')':
+	case '<': case '>': case '%': case ':': case ';': case '.':
+	case '?': case '*': case '+': case '-': case '/': case '^':
+	case '&': case '|': case '~': case '!': case '=': case ',':
+	case '\\': case '"': case '\'':
+	  raw_prefix_len++;
+	  continue;
+	}
+      break;
+    }
+
+  if (raw_prefix[raw_prefix_len] != '[')
+    {
+      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
+		+ 1;
+      if (raw_prefix_len == 16)
+	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+			     "raw string delimiter longer than 16 characters");
+      else
+	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+			     "invalid character '%c' in raw string delimiter",
+			     (int) raw_prefix[raw_prefix_len]);
+      pfile->buffer->cur = raw_prefix - 1;
+      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
+      return;
+    }
+
+  cur = raw_prefix + raw_prefix_len + 1;
+  for (;;)
+    {
+      cppchar_t c = *cur++;
+
+      if (c == ']'
+	  && strncmp ((const char *) cur, (const char *) raw_prefix,
+		      raw_prefix_len) == 0
+	  && cur[raw_prefix_len] == '"')
+	{
+	  cur += raw_prefix_len + 1;
+	  break;
+	}
+      else if (c == '\n')
+	{
+	  if (pfile->state.in_directive
+	      || pfile->state.parsing_args
+	      || pfile->state.in_deferred_pragma)
+	    {
+	      cur--;
+	      type = CPP_OTHER;
+	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+				   "unterminated raw string");
+	      break;
+	    }
+
+	  /* raw strings allow embedded non-escaped newlines, which
+	     complicates this routine a lot.  */
+	  if (first_buff == NULL)
+	    {
+	      total_len = cur - base;
+	      first_buff = last_buff = _cpp_get_buff (pfile, total_len);
+	      memcpy (BUFF_FRONT (last_buff), base, total_len);
+	      raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
+	      BUFF_FRONT (last_buff) += total_len;
+	    }
+	  else
+	    {
+	      size_t len = cur - base;
+	      size_t cur_len = len > BUFF_ROOM (last_buff)
+			       ? BUFF_ROOM (last_buff) : len;
+
+	      total_len += len;
+	      memcpy (BUFF_FRONT (last_buff), base, cur_len);
+	      BUFF_FRONT (last_buff) += cur_len;
+	      if (len > cur_len)
+		{
+		  last_buff = _cpp_append_extend_buff (pfile, last_buff,
+						       len - cur_len);
+		  memcpy (BUFF_FRONT (last_buff), base + cur_len,
+			  len - cur_len);
+		  BUFF_FRONT (last_buff) += len - cur_len;
+		}
+	    }
+
+	  if (pfile->buffer->cur < pfile->buffer->rlimit)
+	    CPP_INCREMENT_LINE (pfile, 0);
+	  pfile->buffer->need_line = true;
+
+	  if (!_cpp_get_fresh_line (pfile))
+	    {
+	      source_location src_loc = token->src_loc;
+	      token->type = CPP_EOF;
+	      /* Tell the compiler the line number of the EOF token.  */
+	      token->src_loc = pfile->line_table->highest_line;
+	      token->flags = BOL;
+	      if (first_buff != NULL)
+		_cpp_release_buff (pfile, first_buff);
+	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
+				   "unterminated raw string");
+	      return;
+	    }
+
+	  cur = base = pfile->buffer->cur;
+	}
+      else if (c == '\0' && !saw_NUL)
+	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
+				     CPP_BUF_COLUMN (pfile->buffer, cur));
+    }
+
+  if (saw_NUL && !pfile->state.skipping)
+    cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
+	       "null character(s) preserved in literal");
+
+  pfile->buffer->cur = cur;
+  if (first_buff == NULL)
+    create_literal (pfile, token, base, cur - base, type);
+  else
+    {
+      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+
+      token->type = type;
+      token->val.str.len = total_len + (cur - base);
+      token->val.str.text = dest;
+      last_buff = first_buff;
+      while (last_buff != NULL)
+	{
+	  memcpy (dest, last_buff->base,
+		  BUFF_FRONT (last_buff) - last_buff->base);
+	  dest += BUFF_FRONT (last_buff) - last_buff->base;
+	  last_buff = last_buff->next;
+	}
+      _cpp_release_buff (pfile, first_buff);
+      memcpy (dest, base, cur - base);
+      dest[cur - base] = '\0';
+    }
+}
+
 /* Lexes a string, character constant, or angle-bracketed header file
    name.  The stored string contains the spelling, including opening
-   quote and leading any leading 'L', 'u' or 'U'.  It returns the type
-   of the literal, or CPP_OTHER if it was not properly terminated, or
-   CPP_LESS for an unterminated header name which must be relexed as
-   normal tokens.
+   quote and any leading 'L', 'u', 'U' or 'u8' and optional
+   'R' modifier.  It returns the type of the literal, or CPP_OTHER
+   if it was not properly terminated, or CPP_LESS for an unterminated
+   header name which must be relexed as normal tokens.
 
    The spelling is NUL-terminated, but it is not guaranteed that this
    is the first NUL since embedded NULs are preserved.  */
@@ -636,12 +816,24 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 
   cur = base;
   terminator = *cur++;
-  if (terminator == 'L' || terminator == 'u' || terminator == 'U')
+  if (terminator == 'L' || terminator == 'U')
     terminator = *cur++;
-  if (terminator == '\"')
+  else if (terminator == 'u')
+    {
+      terminator = *cur++;
+      if (terminator == '8')
+	terminator = *cur++;
+    }
+  if (terminator == 'R')
+    {
+      lex_raw_string (pfile, token, base, cur);
+      return;
+    }
+  if (terminator == '"')
     type = (*base == 'L' ? CPP_WSTRING :
 	    *base == 'U' ? CPP_STRING32 :
-	    *base == 'u' ? CPP_STRING16 : CPP_STRING);
+	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+			 : CPP_STRING);
   else if (terminator == '\'')
     type = (*base == 'L' ? CPP_WCHAR :
 	    *base == 'U' ? CPP_CHAR32 :
@@ -1101,10 +1293,21 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'L':
     case 'u':
     case 'U':
-      /* 'L', 'u' or 'U' may introduce wide characters or strings.  */
+    case 'R':
+      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
+	 wide strings or raw strings.  */
       if (c == 'L' || CPP_OPTION (pfile, uliterals))
 	{
-	  if (*buffer->cur == '\'' || *buffer->cur == '"')
+	  if ((*buffer->cur == '\'' && c != 'R')
+	      || *buffer->cur == '"'
+	      || (*buffer->cur == 'R'
+		  && c != 'R'
+		  && buffer->cur[1] == '"'
+		  && CPP_OPTION (pfile, uliterals))
+	      || (*buffer->cur == '8'
+		  && c == 'u'
+		  && (buffer->cur[1] == '"'
+		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
 	    {
 	      lex_string (pfile, result, buffer->cur - 1);
 	      break;
@@ -1120,7 +1323,7 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'y': case 'z':
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'G': case 'H': case 'I': case 'J': case 'K':
-    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+    case 'M': case 'N': case 'O': case 'P': case 'Q':
     case 'S': case 'T':           case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
       result->type = CPP_NAME;
diff --git a/libcpp/macro.c b/libcpp/macro.c
index f31805955c6..1d284cf9f8a 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -379,7 +379,8 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
       escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR
 		   || token->type == CPP_WSTRING || token->type == CPP_WCHAR
 		   || token->type == CPP_STRING32 || token->type == CPP_CHAR32
-		   || token->type == CPP_STRING16 || token->type == CPP_CHAR16);
+		   || token->type == CPP_STRING16 || token->type == CPP_CHAR16
+		   || token->type == CPP_UTF8STRING);
 
       /* Room for each char being written in octal, initial space and
 	 final quote and NUL.  */
-- 
2.30.2