acinclude.m4: Fix typo in comment.

author Jonathan Wakely <jwakely@redhat.com>

Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)

committer Jonathan Wakely <redi@gcc.gnu.org>

Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)
author Jonathan Wakely <jwakely@redhat.com>
Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)
committer Jonathan Wakely <redi@gcc.gnu.org>
Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index 98b958961551755ab594bb3da223027d6f47bc42..7e68c450e6ba7c3509491390847f3f598b1ca537 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,23 @@
+2015-01-16  Jonathan Wakely  <jwakely@redhat.com>
+
+       * acinclude.m4: Fix typo in comment.
+       * configure: Regenerate.
+       * include/bits/codecvt.h (codecvt<char16_t, char, mbstate_t>,
+       codecvt<char32_t, char, mbstate_t>): Declare specializations.
+       * include/bits/locale_facets.h: Reserve space for new specializations.
+       * src/c++11/Makefile.am: Add codecvt.cc.
+       * src/c++11/Makefile.in: Regenerate.
+       * src/c++11/codecvt.cc: New.
+       * src/c++98/Makefile.am: Compile locale_init.cc and localename.cc
+       with -std=gnu++11.
+       * src/c++98/Makefile.in: Regenerate.
+       * src/c++98/locale_init.cc: Initialize new codecvt specializations.
+       * src/c++98/localename.cc: Likewise.
+       * config/abi/pre/gnu.ver: Exports for new codecvt specializations.
+       * testsuite/22_locale/codecvt/utf8.cc: New.
+       * testsuite/22_locale/locale/cons/unicode.cc: Check that new
+       specializations are installed in locale objects.
+
  2015-01-16  Torvald Riegel  <triegel@redhat.com>
  
         * include/std/shared_mutex (shared_timed_mutex): Add POSIX-based
diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4

index db357d6d73188034d533a6b9fe21134c80633ba9..74e8eaffe0a5efcb3e8898b5f6200d35ae95c928 100644 (file)
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -1777,7 +1777,7 @@ AC_DEFUN([GLIBCXX_CHECK_C99_TR1], [
               <tr1/cinttypes> in namespace std::tr1.])
    fi
  
-  # Check for the existence of whcar_t <inttypes.h> functions (NB: doesn't
+  # Check for the existence of wchar_t <inttypes.h> functions (NB: doesn't
    # make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1).
    ac_c99_inttypes_wchar_t_tr1=no;
    if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then
diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver

index 700da189b781819550880ae68139771b1ac24777..83f4e8c9c6fb370ab664ea6bd0a8a78e206bd8b3 100644 (file)
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -1759,6 +1759,11 @@ GLIBCXX_3.4.21 {
      _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE3getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmPK[cw]SC_;
      _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE6do_getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmcc;
  
+    # codecvt<char16_t, char, mbstate_t>, codecvt<char32_t, char, mbstate_t>
+    _ZNKSt7codecvtID[is]c11__mbstate_t*;
+    _ZNSt7codecvtID[is]c11__mbstate_t*;
+    _ZT[ISV]St7codecvtID[is]c11__mbstate_tE;
+
  } GLIBCXX_3.4.20;
  
  
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure

index be9337fcec0f8879eeea543630ddcac85a02222f..7bd1ede852effd065e722b9fc7d9c6c24b038b34 100755 (executable)
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -19216,7 +19216,7 @@ $as_echo "#define _GLIBCXX_USE_C99_INTTYPES_TR1 1" >>confdefs.h
  
    fi
  
-  # Check for the existence of whcar_t <inttypes.h> functions (NB: doesn't
+  # Check for the existence of wchar_t <inttypes.h> functions (NB: doesn't
    # make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1).
    ac_c99_inttypes_wchar_t_tr1=no;
    if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then
diff --git a/libstdc++-v3/include/bits/codecvt.h b/libstdc++-v3/include/bits/codecvt.h

index 1eee1ccbc63d9b2ee213835777516435bcc85a68..a6e59b5128f9595c8f2228387988a1b496206df1 100644 (file)
--- a/libstdc++-v3/include/bits/codecvt.h
+++ b/libstdc++-v3/include/bits/codecvt.h
@@ -393,7 +393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    };
  
  #ifdef _GLIBCXX_USE_WCHAR_T
-  /// class codecvt<wchar_t, char, mbstate_t> specialization.
+  /** @brief  Class codecvt<wchar_t, char, mbstate_t> specialization.
+   *
+   *  Converts between narrow and wide characters in the native character set
+   */
    template<>
      class codecvt<wchar_t, char, mbstate_t>
      : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
@@ -455,6 +458,125 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      };
  #endif //_GLIBCXX_USE_WCHAR_T
  
+#if __cplusplus >= 201103L
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+  /** @brief  Class codecvt<char16_t, char, mbstate_t> specialization.
+   *
+   *  Converts between UTF-16 and UTF-8.
+   */
+  template<>
+    class codecvt<char16_t, char, mbstate_t>
+    : public __codecvt_abstract_base<char16_t, char, mbstate_t>
+    {
+    public:
+      // Types:
+      // Internal side of the conversion (UTF-16, per the class description).
+      typedef char16_t                 intern_type;
+      // External side of the conversion (UTF-8, per the class description).
+      typedef char                     extern_type;
+      typedef mbstate_t                        state_type;
+
+    public:
+      // Facet id object; its address is listed among the facet ids in
+      // src/c++98/locale_init.cc.
+      static locale::id                        id;
+
+      // __refs is forwarded unchanged to the base class constructor.
+      explicit
+      codecvt(size_t __refs = 0)
+      : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
+
+    protected:
+      // The virtual members below are only declared here; they are defined
+      // out of line in src/c++11/codecvt.cc.
+      virtual
+      ~codecvt();
+
+      // intern_type -> extern_type conversion.
+      virtual result
+      do_out(state_type& __state, const intern_type* __from,
+            const intern_type* __from_end, const intern_type*& __from_next,
+            extern_type* __to, extern_type* __to_end,
+            extern_type*& __to_next) const;
+
+      virtual result
+      do_unshift(state_type& __state,
+                extern_type* __to, extern_type* __to_end,
+                extern_type*& __to_next) const;
+
+      // extern_type -> intern_type conversion.
+      virtual result
+      do_in(state_type& __state,
+            const extern_type* __from, const extern_type* __from_end,
+            const extern_type*& __from_next,
+            intern_type* __to, intern_type* __to_end,
+            intern_type*& __to_next) const;
+
+      virtual
+      int do_encoding() const throw();
+
+      virtual
+      bool do_always_noconv() const throw();
+
+      virtual
+      int do_length(state_type&, const extern_type* __from,
+                   const extern_type* __end, size_t __max) const;
+
+      virtual int
+      do_max_length() const throw();
+    };
+
+  /** @brief  Class codecvt<char32_t, char, mbstate_t> specialization.
+   *
+   *  Converts between UTF-32 and UTF-8.
+   */
+  template<>
+    class codecvt<char32_t, char, mbstate_t>
+    : public __codecvt_abstract_base<char32_t, char, mbstate_t>
+    {
+    public:
+      // Types:
+      // Internal side of the conversion (UTF-32, per the class description).
+      typedef char32_t                 intern_type;
+      // External side of the conversion (UTF-8, per the class description).
+      typedef char                     extern_type;
+      typedef mbstate_t                        state_type;
+
+    public:
+      // Facet id object; its address is listed among the facet ids in
+      // src/c++98/locale_init.cc.
+      static locale::id                        id;
+
+      // __refs is forwarded unchanged to the base class constructor.
+      explicit
+      codecvt(size_t __refs = 0)
+      : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
+
+    protected:
+      // The virtual members below are only declared here; they are defined
+      // out of line in src/c++11/codecvt.cc.
+      virtual
+      ~codecvt();
+
+      // intern_type -> extern_type conversion.
+      virtual result
+      do_out(state_type& __state, const intern_type* __from,
+            const intern_type* __from_end, const intern_type*& __from_next,
+            extern_type* __to, extern_type* __to_end,
+            extern_type*& __to_next) const;
+
+      virtual result
+      do_unshift(state_type& __state,
+                extern_type* __to, extern_type* __to_end,
+                extern_type*& __to_next) const;
+
+      // extern_type -> intern_type conversion.
+      virtual result
+      do_in(state_type& __state,
+            const extern_type* __from, const extern_type* __from_end,
+            const extern_type*& __from_next,
+            intern_type* __to, intern_type* __to_end,
+            intern_type*& __to_next) const;
+
+      virtual
+      int do_encoding() const throw();
+
+      virtual
+      bool do_always_noconv() const throw();
+
+      virtual
+      int do_length(state_type&, const extern_type* __from,
+                   const extern_type* __end, size_t __max) const;
+
+      virtual int
+      do_max_length() const throw();
+    };
+
+#endif // _GLIBCXX_USE_C99_STDINT_TR1
+#endif // C++11
+
    /// class codecvt_byname [22.2.1.6].
    template<typename _InternT, typename _ExternT, typename _StateT>
      class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
diff --git a/libstdc++-v3/include/bits/locale_facets.h b/libstdc++-v3/include/bits/locale_facets.h

index a5fc45ee597277c9cfc5c5dc222280694aea19c8..77932a5060b71018897aeabd43f86c9e0fcac0ed 100644 (file)
--- a/libstdc++-v3/include/bits/locale_facets.h
+++ b/libstdc++-v3/include/bits/locale_facets.h
@@ -58,6 +58,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  #else
  # define  _GLIBCXX_NUM_FACETS 14
  # define  _GLIBCXX_NUM_CXX11_FACETS 8
+#endif
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+# define _GLIBCXX_NUM_UNICODE_FACETS 2
+#else
+# define _GLIBCXX_NUM_UNICODE_FACETS 0
  #endif
  
    // Convert string to numeric value of type _Tp and store results.
diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am

index 4cba983ccd2339f1d1c3499ed43ec44de6a0aeb4..b57e552647eff7b3ccd97a8e2d1dfff482b5f6a2 100644 (file)
--- a/libstdc++-v3/src/c++11/Makefile.am
+++ b/libstdc++-v3/src/c++11/Makefile.am
@@ -53,6 +53,7 @@ endif
  
  sources = \
         chrono.cc \
+       codecvt.cc \
         condition_variable.cc \
         cow-stdexcept.cc \
         ctype.cc \
diff --git a/libstdc++-v3/src/c++11/Makefile.in b/libstdc++-v3/src/c++11/Makefile.in

index 619bf37edbf781e1210758d5b61df2b065520b1a..00a5c25293b674ef85ecfc0d4f215bea73675481 100644 (file)
--- a/libstdc++-v3/src/c++11/Makefile.in
+++ b/libstdc++-v3/src/c++11/Makefile.in
@@ -72,12 +72,12 @@ libc__11convenience_la_LIBADD =
  @ENABLE_DUAL_ABI_TRUE@ cxx11-ios_failure.lo \
  @ENABLE_DUAL_ABI_TRUE@ cxx11-shim_facets.lo cxx11-stdexcept.lo
  am__objects_2 = ctype_configure_char.lo ctype_members.lo
-am__objects_3 = chrono.lo condition_variable.lo cow-stdexcept.lo \
-       ctype.lo debug.lo functexcept.lo functional.lo future.lo \
-       hash_c++0x.lo hashtable_c++0x.lo ios.lo limits.lo mutex.lo \
-       placeholders.lo random.lo regex.lo shared_ptr.lo \
-       snprintf_lite.lo system_error.lo thread.lo $(am__objects_1) \
-       $(am__objects_2)
+am__objects_3 = chrono.lo codecvt.lo condition_variable.lo \
+       cow-stdexcept.lo ctype.lo debug.lo functexcept.lo \
+       functional.lo future.lo hash_c++0x.lo hashtable_c++0x.lo \
+       ios.lo limits.lo mutex.lo placeholders.lo random.lo regex.lo \
+       shared_ptr.lo snprintf_lite.lo system_error.lo thread.lo \
+       $(am__objects_1) $(am__objects_2)
  @ENABLE_DUAL_ABI_TRUE@am__objects_4 = cow-fstream-inst.lo \
  @ENABLE_DUAL_ABI_TRUE@ cow-sstream-inst.lo cow-string-inst.lo \
  @ENABLE_DUAL_ABI_TRUE@ cow-wstring-inst.lo cxx11-locale-inst.lo \
@@ -344,6 +344,7 @@ host_sources = \
  
  sources = \
         chrono.cc \
+       codecvt.cc \
         condition_variable.cc \
         cow-stdexcept.cc \
         ctype.cc \
diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc

new file mode 100644 (file)

index 0000000..fdd4972
--- /dev/null
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -0,0 +1,461 @@
+// Locale support (codecvt) -*- C++ -*-
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+#include <bits/locale_classes.h>
+#include <bits/codecvt.h>
+#include <bits/stl_algobase.h> // std::max
+#include <cstring>             // std::memcpy, std::memcmp
+
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+namespace
+{
+  // Largest code point that fits in a single UTF-16 code unit.
+  const char32_t max_single_utf16_unit = 0xFFFF;
+  const char32_t max_code_point = 0x10FFFF;
+
+  // A pair of pointers delimiting the half-open range [next, end).
+  // The conversion helpers below advance `next` as elements are consumed
+  // from an input range or written to an output range.
+  template<typename Elem>
+    struct range
+    {
+      Elem* next; // current position
+      Elem* end;  // one past the last element
+
+      // Element at the current position (no bounds check is performed).
+      Elem operator*() const { return *next; }
+
+      // Advance the current position by one element.
+      range& operator++() { ++next; return *this; }
+
+      // Number of elements between next and end.
+      size_t size() const { return end - next; }
+    };
+
+  // Decode one UTF-8 encoded code point from the front of `from`.
+  // On success returns the code point and advances from.next past the bytes
+  // that were consumed.  Returns char32_t(-1), leaving from.next unchanged,
+  // if the range is empty, the sequence is truncated or ill-formed, or the
+  // decoded value is greater than maxcode.
+  // NOTE(review): 3-byte encodings of surrogates (U+D800..U+DFFF) do not
+  // appear to be rejected here -- confirm whether that is intended.
+  char32_t
+  read_utf8_code_point(range<const char>& from, unsigned long maxcode)
+  {
+    size_t avail = from.size();
+    if (avail == 0)
+      return -1;
+    unsigned char c1 = from.next[0];
+    // https://en.wikipedia.org/wiki/UTF-8#Sample_code
+    if (c1 < 0x80)
+    {
+      // Single byte (ASCII); note maxcode is not consulted on this path.
+      ++from.next;
+      return c1;
+    }
+    else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
+      return -1;
+    else if (c1 < 0xE0) // 2-byte sequence
+    {
+      if (avail < 2)
+       return -1;
+      unsigned char c2 = from.next[1];
+      if ((c2 & 0xC0) != 0x80)
+       return -1;
+      // 0x3080 == (0xC0 << 6) + 0x80, i.e. the tag bits of both bytes.
+      char32_t c = (c1 << 6) + c2 - 0x3080;
+      if (c > maxcode)
+       return -1;
+      from.next += 2;
+      return c;
+    }
+    else if (c1 < 0xF0) // 3-byte sequence
+    {
+      if (avail < 3)
+       return -1;
+      unsigned char c2 = from.next[1];
+      if ((c2 & 0xC0) != 0x80)
+       return -1;
+      if (c1 == 0xE0 && c2 < 0xA0) // overlong
+       return -1;
+      unsigned char c3 = from.next[2];
+      if ((c3 & 0xC0) != 0x80)
+       return -1;
+      // 0xE2080 == (0xE0 << 12) + (0x80 << 6) + 0x80, the tag bits.
+      char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
+      if (c > maxcode)
+       return -1;
+      from.next += 3;
+      return c;
+    }
+    else if (c1 < 0xF5) // 4-byte sequence
+    {
+      if (avail < 4)
+       return -1;
+      unsigned char c2 = from.next[1];
+      if ((c2 & 0xC0) != 0x80)
+       return -1;
+      if (c1 == 0xF0 && c2 < 0x90) // overlong
+       return -1;
+      if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
+      return -1;
+      unsigned char c3 = from.next[2];
+      if ((c3 & 0xC0) != 0x80)
+       return -1;
+      unsigned char c4 = from.next[3];
+      if ((c4 & 0xC0) != 0x80)
+       return -1;
+      // 0x3C82080 == (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80,
+      // the tag bits of all four bytes.
+      char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
+      if (c > maxcode)
+       return -1;
+      from.next += 4;
+      return c;
+    }
+    else // > U+10FFFF
+      return -1;
+  }
+
+  // Append the UTF-8 encoding of code_point to the output range `to`,
+  // advancing to.next past the bytes written.
+  // Returns false without writing anything if `to` has too little space
+  // for the encoding or if code_point is greater than 0x10FFFF.
+  bool
+  write_utf8_code_point(range<char>& to, char32_t code_point)
+  {
+    if (code_point < 0x80)
+      {
+       // One byte.
+       if (to.size() < 1)
+         return false;
+       *to.next++ = code_point;
+      }
+    else if (code_point <= 0x7FF)
+      {
+       // Two bytes: 0xC0 lead tag, then one 0x80-tagged 6-bit group.
+       if (to.size() < 2)
+         return false;
+       *to.next++ = (code_point >> 6) + 0xC0;
+       *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else if (code_point <= 0xFFFF)
+      {
+       // Three bytes: 0xE0 lead tag, then two 0x80-tagged 6-bit groups.
+       if (to.size() < 3)
+         return false;
+       *to.next++ = (code_point >> 12) + 0xE0;
+       *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
+       *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else if (code_point <= 0x10FFFF)
+      {
+       // Four bytes: 0xF0 lead tag, then three 0x80-tagged 6-bit groups.
+       if (to.size() < 4)
+         return false;
+       *to.next++ = (code_point >> 18) + 0xF0;
+       *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
+       *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
+       *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else
+      return false;
+    return true;
+  }
+
+  bool
+  write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
+  {
+    if (codepoint < max_single_utf16_unit)
+      {
+       if (to.size() > 0)
+         {
+           *to.next = codepoint;
+           ++to.next;
+           return true;
+         }
+      }
+    else if (to.size() > 1)
+      {
+       // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
+       const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+       const char32_t SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
+       char16_t lead = LEAD_OFFSET + (codepoint >> 10);
+       char16_t trail = 0xDC00 + (codepoint & 0x3FF);
+       char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET;
+
+       to.next[0] = utf16bytes >> 16;
+       to.next[1] = utf16bytes & 0xFFFF;
+       to.next += 2;
+       return true;
+      }
+    return false;
+  }
+
+  // utf8 -> ucs4
+  // Decodes code points from `from` into `to` while both ranges are
+  // non-empty, advancing both ranges as it goes.
+  // Returns error as soon as read_utf8_code_point fails (the failing bytes
+  // are not consumed), partial if input is left over when the loop ends,
+  // and ok otherwise.
+  codecvt_base::result
+  ucs4_in(range<const char>& from, range<char32_t>& to,
+          unsigned long maxcode = max_code_point)
+  {
+    while (from.size() && to.size())
+      {
+       const char32_t codepoint = read_utf8_code_point(from, maxcode);
+       // char32_t(-1) is read_utf8_code_point's failure value.
+       if (codepoint == char32_t(-1) || codepoint > maxcode)
+         return codecvt_base::error;
+       *to.next++ = codepoint;
+      }
+    return from.size() ? codecvt_base::partial : codecvt_base::ok;
+  }
+
+  // ucs4 -> utf8
+  // Encodes every element of `from` into `to`, advancing both ranges.
+  // Returns error at the first element greater than maxcode, partial if
+  // write_utf8_code_point fails for an element, and ok once all input has
+  // been consumed.  The offending element is not consumed in either
+  // failure case.
+  codecvt_base::result
+  ucs4_out(range<const char32_t>& from, range<char>& to,
+           unsigned long maxcode = max_code_point)
+  {
+    while (from.size())
+      {
+       const char32_t c = from.next[0];
+       if (c > maxcode)
+         return codecvt_base::error;
+       if (!write_utf8_code_point(to, c))
+         return codecvt_base::partial;
+       // Only step past the input element once it has been written.
+       ++from.next;
+      }
+    return codecvt_base::ok;
+  }
+
+  // utf8 -> utf16
+  // Decodes code points from `from` and writes them to `to` as UTF-16
+  // while both ranges are non-empty, advancing both ranges.
+  // Returns error if read_utf8_code_point fails, and partial if the output
+  // range cannot hold the next code point (in which case the input
+  // position is left at the start of that code point).
+  // NOTE(review): when the loop ends because `to` is full while input
+  // remains, this returns ok, whereas ucs4_in returns partial in the
+  // same situation -- confirm that the difference is intended.
+  codecvt_base::result
+  utf16_in(range<const char>& from, range<char16_t>& to,
+           unsigned long maxcode = max_code_point)
+  {
+    while (from.size() && to.size())
+      {
+       const char* first = from.next;
+       // A lead byte >= 0xF0 starts a 4-byte sequence, which needs two
+       // UTF-16 code units; stop before decoding if they will not fit.
+       if ((unsigned char)*first >= 0xF0 && to.size() < 2)
+         return codecvt_base::partial;
+       const char32_t codepoint = read_utf8_code_point(from, maxcode);
+       if (codepoint == char32_t(-1) || codepoint > maxcode)
+         return codecvt_base::error;
+       if (!write_utf16_code_point(to, codepoint))
+         {
+           // Undo the read so the caller can retry with more output space.
+           from.next = first;
+           return codecvt_base::partial;
+         }
+      }
+    return codecvt_base::ok;
+  }
+
+  // utf16 -> utf8
+  codecvt_base::result
+  utf16_out(range<const char16_t>& from, range<char>& to,
+            unsigned long maxcode = max_code_point)
+  {
+    while (from.size())
+      {
+       char32_t c = from.next[0];
+       int inc = 1;
+       if (c >= 0xD800 && c < 0xDBFF) // start of surrogate pair
+         {
+           if (from.size() < 2)
+             return codecvt_base::ok; // stop converting at this point
+
+           const char32_t c2 = from.next[1];
+           if (c2 >= 0xDC00 && c2 <= 0xDFFF)
+             {
+               inc = 2;
+               c = (c << 10) + c2 - 0x35FDC00;
+             }
+           else
+             return codecvt_base::error;
+         }
+       if (c > maxcode)
+         return codecvt_base::error;
+       if (!write_utf8_code_point(to, c))
+         return codecvt_base::partial;
+       from.next += inc;
+      }
+    return codecvt_base::ok;
+  }
+
+  // return pos such that [begin,pos) is valid UTF-16 string no longer than max
+  int
+  utf16_len(const char* begin, const char* end, size_t max,
+            char32_t maxcode = max_code_point)
+  {
+    range<const char> from{ begin, end };
+    size_t count = 0;
+    while (count+1 < max)
+      {
+       char32_t c = read_utf8_code_point(from, maxcode);
+       if (c == char32_t(-1))
+         break;
+       else if (c > max_single_utf16_unit)
+         ++count;
+       ++count;
+      }
+    if (count+1 == max) // take one more character if it fits in a single unit
+      read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
+    return from.next - begin;
+  }
+
+  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+  // More precisely: decodes up to max code points from [begin,end) with
+  // read_utf8_code_point, stopping early at the first failure, and returns
+  // the number of bytes consumed.
+  int
+  ucs4_len(const char* begin, const char* end, size_t max,
+            char32_t maxcode = max_code_point)
+  {
+    range<const char> from{ begin, end };
+    size_t count = 0;
+    while (count < max)
+      {
+       char32_t c = read_utf8_code_point(from, maxcode);
+       // char32_t(-1) is read_utf8_code_point's failure value.
+       if (c == char32_t(-1))
+         break;
+       ++count;
+      }
+    return from.next - begin;
+  }
+}
+
+// Define members of codecvt<char16_t, char, mbstate_t> specialization.
+// Converts from UTF-8 to UTF-16.
+
+locale::id codecvt<char16_t, char, mbstate_t>::id;
+
+codecvt<char16_t, char, mbstate_t>::~codecvt() { }
+
+// UTF-16 -> UTF-8.  Wraps the pointer pairs in ranges, delegates to
+// utf16_out with its default maxcode, and reports the final positions
+// through __from_next and __to_next.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_out(state_type&,
+       const intern_type* __from,
+       const intern_type* __from_end, const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = utf16_out(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+// Writes nothing: sets __to_next to __to and returns noconv.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+          extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv; // we don't use mbstate_t for the unicode facets
+}
+
+// UTF-8 -> UTF-16.  Wraps the pointer pairs in ranges, delegates to
+// utf16_in with its default maxcode, and reports the final positions
+// through __from_next and __to_next.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char16_t> to{ __to, __to_end };
+  auto res = utf16_in(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+// Always returns 0.
+int
+codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
+{ return 0; }
+
+// Always returns false.
+bool
+codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
+{ return false; }
+
+// Delegates to utf16_len with its default maxcode; the state argument is
+// unused.
+int
+codecvt<char16_t, char, mbstate_t>::
+do_length(state_type&, const extern_type* __from,
+         const extern_type* __end, size_t __max) const
+{
+  return utf16_len(__from, __end, __max);
+}
+
+int
+codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+
+// Define members of codecvt<char32_t, char, mbstate_t> specialization.
+// Converts from UTF-8 to UTF-32 (aka UCS-4).
+
+locale::id codecvt<char32_t, char, mbstate_t>::id;
+
+codecvt<char32_t, char, mbstate_t>::~codecvt() { }
+
+// UTF-32 -> UTF-8.  Wraps the pointer pairs in ranges, delegates to
+// ucs4_out with its default maxcode, and reports the final positions
+// through __from_next and __to_next.  The state argument is unused.
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char32_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = ucs4_out(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+// Writes nothing: sets __to_next to __to and returns noconv.
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+          extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+// UTF-8 -> UTF-32.  Wraps the pointer pairs in ranges, delegates to
+// ucs4_in with its default maxcode, and reports the final positions
+// through __from_next and __to_next.  The state argument is unused.
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char32_t> to{ __to, __to_end };
+  auto res = ucs4_in(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+// Always returns 0.
+int
+codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
+{ return 0; }
+
+// Always returns false.
+bool
+codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
+{ return false; }
+
+// Delegates to ucs4_len with its default maxcode; the state argument is
+// unused.
+int
+codecvt<char32_t, char, mbstate_t>::
+do_length(state_type&, const extern_type* __from,
+         const extern_type* __end, size_t __max) const
+{
+  return ucs4_len(__from, __end, __max);
+}
+
+// Always returns 4, the longest sequence read_utf8_code_point accepts.
+int
+codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
+{ return 4; }
+
+inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
+inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
+
+_GLIBCXX_END_NAMESPACE_VERSION
+}
+#endif // _GLIBCXX_USE_C99_STDINT_TR1
diff --git a/libstdc++-v3/src/c++98/Makefile.am b/libstdc++-v3/src/c++98/Makefile.am

index 6dd7a720d5b0e0f9b8155b41888d6d4d6bf24456..e348dfb120587f2a12781ca4a88cf1e709ba1124 100644 (file)
--- a/libstdc++-v3/src/c++98/Makefile.am
+++ b/libstdc++-v3/src/c++98/Makefile.am
@@ -176,6 +176,16 @@ numeric_members_cow.o: numeric_members_cow.cc
         $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
  endif
  
+# XXX TODO move locale_init.cc and localename.cc to src/c++11
+locale_init.lo: locale_init.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+locale_init.o: locale_init.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+localename.lo: localename.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+localename.o: localename.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+
  # Use special rules for the deprecated source files so that they find
  # deprecated include files.
  GLIBCXX_INCLUDE_DIR=$(glibcxx_builddir)/include
diff --git a/libstdc++-v3/src/c++98/Makefile.in b/libstdc++-v3/src/c++98/Makefile.in

index 9a2a27f0da352bf263f7cdb5ad2bbbd8059a3f6a..3c69791bb66f956588f05dc1c2d422282160560b 100644 (file)
--- a/libstdc++-v3/src/c++98/Makefile.in
+++ b/libstdc++-v3/src/c++98/Makefile.in
@@ -764,6 +764,16 @@ vpath % $(top_srcdir)/src/c++98
  @ENABLE_DUAL_ABI_TRUE@ $(LTCXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
  @ENABLE_DUAL_ABI_TRUE@numeric_members_cow.o: numeric_members_cow.cc
  @ENABLE_DUAL_ABI_TRUE@ $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $<
+
+# XXX TODO move locale_init.cc and localename.cc to src/c++11
+locale_init.lo: locale_init.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+locale_init.o: locale_init.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+localename.lo: localename.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
+localename.o: localename.cc
+       $(LTCXXCOMPILE) -std=gnu++11 -c $<
  strstream.lo: strstream.cc
         $(LTCXXCOMPILE) -I$(GLIBCXX_INCLUDE_DIR)/backward -Wno-deprecated -c $<
  strstream.o: strstream.cc
diff --git a/libstdc++-v3/src/c++98/locale_init.cc b/libstdc++-v3/src/c++98/locale_init.cc

index c45eff3d6c9bbe2e13fa5ec8ea40838b8acf7c99..0a95b9ff4b54edbd91e1e84db835134880c4e983 100644 (file)
--- a/libstdc++-v3/src/c++98/locale_init.cc
+++ b/libstdc++-v3/src/c++98/locale_init.cc
@@ -57,7 +57,7 @@ _GLIBCXX_LOC_ID(_ZNSt8messagesIwE2idE);
  
  namespace 
  {
-  const int num_facets = _GLIBCXX_NUM_FACETS
+  const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS
      + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0);
  
    __gnu_cxx::__mutex&
@@ -201,6 +201,16 @@ namespace
    fake_messages_w messages_w;
  #endif
  
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+  typedef char fake_codecvt_c16[sizeof(codecvt<char16_t, char, mbstate_t>)]
+  __attribute__ ((aligned(__alignof__(codecvt<char16_t, char, mbstate_t>))));
+  fake_codecvt_c16 codecvt_c16;
+
+  typedef char fake_codecvt_c32[sizeof(codecvt<char32_t, char, mbstate_t>)]
+  __attribute__ ((aligned(__alignof__(codecvt<char32_t, char, mbstate_t>))));
+  fake_codecvt_c32 codecvt_c32;
+#endif
+
    // Storage for "C" locale caches.
    typedef char fake_num_cache_c[sizeof(std::__numpunct_cache<char>)]
    __attribute__ ((aligned(__alignof__(std::__numpunct_cache<char>))));
@@ -318,6 +328,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  #ifdef _GLIBCXX_USE_WCHAR_T
      &std::ctype<wchar_t>::id,
      &codecvt<wchar_t, char, mbstate_t>::id,
+#endif
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+    &codecvt<char16_t, char, mbstate_t>::id,
+    &codecvt<char32_t, char, mbstate_t>::id,
  #endif
      0
    };
@@ -522,6 +536,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_init_facet(new (&messages_w) std::messages<wchar_t>(1));
  #endif
  
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+    _M_init_facet(new (&codecvt_c16) codecvt<char16_t, char, mbstate_t>(1));
+    _M_init_facet(new (&codecvt_c32) codecvt<char32_t, char, mbstate_t>(1));
+#endif
+
  #if _GLIBCXX_USE_DUAL_ABI
      facet* extra[] = { __npc, __mpcf, __mpct
  # ifdef  _GLIBCXX_USE_WCHAR_T
diff --git a/libstdc++-v3/src/c++98/localename.cc b/libstdc++-v3/src/c++98/localename.cc

index c42a2173563d1864a10bdfb7557afde5e98921a6..2884beebfe152ebead770b57be7c764b6299dacf 100644 (file)
--- a/libstdc++-v3/src/c++98/localename.cc
+++ b/libstdc++-v3/src/c++98/localename.cc
@@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
    }
  
-const int num_facets = _GLIBCXX_NUM_FACETS
+const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS
    + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0);
  
    // Construct named _Impl.
@@ -267,7 +267,12 @@ const int num_facets = _GLIBCXX_NUM_FACETS
         _M_init_facet(new time_get<wchar_t>);
         _M_init_facet(new time_put<wchar_t>);
         _M_init_facet(new std::messages<wchar_t>(__cloc, __s));
-#endif   
+#endif
+
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+        _M_init_facet(new codecvt<char16_t, char, mbstate_t>);
+        _M_init_facet(new codecvt<char32_t, char, mbstate_t>);
+#endif
  
  #if _GLIBCXX_USE_DUAL_ABI
          _M_init_extra(&__cloc, &__clocm, __s, __smon);
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc

new file mode 100644 (file)

index 0000000..987233a
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc
@@ -0,0 +1,76 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-require-cstdint "" }
+// { dg-options "-std=gnu++11" }
+
+#include <locale>
+#include <iterator>
+#include <string>
+#include <testsuite_hooks.h>
+
+const char expected[] = u8"£¥€";
+const std::size_t expected_len = std::char_traits<char>::length(expected);
+
+// Round-trip check for std::codecvt<C, char, std::mbstate_t> taken from the
+// classic locale: `from` is a null-terminated string of C that is expected
+// to convert to the UTF-8 bytes in `expected`, and back again.
+template<typename C>
+void test(const C* from)
+{
+  auto len = std::char_traits<C>::length(from);
+  std::mbstate_t state{};
+  char buf[16] = { };
+  using test_type = std::codecvt<C, char, std::mbstate_t>;
+  const test_type& cvt = std::use_facet<test_type>(std::locale::classic());
+  auto from_end = from + len;
+  auto from_next = from;
+  auto buf_end = std::end(buf);
+  auto buf_next = buf;
+  // C -> UTF-8: all input must be consumed and the output must match
+  // `expected` exactly.
+  auto res = cvt.out(state, from, from_end, from_next, buf, buf_end, buf_next);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( from_next == from_end );
+  VERIFY( (buf_next - buf) == expected_len );
+  VERIFY( 0 == std::char_traits<char>::compare(buf, expected, expected_len) );
+
+  // UTF-8 -> C: converting `expected` back must reproduce `from`.
+  C buf2[16];
+  auto exp_end = expected + expected_len;
+  auto exp_next = expected;
+  auto buf2_end = std::end(buf2);
+  auto buf2_next = buf2;
+  res = cvt.in(state, expected, exp_end, exp_next, buf2, buf2_end, buf2_next);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( exp_next == exp_end );
+  VERIFY( (buf2_next - buf2) == len );
+  VERIFY( 0 == std::char_traits<C>::compare(buf2, from, len) );
+}
+
+// Runs the round-trip check with a char16_t string literal.
+void
+test01()
+{
+  test(u"£¥€");
+}
+
+// Runs the round-trip check with a char32_t string literal.
+void
+test02()
+{
+  test(U"£¥€");
+}
+
+// Runs both round-trip checks in order.
+int
+main()
+{
+  test01();
+  test02();
+}
diff --git a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc

index b6f8c8bda372dcfbc0df46e833ab8269269af1b8..33b5a8a7d4bac22884b7cc9c5d14f25e57ae753a 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc
+++ b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc
@@ -1,4 +1,5 @@
  // { dg-require-iconv "ISO-8859-1" }
+// { dg-options "-std=gnu++11" }
  
  // Copyright (C) 2006-2015 Free Software Foundation, Inc.
  //
@@ -32,6 +33,11 @@ typedef std::codecvt<char, char, std::mbstate_t>           c_codecvt;
  typedef std::codecvt<wchar_t, char, std::mbstate_t>          w_codecvt;
  #endif
  
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+typedef std::codecvt<char16_t, char, std::mbstate_t>         u16_codecvt;
+typedef std::codecvt<char32_t, char, std::mbstate_t>         u32_codecvt;
+#endif
+
  class gnu_facet: public std::locale::facet
  {
  public:
@@ -60,6 +66,10 @@ void test01()
        VERIFY( has_facet<c_codecvt>(loc13) );
  #ifdef _GLIBCXX_USE_WCHAR_T
        VERIFY( has_facet<w_codecvt>(loc13) );
+#endif
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+      VERIFY( has_facet<u16_codecvt>(loc13) );
+      VERIFY( has_facet<u32_codecvt>(loc13) );
  #endif
        VERIFY( has_facet<unicode_codecvt>(loc13) );
      }
author	Jonathan Wakely <jwakely@redhat.com>
	Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)
committer	Jonathan Wakely <redi@gcc.gnu.org>
	Fri, 16 Jan 2015 15:02:35 +0000 (15:02 +0000)
libstdc++-v3/ChangeLog		patch \| blob \| history
libstdc++-v3/acinclude.m4		patch \| blob \| history
libstdc++-v3/config/abi/pre/gnu.ver		patch \| blob \| history
libstdc++-v3/configure		patch \| blob \| history
libstdc++-v3/include/bits/codecvt.h		patch \| blob \| history
libstdc++-v3/include/bits/locale_facets.h		patch \| blob \| history
libstdc++-v3/src/c++11/Makefile.am		patch \| blob \| history
libstdc++-v3/src/c++11/Makefile.in		patch \| blob \| history
libstdc++-v3/src/c++11/codecvt.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/src/c++98/Makefile.am		patch \| blob \| history
libstdc++-v3/src/c++98/Makefile.in		patch \| blob \| history
libstdc++-v3/src/c++98/locale_init.cc		patch \| blob \| history
libstdc++-v3/src/c++98/localename.cc		patch \| blob \| history
libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc		patch \| blob \| history