From bb93f35da1612940266f5d159b6cc5a3e54fca14 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Fri, 16 Jan 2015 15:02:35 +0000 Subject: [PATCH] acinclude.m4: Fix typo in comment. * acinclude.m4: Fix typo in comment. * configure: Regenerate. * include/bits/codecvt.h (codecvt<char16_t, char, mbstate_t>, codecvt<char32_t, char, mbstate_t>): Declare specializations. * include/bits/locale_facets.h: Reserve space for new specializations. * src/c++11/Makefile.am: Add codecvt.cc. * src/c++11/Makefile.in: Regenerate. * src/c++11/codecvt.cc: New. * src/c++98/Makefile.am: Compile locale_init.cc and localename.cc with -std=gnu++11. * src/c++98/Makefile.in: Regenerate. * src/c++98/locale_init.cc: Initialize new codecvt specializations. * src/c++98/localename.cc: Likewise. * config/abi/pre/gnu.ver: Exports for new codecvt specializations. * testsuite/22_locale/codecvt/utf8.cc: New. * testsuite/22_locale/locale/cons/unicode.cc: Check that new specializations are installed in locale objects. From-SVN: r219747 --- libstdc++-v3/ChangeLog | 20 + libstdc++-v3/acinclude.m4 | 2 +- libstdc++-v3/config/abi/pre/gnu.ver | 5 + libstdc++-v3/configure | 2 +- libstdc++-v3/include/bits/codecvt.h | 124 ++++- libstdc++-v3/include/bits/locale_facets.h | 5 + libstdc++-v3/src/c++11/Makefile.am | 1 + libstdc++-v3/src/c++11/Makefile.in | 13 +- libstdc++-v3/src/c++11/codecvt.cc | 461 ++++++++++++++++++ libstdc++-v3/src/c++98/Makefile.am | 10 + libstdc++-v3/src/c++98/Makefile.in | 10 + libstdc++-v3/src/c++98/locale_init.cc | 21 +- libstdc++-v3/src/c++98/localename.cc | 9 +- .../testsuite/22_locale/codecvt/utf8.cc | 76 +++ .../22_locale/locale/cons/unicode.cc | 10 + 15 files changed, 757 insertions(+), 12 deletions(-) create mode 100644 libstdc++-v3/src/c++11/codecvt.cc create mode 100644 libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 98b95896155..7e68c450e6b 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,23 @@ +2015-01-16 Jonathan Wakely + + * acinclude.m4: 
Fix typo in comment. + * configure: Regenerate. + * include/bits/codecvt.h (codecvt<char16_t, char, mbstate_t>, + codecvt<char32_t, char, mbstate_t>): Declare specializations. + * include/bits/locale_facets.h: Reserve space for new specializations. + * src/c++11/Makefile.am: Add codecvt.cc. + * src/c++11/Makefile.in: Regenerate. + * src/c++11/codecvt.cc: New. + * src/c++98/Makefile.am: Compile locale_init.cc and localename.cc + with -std=gnu++11. + * src/c++98/Makefile.in: Regenerate. + * src/c++98/locale_init.cc: Initialize new codecvt specializations. + * src/c++98/localename.cc: Likewise. + * config/abi/pre/gnu.ver: Exports for new codecvt specializations. + * testsuite/22_locale/codecvt/utf8.cc: New. + * testsuite/22_locale/locale/cons/unicode.cc: Check that new + specializations are installed in locale objects. + 2015-01-16 Torvald Riegel * include/std/shared_mutex (shared_timed_mutex): Add POSIX-based diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index db357d6d731..74e8eaffe0a 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -1777,7 +1777,7 @@ AC_DEFUN([GLIBCXX_CHECK_C99_TR1], [ in namespace std::tr1.]) fi - # Check for the existence of whcar_t functions (NB: doesn't + # Check for the existence of wchar_t functions (NB: doesn't # make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1). 
ac_c99_inttypes_wchar_t_tr1=no; if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 700da189b78..83f4e8c9c6f 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1759,6 +1759,11 @@ GLIBCXX_3.4.21 { _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE3getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmPK[cw]SC_; _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE6do_getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmcc; + # codecvt, codecvt + _ZNKSt7codecvtID[is]c11__mbstate_t*; + _ZNSt7codecvtID[is]c11__mbstate_t*; + _ZT[ISV]St7codecvtID[is]c11__mbstate_tE; + } GLIBCXX_3.4.20; diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index be9337fcec0..7bd1ede852e 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -19216,7 +19216,7 @@ $as_echo "#define _GLIBCXX_USE_C99_INTTYPES_TR1 1" >>confdefs.h fi - # Check for the existence of whcar_t functions (NB: doesn't + # Check for the existence of wchar_t functions (NB: doesn't # make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1). ac_c99_inttypes_wchar_t_tr1=no; if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then diff --git a/libstdc++-v3/include/bits/codecvt.h b/libstdc++-v3/include/bits/codecvt.h index 1eee1ccbc63..a6e59b5128f 100644 --- a/libstdc++-v3/include/bits/codecvt.h +++ b/libstdc++-v3/include/bits/codecvt.h @@ -393,7 +393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; #ifdef _GLIBCXX_USE_WCHAR_T - /// class codecvt specialization. + /** @brief Class codecvt specialization. + * + * Converts between narrow and wide characters in the native character set + */ template<> class codecvt : public __codecvt_abstract_base @@ -455,6 +458,125 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; #endif //_GLIBCXX_USE_WCHAR_T +#if __cplusplus >= 201103L +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + /** @brief Class codecvt specialization. 
+ * + * Converts between UTF-16 and UTF-8. + */ + template<> + class codecvt + : public __codecvt_abstract_base + { + public: + // Types: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + public: + static locale::id id; + + explicit + codecvt(size_t __refs = 0) + : __codecvt_abstract_base(__refs) { } + + protected: + virtual + ~codecvt(); + + virtual result + do_out(state_type& __state, const intern_type* __from, + const intern_type* __from_end, const intern_type*& __from_next, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_unshift(state_type& __state, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_in(state_type& __state, + const extern_type* __from, const extern_type* __from_end, + const extern_type*& __from_next, + intern_type* __to, intern_type* __to_end, + intern_type*& __to_next) const; + + virtual + int do_encoding() const throw(); + + virtual + bool do_always_noconv() const throw(); + + virtual + int do_length(state_type&, const extern_type* __from, + const extern_type* __end, size_t __max) const; + + virtual int + do_max_length() const throw(); + }; + + /** @brief Class codecvt specialization. + * + * Converts between UTF-32 and UTF-8. 
+ */ + template<> + class codecvt + : public __codecvt_abstract_base + { + public: + // Types: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + public: + static locale::id id; + + explicit + codecvt(size_t __refs = 0) + : __codecvt_abstract_base(__refs) { } + + protected: + virtual + ~codecvt(); + + virtual result + do_out(state_type& __state, const intern_type* __from, + const intern_type* __from_end, const intern_type*& __from_next, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_unshift(state_type& __state, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_in(state_type& __state, + const extern_type* __from, const extern_type* __from_end, + const extern_type*& __from_next, + intern_type* __to, intern_type* __to_end, + intern_type*& __to_next) const; + + virtual + int do_encoding() const throw(); + + virtual + bool do_always_noconv() const throw(); + + virtual + int do_length(state_type&, const extern_type* __from, + const extern_type* __end, size_t __max) const; + + virtual int + do_max_length() const throw(); + }; + +#endif // _GLIBCXX_USE_C99_STDINT_TR1 +#endif // C++11 + /// class codecvt_byname [22.2.1.6]. template class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> diff --git a/libstdc++-v3/include/bits/locale_facets.h b/libstdc++-v3/include/bits/locale_facets.h index a5fc45ee597..77932a5060b 100644 --- a/libstdc++-v3/include/bits/locale_facets.h +++ b/libstdc++-v3/include/bits/locale_facets.h @@ -58,6 +58,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #else # define _GLIBCXX_NUM_FACETS 14 # define _GLIBCXX_NUM_CXX11_FACETS 8 +#endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +# define _GLIBCXX_NUM_UNICODE_FACETS 2 +#else +# define _GLIBCXX_NUM_UNICODE_FACETS 0 #endif // Convert string to numeric value of type _Tp and store results. 
diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am index 4cba983ccd2..b57e552647e 100644 --- a/libstdc++-v3/src/c++11/Makefile.am +++ b/libstdc++-v3/src/c++11/Makefile.am @@ -53,6 +53,7 @@ endif sources = \ chrono.cc \ + codecvt.cc \ condition_variable.cc \ cow-stdexcept.cc \ ctype.cc \ diff --git a/libstdc++-v3/src/c++11/Makefile.in b/libstdc++-v3/src/c++11/Makefile.in index 619bf37edbf..00a5c25293b 100644 --- a/libstdc++-v3/src/c++11/Makefile.in +++ b/libstdc++-v3/src/c++11/Makefile.in @@ -72,12 +72,12 @@ libc__11convenience_la_LIBADD = @ENABLE_DUAL_ABI_TRUE@ cxx11-ios_failure.lo \ @ENABLE_DUAL_ABI_TRUE@ cxx11-shim_facets.lo cxx11-stdexcept.lo am__objects_2 = ctype_configure_char.lo ctype_members.lo -am__objects_3 = chrono.lo condition_variable.lo cow-stdexcept.lo \ - ctype.lo debug.lo functexcept.lo functional.lo future.lo \ - hash_c++0x.lo hashtable_c++0x.lo ios.lo limits.lo mutex.lo \ - placeholders.lo random.lo regex.lo shared_ptr.lo \ - snprintf_lite.lo system_error.lo thread.lo $(am__objects_1) \ - $(am__objects_2) +am__objects_3 = chrono.lo codecvt.lo condition_variable.lo \ + cow-stdexcept.lo ctype.lo debug.lo functexcept.lo \ + functional.lo future.lo hash_c++0x.lo hashtable_c++0x.lo \ + ios.lo limits.lo mutex.lo placeholders.lo random.lo regex.lo \ + shared_ptr.lo snprintf_lite.lo system_error.lo thread.lo \ + $(am__objects_1) $(am__objects_2) @ENABLE_DUAL_ABI_TRUE@am__objects_4 = cow-fstream-inst.lo \ @ENABLE_DUAL_ABI_TRUE@ cow-sstream-inst.lo cow-string-inst.lo \ @ENABLE_DUAL_ABI_TRUE@ cow-wstring-inst.lo cxx11-locale-inst.lo \ @@ -344,6 +344,7 @@ host_sources = \ sources = \ chrono.cc \ + codecvt.cc \ condition_variable.cc \ cow-stdexcept.cc \ ctype.cc \ diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc new file mode 100644 index 00000000000..fdd49720384 --- /dev/null +++ b/libstdc++-v3/src/c++11/codecvt.cc @@ -0,0 +1,461 @@ +// Locale support (codecvt) -*- C++ -*- + +// Copyright (C) 
2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +#include +#include +#include // std::max +#include // std::memcpy, std::memcmp + +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +namespace +{ + // Largest code point that fits in a single UTF-16 code unit. 
+  const char32_t max_single_utf16_unit = 0xFFFF;
+  // Largest valid Unicode code point.
+  const char32_t max_code_point = 0x10FFFF;
+
+  // Sentinel values returned by read_utf8_code_point.  Both are larger
+  // than max_code_point, so callers that pass a maxcode no greater than
+  // max_code_point can detect either one with "result > maxcode".
+  const char32_t invalid_mb_sequence = char32_t(-1);
+  const char32_t incomplete_mb_character = char32_t(-2);
+
+  // A half-open range [next, end) of code units.  NEXT is advanced as
+  // units are consumed from, or written to, the range.
+  template<typename Elem>
+    struct range
+    {
+      Elem* next;
+      Elem* end;
+
+      Elem operator*() const { return *next; }
+
+      range& operator++() { ++next; return *this; }
+
+      // Number of code units remaining in the range.
+      size_t size() const { return end - next; }
+    };
+
+  // Decode a single code point from the UTF-8 range FROM.
+  // On success, advance from.next past the sequence and return the
+  // code point.  Otherwise leave from.next unchanged and return
+  //  - incomplete_mb_character if the range is empty or ends before the
+  //    number of bytes announced by the lead byte is available;
+  //  - invalid_mb_sequence if the bytes are not well-formed UTF-8 or
+  //    decode to a value greater than MAXCODE.
+  char32_t
+  read_utf8_code_point(range<const char>& from, unsigned long maxcode)
+  {
+    const size_t avail = from.size();
+    if (avail == 0)
+      return incomplete_mb_character;
+    const unsigned char c1 = from.next[0];
+    // https://en.wikipedia.org/wiki/UTF-8#Sample_code
+    if (c1 < 0x80)
+      {
+        ++from.next;
+        return c1;
+      }
+    else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
+      return invalid_mb_sequence;
+    else if (c1 < 0xE0) // 2-byte sequence
+      {
+        if (avail < 2)
+          return incomplete_mb_character;
+        const unsigned char c2 = from.next[1];
+        if ((c2 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        // 0x3080 removes the 0xC0 and 0x80 marker bits in one step.
+        const char32_t c = (c1 << 6) + c2 - 0x3080;
+        if (c > maxcode)
+          return invalid_mb_sequence;
+        from.next += 2;
+        return c;
+      }
+    else if (c1 < 0xF0) // 3-byte sequence
+      {
+        if (avail < 3)
+          return incomplete_mb_character;
+        const unsigned char c2 = from.next[1];
+        if ((c2 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        if (c1 == 0xE0 && c2 < 0xA0) // overlong
+          return invalid_mb_sequence;
+        const unsigned char c3 = from.next[2];
+        if ((c3 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        const char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
+        if (c > maxcode)
+          return invalid_mb_sequence;
+        from.next += 3;
+        return c;
+      }
+    else if (c1 < 0xF5) // 4-byte sequence
+      {
+        if (avail < 4)
+          return incomplete_mb_character;
+        const unsigned char c2 = from.next[1];
+        if ((c2 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        if (c1 == 0xF0 && c2 < 0x90) // overlong
+          return invalid_mb_sequence;
+        if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
+          return invalid_mb_sequence;
+        const unsigned char c3 = from.next[2];
+        if ((c3 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        const unsigned char c4 = from.next[3];
+        if ((c4 & 0xC0) != 0x80)
+          return invalid_mb_sequence;
+        const char32_t c
+          = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
+        if (c > maxcode)
+          return invalid_mb_sequence;
+        from.next += 4;
+        return c;
+      }
+    else // > U+10FFFF
+      return invalid_mb_sequence;
+  }
+
+  // Encode CODE_POINT as UTF-8 at to.next and advance to.next.
+  // Return false, writing nothing, if TO is too small for the whole
+  // sequence or if CODE_POINT is greater than U+10FFFF.
+  bool
+  write_utf8_code_point(range<char>& to, char32_t code_point)
+  {
+    if (code_point < 0x80)
+      {
+        if (to.size() < 1)
+          return false;
+        *to.next++ = code_point;
+      }
+    else if (code_point <= 0x7FF)
+      {
+        if (to.size() < 2)
+          return false;
+        *to.next++ = (code_point >> 6) + 0xC0;
+        *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else if (code_point <= 0xFFFF)
+      {
+        if (to.size() < 3)
+          return false;
+        *to.next++ = (code_point >> 12) + 0xE0;
+        *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
+        *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else if (code_point <= 0x10FFFF)
+      {
+        if (to.size() < 4)
+          return false;
+        *to.next++ = (code_point >> 18) + 0xF0;
+        *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
+        *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
+        *to.next++ = (code_point & 0x3F) + 0x80;
+      }
+    else
+      return false;
+    return true;
+  }
+
+  // Encode CODEPOINT as UTF-16 at to.next and advance to.next.
+  // Code points up to and including U+FFFF take one code unit, anything
+  // larger is written as a lead/trail surrogate pair.
+  // Return false, writing nothing, if TO is too small.
+  bool
+  write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
+  {
+    // U+FFFF itself still fits in a single unit, hence "<=".
+    if (codepoint <= max_single_utf16_unit)
+      {
+        if (to.size() > 0)
+          {
+            *to.next = codepoint;
+            ++to.next;
+            return true;
+          }
+      }
+    else if (to.size() > 1)
+      {
+        // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
+        const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+        const char16_t lead = LEAD_OFFSET + (codepoint >> 10);
+        const char16_t trail = 0xDC00 + (codepoint & 0x3FF);
+        // Store the surrogates themselves, not the two halves of the
+        // 32-bit code point.
+        to.next[0] = lead;
+        to.next[1] = trail;
+        to.next += 2;
+        return true;
+      }
+    return false;
+  }
+
+  // utf8 -> ucs4
+  // Returns partial if the input ends inside a multibyte sequence or if
+  // the output is full while input remains, error for invalid input.
+  codecvt_base::result
+  ucs4_in(range<const char>& from, range<char32_t>& to,
+          unsigned long maxcode = max_code_point)
+  {
+    while (from.size() && to.size())
+      {
+        const char32_t codepoint = read_utf8_code_point(from, maxcode);
+        if (codepoint == incomplete_mb_character)
+          return codecvt_base::partial;
+        if (codepoint > maxcode)
+          return codecvt_base::error;
+        *to.next++ = codepoint;
+      }
+    return from.size() ? codecvt_base::partial : codecvt_base::ok;
+  }
+
+  // ucs4 -> utf8
+  // Returns error for a code point above MAXCODE and partial when the
+  // output has no room for the next encoded sequence.
+  codecvt_base::result
+  ucs4_out(range<const char32_t>& from, range<char>& to,
+           unsigned long maxcode = max_code_point)
+  {
+    while (from.size())
+      {
+        const char32_t c = from.next[0];
+        if (c > maxcode)
+          return codecvt_base::error;
+        if (!write_utf8_code_point(to, c))
+          return codecvt_base::partial;
+        ++from.next;
+      }
+    return codecvt_base::ok;
+  }
+
+  // utf8 -> utf16
+  // Returns partial if the input ends inside a multibyte sequence or if
+  // the next code point does not fit in the remaining output, error for
+  // invalid input.
+  codecvt_base::result
+  utf16_in(range<const char>& from, range<char16_t>& to,
+           unsigned long maxcode = max_code_point)
+  {
+    while (from.size() && to.size())
+      {
+        const char* const first = from.next;
+        const char32_t codepoint = read_utf8_code_point(from, maxcode);
+        if (codepoint == incomplete_mb_character)
+          return codecvt_base::partial;
+        if (codepoint > maxcode)
+          return codecvt_base::error;
+        if (!write_utf16_code_point(to, codepoint))
+          {
+            // No room for a surrogate pair: un-read the code point so
+            // the caller can retry it with a larger output buffer.
+            from.next = first;
+            return codecvt_base::partial;
+          }
+      }
+    return codecvt_base::ok;
+  }
+
+  // utf16 -> utf8
+  // Returns error for an unpaired surrogate or a code point above
+  // MAXCODE, and partial when the output has no room for the next
+  // encoded sequence.
+  codecvt_base::result
+  utf16_out(range<const char16_t>& from, range<char>& to,
+            unsigned long maxcode = max_code_point)
+  {
+    while (from.size())
+      {
+        char32_t c = from.next[0];
+        int inc = 1;
+        if (c >= 0xD800 && c <= 0xDBFF) // lead surrogate, starts a pair
+          {
+            if (from.size() < 2)
+              return codecvt_base::ok; // stop converting at this point
+
+            const char32_t c2 = from.next[1];
+            if (c2 >= 0xDC00 && c2 <= 0xDFFF)
+              {
+                inc = 2;
+                // (0xD800 << 10) + 0xDC00 - 0x10000 == 0x35FDC00
+                c = (c << 10) + c2 - 0x35FDC00;
+              }
+            else
+              return codecvt_base::error;
+          }
+        else if (c >= 0xDC00 && c <= 0xDFFF) // trail surrogate with no lead
+          return codecvt_base::error;
+        if (c > maxcode)
+          return codecvt_base::error;
+        if (!write_utf8_code_point(to, c))
+          return codecvt_base::partial;
+        from.next += inc;
+      }
+    return codecvt_base::ok;
+  }
+
+  // return pos such that [begin,pos) is valid UTF-16 string no longer than max
+  // (the result is a count of UTF-8 bytes measured from BEGIN).
+  int
+  utf16_len(const char* begin, const char* end, size_t max,
+            char32_t maxcode = max_code_point)
+  {
+    range<const char> from{ begin, end };
+    size_t count = 0;
+    // Stop one unit short of MAX so a surrogate pair can never overshoot.
+    while (count+1 < max)
+      {
+        const char32_t c = read_utf8_code_point(from, maxcode);
+        if (c > maxcode) // invalid or incomplete input
+          break;
+        else if (c > max_single_utf16_unit)
+          ++count;
+        ++count;
+      }
+    if (count+1 == max) // take one more character if it fits in a single unit
+      read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
+    return from.next - begin;
+  }
+
+  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+  // (the result is a count of UTF-8 bytes measured from BEGIN).
+  int
+  ucs4_len(const char* begin, const char* end, size_t max,
+           char32_t maxcode = max_code_point)
+  {
+    range<const char> from{ begin, end };
+    size_t count = 0;
+    while (count < max)
+      {
+        const char32_t c = read_utf8_code_point(from, maxcode);
+        if (c > maxcode) // invalid or incomplete input
+          break;
+        ++count;
+      }
+    return from.next - begin;
+  }
+}
+
+// Define members of codecvt<char16_t, char, mbstate_t> specialization.
+// Converts from UTF-8 to UTF-16.
+
+locale::id codecvt<char16_t, char, mbstate_t>::id;
+
+codecvt<char16_t, char, mbstate_t>::~codecvt() { }
+
+// UTF-16 -> UTF-8.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_out(state_type&,
+       const intern_type* __from,
+       const intern_type* __from_end, const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = utf16_out(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+           extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv; // we don't use mbstate_t for the unicode facets
+}
+
+// UTF-8 -> UTF-16.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char16_t> to{ __to, __to_end };
+  auto res = utf16_in(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+codecvt<char16_t, char, mbstate_t>::
+do_length(state_type&, const extern_type* __from,
+          const extern_type* __end, size_t __max) const
+{
+  return utf16_len(__from, __end, __max);
+}
+
+int
+codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+
+// Define members of codecvt<char32_t, char, mbstate_t> specialization.
+// Converts from UTF-8 to UTF-32 (aka UCS-4).
+
+locale::id codecvt<char32_t, char, mbstate_t>::id;
+
+codecvt<char32_t, char, mbstate_t>::~codecvt() { }
+
+// UCS-4 -> UTF-8.  The state argument is unused.
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char32_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = ucs4_out(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+           extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv; // we don't use mbstate_t for the unicode facets
+}
+
+// UTF-8 -> UCS-4.  The state argument is unused.
+codecvt_base::result
+codecvt<char32_t, char, mbstate_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char32_t> to{ __to, __to_end };
+  auto res = ucs4_in(from, to);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+codecvt<char32_t, char, mbstate_t>::
+do_length(state_type&, const extern_type* __from,
+          const extern_type* __end, size_t __max) const
+{
+  return ucs4_len(__from, __end, __max);
+}
+
+int
+codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
+{ return 4; }
+
+// Explicit instantiations of the base class for the new facets, using
+// the GNU "inline template" instantiation extension.
+inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
+inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
+
+_GLIBCXX_END_NAMESPACE_VERSION
+}
+#endif // _GLIBCXX_USE_C99_STDINT_TR1
diff --git
a/libstdc++-v3/src/c++98/Makefile.am b/libstdc++-v3/src/c++98/Makefile.am index 6dd7a720d5b..e348dfb1205 100644 --- a/libstdc++-v3/src/c++98/Makefile.am +++ b/libstdc++-v3/src/c++98/Makefile.am @@ -176,6 +176,16 @@ numeric_members_cow.o: numeric_members_cow.cc $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $< endif +# XXX TODO move locale_init.cc and localename.cc to src/c++11 +locale_init.lo: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +locale_init.o: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.lo: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.o: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< + # Use special rules for the deprecated source files so that they find # deprecated include files. GLIBCXX_INCLUDE_DIR=$(glibcxx_builddir)/include diff --git a/libstdc++-v3/src/c++98/Makefile.in b/libstdc++-v3/src/c++98/Makefile.in index 9a2a27f0da3..3c69791bb66 100644 --- a/libstdc++-v3/src/c++98/Makefile.in +++ b/libstdc++-v3/src/c++98/Makefile.in @@ -764,6 +764,16 @@ vpath % $(top_srcdir)/src/c++98 @ENABLE_DUAL_ABI_TRUE@ $(LTCXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $< @ENABLE_DUAL_ABI_TRUE@numeric_members_cow.o: numeric_members_cow.cc @ENABLE_DUAL_ABI_TRUE@ $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $< + +# XXX TODO move locale_init.cc and localename.cc to src/c++11 +locale_init.lo: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +locale_init.o: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.lo: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.o: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< strstream.lo: strstream.cc $(LTCXXCOMPILE) -I$(GLIBCXX_INCLUDE_DIR)/backward -Wno-deprecated -c $< strstream.o: strstream.cc diff --git a/libstdc++-v3/src/c++98/locale_init.cc b/libstdc++-v3/src/c++98/locale_init.cc index c45eff3d6c9..0a95b9ff4b5 100644 --- a/libstdc++-v3/src/c++98/locale_init.cc +++ b/libstdc++-v3/src/c++98/locale_init.cc @@ -57,7 +57,7 
@@ _GLIBCXX_LOC_ID(_ZNSt8messagesIwE2idE); namespace { - const int num_facets = _GLIBCXX_NUM_FACETS + const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0); __gnu_cxx::__mutex& @@ -201,6 +201,16 @@ namespace fake_messages_w messages_w; #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + typedef char fake_codecvt_c16[sizeof(codecvt)] + __attribute__ ((aligned(__alignof__(codecvt)))); + fake_codecvt_c16 codecvt_c16; + + typedef char fake_codecvt_c32[sizeof(codecvt)] + __attribute__ ((aligned(__alignof__(codecvt)))); + fake_codecvt_c32 codecvt_c32; +#endif + // Storage for "C" locale caches. typedef char fake_num_cache_c[sizeof(std::__numpunct_cache)] __attribute__ ((aligned(__alignof__(std::__numpunct_cache)))); @@ -318,6 +328,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifdef _GLIBCXX_USE_WCHAR_T &std::ctype::id, &codecvt::id, +#endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + &codecvt::id, + &codecvt::id, #endif 0 }; @@ -522,6 +536,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_init_facet(new (&messages_w) std::messages(1)); #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + _M_init_facet(new (&codecvt_c16) codecvt(1)); + _M_init_facet(new (&codecvt_c32) codecvt(1)); +#endif + #if _GLIBCXX_USE_DUAL_ABI facet* extra[] = { __npc, __mpcf, __mpct # ifdef _GLIBCXX_USE_WCHAR_T diff --git a/libstdc++-v3/src/c++98/localename.cc b/libstdc++-v3/src/c++98/localename.cc index c42a2173563..2884beebfe1 100644 --- a/libstdc++-v3/src/c++98/localename.cc +++ b/libstdc++-v3/src/c++98/localename.cc @@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } -const int num_facets = _GLIBCXX_NUM_FACETS +const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0); // Construct named _Impl. 
@@ -267,7 +267,12 @@ const int num_facets = _GLIBCXX_NUM_FACETS _M_init_facet(new time_get); _M_init_facet(new time_put); _M_init_facet(new std::messages(__cloc, __s)); -#endif +#endif + +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + _M_init_facet(new codecvt); + _M_init_facet(new codecvt); +#endif #if _GLIBCXX_USE_DUAL_ABI _M_init_extra(&__cloc, &__clocm, __s, __smon); diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc new file mode 100644 index 00000000000..987233a1306 --- /dev/null +++ b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc @@ -0,0 +1,76 @@ +// Copyright (C) 2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . 
+ +// { dg-require-cstdint "" } +// { dg-options "-std=gnu++11" } + +#include +#include +#include +#include + +const char expected[] = u8"£¥€"; +const std::size_t expected_len = std::char_traits::length(expected); + +template +void test(const C* from) +{ + auto len = std::char_traits::length(from); + std::mbstate_t state{}; + char buf[16] = { }; + using test_type = std::codecvt; + const test_type& cvt = std::use_facet(std::locale::classic()); + auto from_end = from + len; + auto from_next = from; + auto buf_end = std::end(buf); + auto buf_next = buf; + auto res = cvt.out(state, from, from_end, from_next, buf, buf_end, buf_next); + VERIFY( res == std::codecvt_base::ok ); + VERIFY( from_next == from_end ); + VERIFY( (buf_next - buf) == expected_len ); + VERIFY( 0 == std::char_traits::compare(buf, expected, expected_len) ); + + C buf2[16]; + auto exp_end = expected + expected_len; + auto exp_next = expected; + auto buf2_end = std::end(buf2); + auto buf2_next = buf2; + res = cvt.in(state, expected, exp_end, exp_next, buf2, buf2_end, buf2_next); + VERIFY( res == std::codecvt_base::ok ); + VERIFY( exp_next == exp_end ); + VERIFY( (buf2_next - buf2) == len ); + VERIFY( 0 == std::char_traits::compare(buf2, from, len) ); +} + +void +test01() +{ + test(u"£¥€"); +} + +void +test02() +{ + test(U"£¥€"); +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc index b6f8c8bda37..33b5a8a7d4b 100644 --- a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc +++ b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc @@ -1,4 +1,5 @@ // { dg-require-iconv "ISO-8859-1" } +// { dg-options "-std=gnu++11" } // Copyright (C) 2006-2015 Free Software Foundation, Inc. 
// @@ -32,6 +33,11 @@ typedef std::codecvt c_codecvt; typedef std::codecvt w_codecvt; #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +typedef std::codecvt u16_codecvt; +typedef std::codecvt u32_codecvt; +#endif + class gnu_facet: public std::locale::facet { public: @@ -60,6 +66,10 @@ void test01() VERIFY( has_facet(loc13) ); #ifdef _GLIBCXX_USE_WCHAR_T VERIFY( has_facet(loc13) ); +#endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + VERIFY( has_facet(loc13) ); + VERIFY( has_facet(loc13) ); #endif VERIFY( has_facet(loc13) ); } -- 2.30.2