From 7f971f18e4285157dbf584cf9330716d3ea0fdfc Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Wed, 18 Feb 2015 19:39:03 +0000 Subject: [PATCH] codecvt.cc (write_utf16_code_point): Fix code to output surrogate pairs. * src/c++11/codecvt.cc (write_utf16_code_point): Fix code to output surrogate pairs. (utf16_in): Pass mode argument to write_utf16_code_point. (codecvt<char16_t, char, mbstate_t>::do_in): Set mode according to native byte order. * testsuite/22_locale/codecvt/char16_t.cc: New. * testsuite/22_locale/codecvt/in/wchar_t/1.cc: Fix typo. From-SVN: r220793 --- libstdc++-v3/ChangeLog | 10 ++ libstdc++-v3/src/c++11/codecvt.cc | 16 +-- .../testsuite/22_locale/codecvt/char16_t.cc | 97 +++++++++++++++++++ .../22_locale/codecvt/in/wchar_t/1.cc | 2 +- 4 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 libstdc++-v3/testsuite/22_locale/codecvt/char16_t.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 9e08cb14871..fb9a4bd7562 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,13 @@ +2015-02-18 Jonathan Wakely + + * src/c++11/codecvt.cc (write_utf16_code_point): Fix code to output + surrogate pairs. + (utf16_in): Pass mode argument to write_utf16_code_point. + (codecvt<char16_t, char, mbstate_t>::do_in): Set mode according to + native byte order. + * testsuite/22_locale/codecvt/char16_t.cc: New. + * testsuite/22_locale/codecvt/in/wchar_t/1.cc: Fix typo. 
+ 2015-02-17 Rüdiger Sonderfeld Jonathan Wakely diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc index 594dae6810c..aebd3f34986 100644 --- a/libstdc++-v3/src/c++11/codecvt.cc +++ b/libstdc++-v3/src/c++11/codecvt.cc @@ -295,13 +295,10 @@ namespace { // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - const char32_t SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; char16_t lead = LEAD_OFFSET + (codepoint >> 10); char16_t trail = 0xDC00 + (codepoint & 0x3FF); - char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET; - - to.next[0] = adjust_byte_order(utf16bytes >> 16, mode); - to.next[1] = adjust_byte_order(utf16bytes & 0xFFFF, mode); + to.next[0] = adjust_byte_order(lead, mode); + to.next[1] = adjust_byte_order(trail, mode); to.next += 2; return true; } @@ -400,7 +397,7 @@ namespace return codecvt_base::partial; if (codepoint > maxcode) return codecvt_base::error; - if (!write_utf16_code_point(to, codepoint, {})) + if (!write_utf16_code_point(to, codepoint, mode)) { from.next = first; return codecvt_base::partial; @@ -618,7 +615,12 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end, { range from{ __from, __from_end }; range to{ __to, __to_end }; - auto res = utf16_in(from, to); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + codecvt_mode mode = {}; +#else + codecvt_mode mode = little_endian; +#endif + auto res = utf16_in(from, to, max_code_point, mode); __from_next = from.next; __to_next = to.next; return res; diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/char16_t.cc b/libstdc++-v3/testsuite/22_locale/codecvt/char16_t.cc new file mode 100644 index 00000000000..14477f5b711 --- /dev/null +++ b/libstdc++-v3/testsuite/22_locale/codecvt/char16_t.cc @@ -0,0 +1,97 @@ +// Copyright (C) 2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// { dg-options "-std=gnu++11" } + +// [locale.codecvt], C++11 22.4.1.4. specialization. + +#include <locale> +#include <cstring> +#include <testsuite_hooks.h> + +void +test01() +{ + using namespace std; + typedef codecvt<char16_t, char, mbstate_t> codecvt_c16; + locale loc_c = locale::classic(); + VERIFY(has_facet<codecvt_c16>(loc_c)); + const codecvt_c16* const cvt = &use_facet<codecvt_c16>(loc_c); + + VERIFY(!cvt->always_noconv()); + VERIFY(cvt->max_length() == 3); + VERIFY(cvt->encoding() == 0); + + const char u8dat[] = u8"H\U000000E4ll\U000000F6 \U0001F63F \U000056FD " + u8"\U0000222B f(\U000003BA) exp(-2\U000003C0\U000003C9) d\U000003BA " + u8"\U0001F6BF \U0001F6BF \U0001F648 \U00000413\U00000435\U0000043E" + u8"\U00000433\U00000440\U00000430\U00000444\U00000438\U0000044F \U0000FB05"; + const char* const u8dat_end = std::end(u8dat); + + const char16_t u16dat[] = u"H\U000000E4ll\U000000F6 \U0001F63F \U000056FD " + u"\U0000222B f(\U000003BA) exp(-2\U000003C0\U000003C9) d\U000003BA " + u"\U0001F6BF \U0001F6BF \U0001F648 \U00000413\U00000435\U0000043E" + u"\U00000433\U00000440\U00000430\U00000444\U00000438\U0000044F \U0000FB05"; + const char16_t* const u16dat_end = std::end(u16dat); + + { + const size_t len = u16dat_end - u16dat + 1; + char16_t* const buffer = new char16_t[len]; + char16_t* const buffer_end = buffer + len; + + const char* from_next; + char16_t* to_next; + + codecvt_c16::state_type state01; + state01 = {}; + 
codecvt_base::result res = cvt->in(state01, u8dat, u8dat_end, from_next, + buffer, buffer_end, to_next); + + VERIFY(res == codecvt_base::ok); + VERIFY(from_next == u8dat_end); + VERIFY(std::memcmp((void*)buffer, (void*)u16dat, sizeof(u16dat)) == 0); + + delete[] buffer; + } + + { + const size_t len = u8dat_end - u8dat + 1; + char* const buffer = new char[len]; + char* const buffer_end = buffer + len; + + const char16_t* from_next; + char* to_next; + + codecvt_c16::state_type state01; + state01 = {}; + codecvt_base::result res = cvt->out(state01, u16dat, u16dat_end, +from_next, + buffer, buffer_end, to_next); + + VERIFY(res == codecvt_base::ok); + VERIFY(from_next == u16dat_end); + VERIFY(std::memcmp((void*)buffer, (void*)u8dat, sizeof(u8dat)) == 0); + + delete[] buffer; + } +} + +int +main() +{ + test01(); +} diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/in/wchar_t/1.cc b/libstdc++-v3/testsuite/22_locale/codecvt/in/wchar_t/1.cc index 1e682a671a7..ff0b657e88c 100644 --- a/libstdc++-v3/testsuite/22_locale/codecvt/in/wchar_t/1.cc +++ b/libstdc++-v3/testsuite/22_locale/codecvt/in/wchar_t/1.cc @@ -25,7 +25,7 @@ // Need to explicitly set the state(mbstate_t) to zero. // How to do this is not specified by the ISO C99 standard, so we -// might need to add some operators to make the intuiative case +// might need to add some operators to make the intuitive case // work: // w_codecvt::state_type state00; // state00 = 0; -- 2.30.2