PR libstdc++/87642 handle multibyte thousands separators from libc
author Jonathan Wakely <jwakely@redhat.com>
Thu, 18 Oct 2018 19:57:25 +0000 (20:57 +0100)
committer Jonathan Wakely <redi@gcc.gnu.org>
Thu, 18 Oct 2018 19:57:25 +0000 (20:57 +0100)
If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is not a
single character we either need to narrow it to a single char or
ignore it (and therefore disable digit grouping for that facet).

PR libstdc++/87642
* config/locale/gnu/monetary_members.cc
(moneypunct<char, true>::_M_initialize_moneypunct): Use
__narrow_multibyte_chars to convert multibyte thousands separators
to a single char.
* config/locale/gnu/numeric_members.cc
(numpunct<char>::_M_initialize_numpunct): Likewise.
(__narrow_multibyte_chars): New function.

From-SVN: r265286

libstdc++-v3/ChangeLog
libstdc++-v3/config/locale/gnu/monetary_members.cc
libstdc++-v3/config/locale/gnu/numeric_members.cc

index cb9aac1a7c3b21b420302f35de50e70ee2003f33..b26e1fcfd299e5195ae729abeddd9c5e9eb26afb 100644 (file)
@@ -1,5 +1,14 @@
 2018-10-18  Jonathan Wakely  <jwakely@redhat.com>
 
+       PR libstdc++/87642
+       * config/locale/gnu/monetary_members.cc
+       (moneypunct<char, true>::_M_initialize_moneypunct): Use
+       __narrow_multibyte_chars to convert multibyte thousands separators
+       to a single char.
+       * config/locale/gnu/numeric_members.cc
+       (numpunct<char>::_M_initialize_numpunct): Likewise.
+       (__narrow_multibyte_chars): New function.
+
        PR libstdc++/87641
        * include/bits/valarray_array.h (__valarray_sum): Use first element
        to initialize accumulator instead of value-initializing it.
index b3e7645385ad0e3736af8bcf42de330482d32929..212c68dd5018c1b6cc10de3ee5d78a38efb45c07 100644 (file)
@@ -207,6 +207,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 #endif
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
   template<>
     void
     moneypunct<char, true>::_M_initialize_moneypunct(__c_locale __cloc,
@@ -241,8 +243,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // Named locale.
          _M_data->_M_decimal_point = *(__nl_langinfo_l(__MON_DECIMAL_POINT,
                                                        __cloc));
-         _M_data->_M_thousands_sep = *(__nl_langinfo_l(__MON_THOUSANDS_SEP,
-                                                       __cloc));
+         const char* thousands_sep = __nl_langinfo_l(__MON_THOUSANDS_SEP,
+                                                     __cloc);
+         if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+           _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+                                                                __cloc);
+         else
+           _M_data->_M_thousands_sep = *thousands_sep;
 
          // Check for NULL, which implies no fractional digits.
          if (_M_data->_M_decimal_point == '\0')
index 1ede8fadbd02411b2590d5a41a775ab19fd30ab4..faa35777cf3319ba2756ff2ad2c325c7e6956e9d 100644 (file)
 
 #include <locale>
 #include <bits/c++locale_internal.h>
+#include <iconv.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
+/* This file might be compiled twice, but we only want to define this once.
+ *
+ * __narrow_multibyte_chars(s, cloc): reduce the multibyte string `s`
+ * (a thousands separator obtained from the C locale `cloc`) to one char.
+ *
+ *   s     NUL-terminated separator string, encoded in cloc's codeset.
+ *   cloc  locale whose CODESET names the encoding of `s`.
+ *
+ * Returns the single-char replacement, or '\0' when none could be
+ * produced (any iconv_open/iconv failure falls through to '\0').
+ *
+ * Strategy:
+ *   1. When the codeset is "UTF-8", three known separators are mapped
+ *      directly without calling iconv.
+ *   2. Otherwise `s` is converted to "ASCII//TRANSLIT" with room for
+ *      exactly one output byte, and that byte is then converted from
+ *      "ASCII" back into the locale's codeset; the result of the second
+ *      conversion is returned.
+ *
+ * NOTE(review): success is judged only by iconv's return value;
+ * outbytesleft is never inspected.  If a conversion can report success
+ * without writing a byte, c1/c2 would be read uninitialized -- confirm
+ * whether that can happen here.
+ */
+#if ! _GLIBCXX_USE_CXX11_ABI
+  char
+  __narrow_multibyte_chars(const char* s, __locale_t cloc)
+  {
+    const char* codeset = __nl_langinfo_l(CODESET, cloc); // encoding name of `s`
+    if (!strcmp(codeset, "UTF-8"))
+      {
+       // optimize for some known cases
+       if (!strcmp(s, "\u202F")) // NARROW NO-BREAK SPACE
+         return ' ';
+       if (!strcmp(s, "\u2019")) // RIGHT SINGLE QUOTATION MARK
+         return '\'';
+       if (!strcmp(s, "\u066C")) // ARABIC THOUSANDS SEPARATOR
+         return '\'';
+      }
+
+    iconv_t cd = iconv_open("ASCII//TRANSLIT", codeset); // locale codeset -> ASCII
+    if (cd != (iconv_t)-1)
+      {
+       char c1; // receives the one-byte ASCII result
+       size_t inbytesleft = strlen(s);
+       size_t outbytesleft = 1; // room for exactly one output byte
+       char* inbuf = const_cast<char*>(s); // iconv is passed a char** below
+       char* outbuf = &c1;
+       size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+       iconv_close(cd);
+       if (n != (size_t)-1)
+         {
+           cd = iconv_open(codeset, "ASCII"); // ASCII -> locale codeset
+           if (cd != (iconv_t)-1)
+             {
+               char c2; // receives c1 re-encoded in the locale's codeset
+               inbuf = &c1;
+               inbytesleft = 1;
+               outbuf = &c2;
+               outbytesleft = 1; // result must again fit in a single byte
+               n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+               iconv_close(cd);
+               if (n != (size_t)-1)
+                 return c2;
+             }
+         }
+      }
+    return '\0'; // no single-char replacement could be produced
+  }
+#endif
+
   template<>
     void
     numpunct<char>::_M_initialize_numpunct(__c_locale __cloc)
@@ -63,8 +114,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // Named locale.
          _M_data->_M_decimal_point = *(__nl_langinfo_l(DECIMAL_POINT,
                                                        __cloc));
-         _M_data->_M_thousands_sep = *(__nl_langinfo_l(THOUSANDS_SEP,
-                                                       __cloc));
+         const char* thousands_sep = __nl_langinfo_l(THOUSANDS_SEP, __cloc);
+
+         if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+           _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+                                                                __cloc);
+         else
+           _M_data->_M_thousands_sep = *thousands_sep;
 
          // Check for NULL, which implies no grouping.
          if (_M_data->_M_thousands_sep == '\0')