libstdc++-v3/src/c++11/codecvt.cc

   1 // Locale support (codecvt) -*- C++ -*-
   2
   3 // Copyright (C) 2015-2016 Free Software Foundation, Inc.
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the
   7 // terms of the GNU General Public License as published by the
   8 // Free Software Foundation; either version 3, or (at your option)
   9 // any later version.
  10
  11 // This library is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // Under Section 7 of GPL version 3, you are granted additional
  17 // permissions described in the GCC Runtime Library Exception, version
  18 // 3.1, as published by the Free Software Foundation.
  19
  20 // You should have received a copy of the GNU General Public License and
  21 // a copy of the GCC Runtime Library Exception along with this program;
  22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 // <http://www.gnu.org/licenses/>.
  24
  25 #include <codecvt>
  26 #include <cstring>              // std::memcpy, std::memcmp
  27 #include <bits/stl_algobase.h>  // std::max
  28
  29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
  30 namespace std _GLIBCXX_VISIBILITY(default)
  31 {
  32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
  33
  34 namespace
  35 {
  // Highest code point that can be stored in one UTF-16 code unit.
  const char32_t max_single_utf16_unit = char32_t(0xFFFF);

  // Default upper bound on code points used by the helpers below.
  const char32_t max_code_point = char32_t(0x10FFFF);

  // Sentinel results of the read_*_code_point helpers.  They only work
  // while maxcode < incomplete_mb_character (the original note states the
  // codecvt_utf* classes enforce that on construction).
  const char32_t incomplete_mb_character = static_cast<char32_t>(-2);
  const char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
  45
  // A half-open span [next, end) of elements, consumed from the front.
  // Kept as an aggregate so it can be brace-initialised from two pointers.
  template<typename Elem>
    struct range
    {
      Elem* next;   // first element not yet consumed
      Elem* end;    // one past the last element

      // Value of the element at the front of the span.
      Elem operator*() const
      { return next[0]; }

      // Drop the front element.
      range& operator++()
      {
        next += 1;
        return *this;
      }

      // Number of elements still available.
      size_t size() const
      { return end - next; }
    };
  58
  // Byte Order Marks that may appear as a "header" in front of a multibyte
  // sequence.  The UTF-16 arrays are declared with four elements; only the
  // first two carry the mark and the remaining two are zero.
  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
  const unsigned char utf16_bom[4] = { 0xFE, 0xFF, 0x00, 0x00 };
  const unsigned char utf16le_bom[4] = { 0xFF, 0xFE, 0x00, 0x00 };
  63
  64   template<size_t N>
  65     inline bool
  66     write_bom(range<char>& to, const unsigned char (&bom)[N])
  67     {
  68       if (to.size() < N)
  69         return false;
  70       memcpy(to.next, bom, N);
  71       to.next += N;
  72       return true;
  73     }
  74
  75   // If generate_header is set in mode write out UTF-8 BOM.
  76   bool
  77   write_utf8_bom(range<char>& to, codecvt_mode mode)
  78   {
  79     if (mode & generate_header)
  80       return write_bom(to, utf8_bom);
  81     return true;
  82   }
  83
  84   // If generate_header is set in mode write out the UTF-16 BOM indicated
  85   // by whether little_endian is set in mode.
  86   bool
  87   write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
  88   {
  89     if (mode & generate_header)
  90     {
  91       if (!to.size())
  92         return false;
  93       auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
  94       std::memcpy(to.next, bom, 2);
  95       ++to.next;
  96     }
  97     return true;
  98   }
  99
 100   template<size_t N>
 101     inline bool
 102     read_bom(range<const char>& from, const unsigned char (&bom)[N])
 103     {
 104       if (from.size() >= N && !memcmp(from.next, bom, N))
 105         {
 106           from.next += N;
 107           return true;
 108         }
 109       return false;
 110     }
 111
 112   // If consume_header is set in mode update from.next to after any BOM.
 113   void
 114   read_utf8_bom(range<const char>& from, codecvt_mode mode)
 115   {
 116     if (mode & consume_header)
 117       read_bom(from, utf8_bom);
 118   }
 119
 120   // If consume_header is set in mode update from.next to after any BOM.
 121   // Return little_endian iff the UTF-16LE BOM was present.
 122   codecvt_mode
 123   read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
 124   {
 125     if (mode & consume_header && from.size())
 126       {
 127         if (*from.next == 0xFEFF)
 128           ++from.next;
 129         else if (*from.next == 0xFFFE)
 130           {
 131             ++from.next;
 132             return little_endian;
 133           }
 134       }
 135     return {};
 136   }
 137
 138   // Read a codepoint from a UTF-8 multibyte sequence.
 139   // Updates from.next if the codepoint is not greater than maxcode.
 140   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
 141   char32_t
 142   read_utf8_code_point(range<const char>& from, unsigned long maxcode)
 143   {
 144     const size_t avail = from.size();
 145     if (avail == 0)
 146       return incomplete_mb_character;
 147     unsigned char c1 = from.next[0];
 148     // https://en.wikipedia.org/wiki/UTF-8#Sample_code
 149     if (c1 < 0x80)
 150     {
 151       ++from.next;
 152       return c1;
 153     }
 154     else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
 155       return invalid_mb_sequence;
 156     else if (c1 < 0xE0) // 2-byte sequence
 157     {
 158       if (avail < 2)
 159         return incomplete_mb_character;
 160       unsigned char c2 = from.next[1];
 161       if ((c2 & 0xC0) != 0x80)
 162         return invalid_mb_sequence;
 163       char32_t c = (c1 << 6) + c2 - 0x3080;
 164       if (c <= maxcode)
 165         from.next += 2;
 166       return c;
 167     }
 168     else if (c1 < 0xF0) // 3-byte sequence
 169     {
 170       if (avail < 3)
 171         return incomplete_mb_character;
 172       unsigned char c2 = from.next[1];
 173       if ((c2 & 0xC0) != 0x80)
 174         return invalid_mb_sequence;
 175       if (c1 == 0xE0 && c2 < 0xA0) // overlong
 176         return invalid_mb_sequence;
 177       unsigned char c3 = from.next[2];
 178       if ((c3 & 0xC0) != 0x80)
 179         return invalid_mb_sequence;
 180       char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
 181       if (c <= maxcode)
 182         from.next += 3;
 183       return c;
 184     }
 185     else if (c1 < 0xF5) // 4-byte sequence
 186     {
 187       if (avail < 4)
 188         return incomplete_mb_character;
 189       unsigned char c2 = from.next[1];
 190       if ((c2 & 0xC0) != 0x80)
 191         return invalid_mb_sequence;
 192       if (c1 == 0xF0 && c2 < 0x90) // overlong
 193         return invalid_mb_sequence;
 194       if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
 195       return invalid_mb_sequence;
 196       unsigned char c3 = from.next[2];
 197       if ((c3 & 0xC0) != 0x80)
 198         return invalid_mb_sequence;
 199       unsigned char c4 = from.next[3];
 200       if ((c4 & 0xC0) != 0x80)
 201         return invalid_mb_sequence;
 202       char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
 203       if (c <= maxcode)
 204         from.next += 4;
 205       return c;
 206     }
 207     else // > U+10FFFF
 208       return invalid_mb_sequence;
 209   }
 210
 211   bool
 212   write_utf8_code_point(range<char>& to, char32_t code_point)
 213   {
 214     if (code_point < 0x80)
 215       {
 216         if (to.size() < 1)
 217           return false;
 218         *to.next++ = code_point;
 219       }
 220     else if (code_point <= 0x7FF)
 221       {
 222         if (to.size() < 2)
 223           return false;
 224         *to.next++ = (code_point >> 6) + 0xC0;
 225         *to.next++ = (code_point & 0x3F) + 0x80;
 226       }
 227     else if (code_point <= 0xFFFF)
 228       {
 229         if (to.size() < 3)
 230           return false;
 231         *to.next++ = (code_point >> 12) + 0xE0;
 232         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
 233         *to.next++ = (code_point & 0x3F) + 0x80;
 234       }
 235     else if (code_point <= 0x10FFFF)
 236       {
 237         if (to.size() < 4)
 238           return false;
 239         *to.next++ = (code_point >> 18) + 0xF0;
 240         *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
 241         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
 242         *to.next++ = (code_point & 0x3F) + 0x80;
 243       }
 244     else
 245       return false;
 246     return true;
 247   }
 248
 249   inline char16_t
 250   adjust_byte_order(char16_t c, codecvt_mode mode)
 251   {
 252 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 253     return (mode & little_endian) ? __builtin_bswap16(c) : c;
 254 #else
 255     return (mode & little_endian) ? c : __builtin_bswap16(c);
 256 #endif
 257   }
 258
 259   // Return true if c is a high-surrogate (aka leading) code point.
 260   inline bool
 261   is_high_surrogate(char32_t c)
 262   {
 263     return c >= 0xD800 && c <= 0xDBFF;
 264   }
 265
 266   // Return true if c is a low-surrogate (aka trailing) code point.
 267   inline bool
 268   is_low_surrogate(char32_t c)
 269   {
 270     return c >= 0xDC00 && c <= 0xDFFF;
 271   }
 272
 273   inline char32_t
 274   surrogate_pair_to_code_point(char32_t high, char32_t low)
 275   {
 276     return (high << 10) + low - 0x35FDC00;
 277   }
 278
 279   // Read a codepoint from a UTF-16 multibyte sequence.
 280   // The sequence's endianness is indicated by (mode & little_endian).
 281   // Updates from.next if the codepoint is not greater than maxcode.
 282   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
 283   char32_t
 284   read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
 285                         codecvt_mode mode)
 286   {
 287     const size_t avail = from.size();
 288     if (avail == 0)
 289       return incomplete_mb_character;
 290     int inc = 1;
 291     char32_t c = adjust_byte_order(from.next[0], mode);
 292     if (is_high_surrogate(c))
 293       {
 294         if (avail < 2)
 295           return incomplete_mb_character;
 296         const char16_t c2 = adjust_byte_order(from.next[1], mode);
 297         if (is_low_surrogate(c2))
 298           {
 299             c = surrogate_pair_to_code_point(c, c2);
 300             inc = 2;
 301           }
 302         else
 303           return invalid_mb_sequence;
 304       }
 305     else if (is_low_surrogate(c))
 306       return invalid_mb_sequence;
 307     if (c <= maxcode)
 308       from.next += inc;
 309     return c;
 310   }
 311
 312   template<typename C>
 313   bool
 314   write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
 315   {
 316     static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
 317
 318     if (codepoint < max_single_utf16_unit)
 319       {
 320         if (to.size() > 0)
 321           {
 322             *to.next = adjust_byte_order(codepoint, mode);
 323             ++to.next;
 324             return true;
 325           }
 326       }
 327     else if (to.size() > 1)
 328       {
 329         // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
 330         const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
 331         char16_t lead = LEAD_OFFSET + (codepoint >> 10);
 332         char16_t trail = 0xDC00 + (codepoint & 0x3FF);
 333         to.next[0] = adjust_byte_order(lead, mode);
 334         to.next[1] = adjust_byte_order(trail, mode);
 335         to.next += 2;
 336         return true;
 337       }
 338     return false;
 339   }
 340
 341   // utf8 -> ucs4
 342   codecvt_base::result
 343   ucs4_in(range<const char>& from, range<char32_t>& to,
 344           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 345   {
 346     read_utf8_bom(from, mode);
 347     while (from.size() && to.size())
 348       {
 349         const char32_t codepoint = read_utf8_code_point(from, maxcode);
 350         if (codepoint == incomplete_mb_character)
 351           return codecvt_base::partial;
 352         if (codepoint > maxcode)
 353           return codecvt_base::error;
 354         *to.next++ = codepoint;
 355       }
 356     return from.size() ? codecvt_base::partial : codecvt_base::ok;
 357   }
 358
 359   // ucs4 -> utf8
 360   codecvt_base::result
 361   ucs4_out(range<const char32_t>& from, range<char>& to,
 362            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 363   {
 364     if (!write_utf8_bom(to, mode))
 365       return codecvt_base::partial;
 366     while (from.size())
 367       {
 368         const char32_t c = from.next[0];
 369         if (c > maxcode)
 370           return codecvt_base::error;
 371         if (!write_utf8_code_point(to, c))
 372           return codecvt_base::partial;
 373         ++from.next;
 374       }
 375     return codecvt_base::ok;
 376   }
 377
 378   // utf16 -> ucs4
 379   codecvt_base::result
 380   ucs4_in(range<const char16_t>& from, range<char32_t>& to,
 381           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 382   {
 383     if (read_utf16_bom(from, mode) == little_endian)
 384       mode = codecvt_mode(mode & little_endian);
 385     while (from.size() && to.size())
 386       {
 387         const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
 388         if (codepoint == incomplete_mb_character)
 389           return codecvt_base::partial;
 390         if (codepoint > maxcode)
 391           return codecvt_base::error;
 392         *to.next++ = codepoint;
 393       }
 394     return from.size() ? codecvt_base::partial : codecvt_base::ok;
 395   }
 396
 397   // ucs4 -> utf16
 398   codecvt_base::result
 399   ucs4_out(range<const char32_t>& from, range<char16_t>& to,
 400            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 401   {
 402     if (!write_utf16_bom(to, mode))
 403       return codecvt_base::partial;
 404     while (from.size())
 405       {
 406         const char32_t c = from.next[0];
 407         if (c > maxcode)
 408           return codecvt_base::error;
 409         if (!write_utf16_code_point(to, c, mode))
 410           return codecvt_base::partial;
 411         ++from.next;
 412       }
 413     return codecvt_base::ok;
 414   }
 415
 416   // utf8 -> utf16
 417   template<typename C>
 418   codecvt_base::result
 419   utf16_in(range<const char>& from, range<C>& to,
 420            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 421   {
 422     read_utf8_bom(from, mode);
 423     while (from.size() && to.size())
 424       {
 425         const char* const first = from.next;
 426         const char32_t codepoint = read_utf8_code_point(from, maxcode);
 427         if (codepoint == incomplete_mb_character)
 428           return codecvt_base::partial;
 429         if (codepoint > maxcode)
 430           return codecvt_base::error;
 431         if (!write_utf16_code_point(to, codepoint, mode))
 432           {
 433             from.next = first;
 434             return codecvt_base::partial;
 435           }
 436       }
 437     return codecvt_base::ok;
 438   }
 439
 440   // utf16 -> utf8
 441   template<typename C>
 442   codecvt_base::result
 443   utf16_out(range<const C>& from, range<char>& to,
 444             unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 445   {
 446     if (!write_utf8_bom(to, mode))
 447       return codecvt_base::partial;
 448     while (from.size())
 449       {
 450         char32_t c = from.next[0];
 451         int inc = 1;
 452         if (is_high_surrogate(c))
 453           {
 454             if (from.size() < 2)
 455               return codecvt_base::ok; // stop converting at this point
 456
 457             const char32_t c2 = from.next[1];
 458             if (is_low_surrogate(c2))
 459               {
 460                 c = surrogate_pair_to_code_point(c, c2);
 461                 inc = 2;
 462               }
 463             else
 464               return codecvt_base::error;
 465           }
 466         else if (is_low_surrogate(c))
 467           return codecvt_base::error;
 468         if (c > maxcode)
 469           return codecvt_base::error;
 470         if (!write_utf8_code_point(to, c))
 471           return codecvt_base::partial;
 472         from.next += inc;
 473       }
 474     return codecvt_base::ok;
 475   }
 476
 477   // return pos such that [begin,pos) is valid UTF-16 string no longer than max
 478   const char*
 479   utf16_span(const char* begin, const char* end, size_t max,
 480              char32_t maxcode = max_code_point, codecvt_mode mode = {})
 481   {
 482     range<const char> from{ begin, end };
 483     read_utf8_bom(from, mode);
 484     size_t count = 0;
 485     while (count+1 < max)
 486       {
 487         char32_t c = read_utf8_code_point(from, maxcode);
 488         if (c > maxcode)
 489           return from.next;
 490         else if (c > max_single_utf16_unit)
 491           ++count;
 492         ++count;
 493       }
 494     if (count+1 == max) // take one more character if it fits in a single unit
 495       read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
 496     return from.next;
 497   }
 498
 499   // utf8 -> ucs2
 500   codecvt_base::result
 501   ucs2_in(range<const char>& from, range<char16_t>& to,
 502           char32_t maxcode = max_code_point, codecvt_mode mode = {})
 503   {
 504     return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
 505   }
 506
 507   // ucs2 -> utf8
 508   codecvt_base::result
 509   ucs2_out(range<const char16_t>& from, range<char>& to,
 510            char32_t maxcode = max_code_point, codecvt_mode mode = {})
 511   {
 512     return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
 513   }
 514
 515   // ucs2 -> utf16
 516   codecvt_base::result
 517   ucs2_out(range<const char16_t>& from, range<char16_t>& to,
 518            char32_t maxcode = max_code_point, codecvt_mode mode = {})
 519   {
 520     if (!write_utf16_bom(to, mode))
 521       return codecvt_base::partial;
 522     while (from.size() && to.size())
 523       {
 524         char16_t c = from.next[0];
 525         if (is_high_surrogate(c))
 526           return codecvt_base::error;
 527         if (c > maxcode)
 528           return codecvt_base::error;
 529         *to.next++ = adjust_byte_order(c, mode);
 530         ++from.next;
 531       }
 532     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
 533   }
 534
 535   // utf16 -> ucs2
 536   codecvt_base::result
 537   ucs2_in(range<const char16_t>& from, range<char16_t>& to,
 538           char32_t maxcode = max_code_point, codecvt_mode mode = {})
 539   {
 540     if (read_utf16_bom(from, mode) == little_endian)
 541       mode = codecvt_mode(mode & little_endian);
 542     maxcode = std::max(max_single_utf16_unit, maxcode);
 543     while (from.size() && to.size())
 544       {
 545         const char32_t c = read_utf16_code_point(from, maxcode, mode);
 546         if (c == incomplete_mb_character)
 547           return codecvt_base::partial;
 548         if (c > maxcode)
 549           return codecvt_base::error;
 550         *to.next++ = c;
 551       }
 552     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
 553   }
 554
 555   const char16_t*
 556   ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
 557             char32_t maxcode, codecvt_mode mode)
 558   {
 559     range<const char16_t> from{ begin, end };
 560     if (read_utf16_bom(from, mode) == little_endian)
 561       mode = codecvt_mode(mode & little_endian);
 562     maxcode = std::max(max_single_utf16_unit, maxcode);
 563     char32_t c = 0;
 564     while (max-- && c <= maxcode)
 565       c = read_utf16_code_point(from, maxcode, mode);
 566     return from.next;
 567   }
 568
 569   const char*
 570   ucs2_span(const char* begin, const char* end, size_t max,
 571             char32_t maxcode, codecvt_mode mode)
 572   {
 573     range<const char> from{ begin, end };
 574     read_utf8_bom(from, mode);
 575     maxcode = std::max(max_single_utf16_unit, maxcode);
 576     char32_t c = 0;
 577     while (max-- && c <= maxcode)
 578       c = read_utf8_code_point(from, maxcode);
 579     return from.next;
 580   }
 581
 582   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
 583   const char*
 584   ucs4_span(const char* begin, const char* end, size_t max,
 585             char32_t maxcode = max_code_point, codecvt_mode mode = {})
 586   {
 587     range<const char> from{ begin, end };
 588     read_utf8_bom(from, mode);
 589     char32_t c = 0;
 590     while (max-- && c <= maxcode)
 591       c = read_utf8_code_point(from, maxcode);
 592     return from.next;
 593   }
 594
 595   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
 596   const char16_t*
 597   ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
 598             char32_t maxcode = max_code_point, codecvt_mode mode = {})
 599   {
 600     range<const char16_t> from{ begin, end };
 601     if (read_utf16_bom(from, mode) == little_endian)
 602       mode = codecvt_mode(mode & little_endian);
 603     char32_t c = 0;
 604     while (max-- && c <= maxcode)
 605       c = read_utf16_code_point(from, maxcode, mode);
 606     return from.next;
 607   }
 608 }
 609
 610 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
 611 // Converts from UTF-8 to UTF-16.
 612
 613 locale::id codecvt<char16_t, char, mbstate_t>::id;
 614
 615 codecvt<char16_t, char, mbstate_t>::~codecvt() { }
 616
 617 codecvt_base::result
 618 codecvt<char16_t, char, mbstate_t>::
 619 do_out(state_type&,
 620        const intern_type* __from,
 621        const intern_type* __from_end, const intern_type*& __from_next,
 622        extern_type* __to, extern_type* __to_end,
 623        extern_type*& __to_next) const
 624 {
 625   range<const char16_t> from{ __from, __from_end };
 626   range<char> to{ __to, __to_end };
 627   auto res = utf16_out(from, to);
 628   __from_next = from.next;
 629   __to_next = to.next;
 630   return res;
 631 }
 632
 633 codecvt_base::result
 634 codecvt<char16_t, char, mbstate_t>::
 635 do_unshift(state_type&, extern_type* __to, extern_type*,
 636            extern_type*& __to_next) const
 637 {
 638   __to_next = __to;
 639   return noconv; // we don't use mbstate_t for the unicode facets
 640 }
 641
 642 codecvt_base::result
 643 codecvt<char16_t, char, mbstate_t>::
 644 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 645       const extern_type*& __from_next,
 646       intern_type* __to, intern_type* __to_end,
 647       intern_type*& __to_next) const
 648 {
 649   range<const char> from{ __from, __from_end };
 650   range<char16_t> to{ __to, __to_end };
 651 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 652   codecvt_mode mode = {};
 653 #else
 654   codecvt_mode mode = little_endian;
 655 #endif
 656   auto res = utf16_in(from, to, max_code_point, mode);
 657   __from_next = from.next;
 658   __to_next = to.next;
 659   return res;
 660 }
 661
 662 int
 663 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
 664 { return 0; }
 665
 666 bool
 667 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
 668 { return false; }
 669
 670 int
 671 codecvt<char16_t, char, mbstate_t>::
 672 do_length(state_type&, const extern_type* __from,
 673           const extern_type* __end, size_t __max) const
 674 {
 675   __end = utf16_span(__from, __end, __max);
 676   return __end - __from;
 677 }
 678
 679 int
 680 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
 681 {
 682   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
 683   // whereas 4 byte sequences require two 16-bit code units.
 684   return 3;
 685 }
 686
 687 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
 688 // Converts from UTF-8 to UTF-32 (aka UCS-4).
 689
 690 locale::id codecvt<char32_t, char, mbstate_t>::id;
 691
 692 codecvt<char32_t, char, mbstate_t>::~codecvt() { }
 693
 694 codecvt_base::result
 695 codecvt<char32_t, char, mbstate_t>::
 696 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 697        const intern_type*& __from_next,
 698        extern_type* __to, extern_type* __to_end,
 699        extern_type*& __to_next) const
 700 {
 701   range<const char32_t> from{ __from, __from_end };
 702   range<char> to{ __to, __to_end };
 703   auto res = ucs4_out(from, to);
 704   __from_next = from.next;
 705   __to_next = to.next;
 706   return res;
 707 }
 708
 709 codecvt_base::result
 710 codecvt<char32_t, char, mbstate_t>::
 711 do_unshift(state_type&, extern_type* __to, extern_type*,
 712            extern_type*& __to_next) const
 713 {
 714   __to_next = __to;
 715   return noconv;
 716 }
 717
 718 codecvt_base::result
 719 codecvt<char32_t, char, mbstate_t>::
 720 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 721       const extern_type*& __from_next,
 722       intern_type* __to, intern_type* __to_end,
 723       intern_type*& __to_next) const
 724 {
 725   range<const char> from{ __from, __from_end };
 726   range<char32_t> to{ __to, __to_end };
 727   auto res = ucs4_in(from, to);
 728   __from_next = from.next;
 729   __to_next = to.next;
 730   return res;
 731 }
 732
 733 int
 734 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
 735 { return 0; }
 736
 737 bool
 738 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
 739 { return false; }
 740
 741 int
 742 codecvt<char32_t, char, mbstate_t>::
 743 do_length(state_type&, const extern_type* __from,
 744           const extern_type* __end, size_t __max) const
 745 {
 746   __end = ucs4_span(__from, __end, __max);
 747   return __end - __from;
 748 }
 749
 750 int
 751 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
 752 { return 4; }
 753
 754 // Define members of codecvt_utf8<char16_t> base class implementation.
 755 // Converts from UTF-8 to UCS-2.
 756
 757 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
 758
 759 codecvt_base::result
 760 __codecvt_utf8_base<char16_t>::
 761 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 762        const intern_type*& __from_next,
 763        extern_type* __to, extern_type* __to_end,
 764        extern_type*& __to_next) const
 765 {
 766   range<const char16_t> from{ __from, __from_end };
 767   range<char> to{ __to, __to_end };
 768   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
 769   __from_next = from.next;
 770   __to_next = to.next;
 771   return res;
 772 }
 773
 774 codecvt_base::result
 775 __codecvt_utf8_base<char16_t>::
 776 do_unshift(state_type&, extern_type* __to, extern_type*,
 777            extern_type*& __to_next) const
 778 {
 779   __to_next = __to;
 780   return noconv;
 781 }
 782
 783 codecvt_base::result
 784 __codecvt_utf8_base<char16_t>::
 785 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 786       const extern_type*& __from_next,
 787       intern_type* __to, intern_type* __to_end,
 788       intern_type*& __to_next) const
 789 {
 790   range<const char> from{ __from, __from_end };
 791   range<char16_t> to{ __to, __to_end };
 792   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
 793   __from_next = from.next;
 794   __to_next = to.next;
 795   return res;
 796 }
 797
 798 int
 799 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
 800 { return 0; }
 801
 802 bool
 803 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
 804 { return false; }
 805
 806 int
 807 __codecvt_utf8_base<char16_t>::
 808 do_length(state_type&, const extern_type* __from,
 809           const extern_type* __end, size_t __max) const
 810 {
 811   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
 812   return __end - __from;
 813 }
 814
 815 int
 816 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
 817 { return 3; }
 818
 819 // Define members of codecvt_utf8<char32_t> base class implementation.
 820 // Converts from UTF-8 to UTF-32 (aka UCS-4).
 821
 822 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
 823
 824 codecvt_base::result
 825 __codecvt_utf8_base<char32_t>::
 826 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 827        const intern_type*& __from_next,
 828        extern_type* __to, extern_type* __to_end,
 829        extern_type*& __to_next) const
 830 {
 831   range<const char32_t> from{ __from, __from_end };
 832   range<char> to{ __to, __to_end };
 833   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
 834   __from_next = from.next;
 835   __to_next = to.next;
 836   return res;
 837 }
 838
 839 codecvt_base::result
 840 __codecvt_utf8_base<char32_t>::
 841 do_unshift(state_type&, extern_type* __to, extern_type*,
 842            extern_type*& __to_next) const
 843 {
 844   __to_next = __to;
 845   return noconv;
 846 }
 847
 848 codecvt_base::result
 849 __codecvt_utf8_base<char32_t>::
 850 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 851       const extern_type*& __from_next,
 852       intern_type* __to, intern_type* __to_end,
 853       intern_type*& __to_next) const
 854 {
 855   range<const char> from{ __from, __from_end };
 856   range<char32_t> to{ __to, __to_end };
 857   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
 858   __from_next = from.next;
 859   __to_next = to.next;
 860   return res;
 861 }
 862
 863 int
 864 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
 865 { return 0; }
 866
 867 bool
 868 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
 869 { return false; }
 870
 871 int
 872 __codecvt_utf8_base<char32_t>::
 873 do_length(state_type&, const extern_type* __from,
 874           const extern_type* __end, size_t __max) const
 875 {
 876   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
 877   return __end - __from;
 878 }
 879
 880 int
 881 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
 882 { return 4; }
 883
 884 #ifdef _GLIBCXX_USE_WCHAR_T
 885 // Define members of codecvt_utf8<wchar_t> base class implementation.
 886 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
 887
 888 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
 889
 890 codecvt_base::result
 891 __codecvt_utf8_base<wchar_t>::
 892 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 893        const intern_type*& __from_next,
 894        extern_type* __to, extern_type* __to_end,
 895        extern_type*& __to_next) const
 896 {
 897   range<char> to{ __to, __to_end };
 898 #if __SIZEOF_WCHAR_T__ == 2
 899   range<const char16_t> from{
 900     reinterpret_cast<const char16_t*>(__from),
 901     reinterpret_cast<const char16_t*>(__from_end)
 902   };
 903   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
 904 #elif __SIZEOF_WCHAR_T__ == 4
 905   range<const char32_t> from{
 906     reinterpret_cast<const char32_t*>(__from),
 907     reinterpret_cast<const char32_t*>(__from_end)
 908   };
 909   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
 910 #else
 911   return codecvt_base::error;
 912 #endif
 913   __from_next = reinterpret_cast<const wchar_t*>(from.next);
 914   __to_next = to.next;
 915   return res;
 916 }
 917
 918 codecvt_base::result
 919 __codecvt_utf8_base<wchar_t>::
 920 do_unshift(state_type&, extern_type* __to, extern_type*,
 921            extern_type*& __to_next) const
 922 {
 923   __to_next = __to;
 924   return noconv;
 925 }
 926
 927 codecvt_base::result
 928 __codecvt_utf8_base<wchar_t>::
 929 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 930       const extern_type*& __from_next,
 931       intern_type* __to, intern_type* __to_end,
 932       intern_type*& __to_next) const
 933 {
 934   range<const char> from{ __from, __from_end };
 935 #if __SIZEOF_WCHAR_T__ == 2
 936   range<char16_t> to{
 937     reinterpret_cast<char16_t*>(__to),
 938     reinterpret_cast<char16_t*>(__to_end)
 939   };
 940   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
 941 #elif __SIZEOF_WCHAR_T__ == 4
 942   range<char32_t> to{
 943     reinterpret_cast<char32_t*>(__to),
 944     reinterpret_cast<char32_t*>(__to_end)
 945   };
 946   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
 947 #else
 948   return codecvt_base::error;
 949 #endif
 950   __from_next = from.next;
 951   __to_next = reinterpret_cast<wchar_t*>(to.next);
 952   return res;
 953 }
 954
 955 int
 956 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
 957 { return 0; }
 958
 959 bool
 960 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
 961 { return false; }
 962
 963 int
 964 __codecvt_utf8_base<wchar_t>::
 965 do_length(state_type&, const extern_type* __from,
 966           const extern_type* __end, size_t __max) const
 967 {
 968 #if __SIZEOF_WCHAR_T__ == 2
 969   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
 970 #elif __SIZEOF_WCHAR_T__ == 4
 971   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
 972 #else
 973   __end = __from;
 974 #endif
 975   return __end - __from;
 976 }
 977
 978 int
 979 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
 980 { return 4; }
 981 #endif
 982
 983 // Define members of codecvt_utf16<char16_t> base class implementation.
 984 // Converts from UTF-16 to UCS-2.
 985
 986 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
 987
 988 codecvt_base::result
 989 __codecvt_utf16_base<char16_t>::
 990 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 991        const intern_type*& __from_next,
 992        extern_type* __to, extern_type* __to_end,
 993        extern_type*& __to_next) const
 994 {
 995   range<const char16_t> from{ __from, __from_end };
 996   range<char16_t> to{
 997     reinterpret_cast<char16_t*>(__to),
 998     reinterpret_cast<char16_t*>(__to_end)
 999   };
1000   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1001   __from_next = from.next;
1002   __to_next = reinterpret_cast<char*>(to.next);
1003   return res;
1004 }
1005
1006 codecvt_base::result
1007 __codecvt_utf16_base<char16_t>::
1008 do_unshift(state_type&, extern_type* __to, extern_type*,
1009            extern_type*& __to_next) const
1010 {
1011   __to_next = __to;
1012   return noconv;
1013 }
1014
1015 codecvt_base::result
1016 __codecvt_utf16_base<char16_t>::
1017 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1018       const extern_type*& __from_next,
1019       intern_type* __to, intern_type* __to_end,
1020       intern_type*& __to_next) const
1021 {
1022   range<const char16_t> from{
1023     reinterpret_cast<const char16_t*>(__from),
1024     reinterpret_cast<const char16_t*>(__from_end)
1025   };
1026   range<char16_t> to{ __to, __to_end };
1027   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1028   __from_next = reinterpret_cast<const char*>(from.next);
1029   __to_next = to.next;
1030   return res;
1031 }
1032
1033 int
1034 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1035 { return 1; }
1036
1037 bool
1038 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1039 { return false; }
1040
1041 int
1042 __codecvt_utf16_base<char16_t>::
1043 do_length(state_type&, const extern_type* __from,
1044           const extern_type* __end, size_t __max) const
1045 {
1046   auto next = reinterpret_cast<const char16_t*>(__from);
1047   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1048                    _M_maxcode, _M_mode);
1049   return reinterpret_cast<const char*>(next) - __from;
1050 }
1051
1052 int
1053 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1054 { return 3; }
1055
1056 // Define members of codecvt_utf16<char32_t> base class implementation.
1057 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1058
1059 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1060
1061 codecvt_base::result
1062 __codecvt_utf16_base<char32_t>::
1063 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1064        const intern_type*& __from_next,
1065        extern_type* __to, extern_type* __to_end,
1066        extern_type*& __to_next) const
1067 {
1068   range<const char32_t> from{ __from, __from_end };
1069   range<char16_t> to{
1070     reinterpret_cast<char16_t*>(__to),
1071     reinterpret_cast<char16_t*>(__to_end)
1072   };
1073   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1074   __from_next = from.next;
1075   __to_next = reinterpret_cast<char*>(to.next);
1076   return res;
1077 }
1078
1079 codecvt_base::result
1080 __codecvt_utf16_base<char32_t>::
1081 do_unshift(state_type&, extern_type* __to, extern_type*,
1082            extern_type*& __to_next) const
1083 {
1084   __to_next = __to;
1085   return noconv;
1086 }
1087
1088 codecvt_base::result
1089 __codecvt_utf16_base<char32_t>::
1090 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1091       const extern_type*& __from_next,
1092       intern_type* __to, intern_type* __to_end,
1093       intern_type*& __to_next) const
1094 {
1095   range<const char16_t> from{
1096     reinterpret_cast<const char16_t*>(__from),
1097     reinterpret_cast<const char16_t*>(__from_end)
1098   };
1099   range<char32_t> to{ __to, __to_end };
1100   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1101   __from_next = reinterpret_cast<const char*>(from.next);
1102   __to_next = to.next;
1103   return res;
1104 }
1105
1106 int
1107 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1108 { return 0; }
1109
1110 bool
1111 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1112 { return false; }
1113
1114 int
1115 __codecvt_utf16_base<char32_t>::
1116 do_length(state_type&, const extern_type* __from,
1117           const extern_type* __end, size_t __max) const
1118 {
1119   auto next = reinterpret_cast<const char16_t*>(__from);
1120   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1121                    _M_maxcode, _M_mode);
1122   return reinterpret_cast<const char*>(next) - __from;
1123 }
1124
1125 int
1126 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1127 { return 4; }
1128
1129 #ifdef _GLIBCXX_USE_WCHAR_T
1130 // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1132
1133 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1134
1135 codecvt_base::result
1136 __codecvt_utf16_base<wchar_t>::
1137 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1138        const intern_type*& __from_next,
1139        extern_type* __to, extern_type* __to_end,
1140        extern_type*& __to_next) const
1141 {
1142   range<char> to{ __to, __to_end };
1143 #if __SIZEOF_WCHAR_T__ == 2
1144   range<const char16_t> from{
1145     reinterpret_cast<const char16_t*>(__from),
1146     reinterpret_cast<const char16_t*>(__from_end)
1147   };
1148   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1149 #elif __SIZEOF_WCHAR_T__ == 4
1150   range<const char32_t> from{
1151     reinterpret_cast<const char32_t*>(__from),
1152     reinterpret_cast<const char32_t*>(__from_end)
1153   };
1154   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1155 #else
1156   return codecvt_base::error;
1157 #endif
1158   __from_next = reinterpret_cast<const wchar_t*>(from.next);
1159   __to_next = to.next;
1160   return res;
1161 }
1162
1163 codecvt_base::result
1164 __codecvt_utf16_base<wchar_t>::
1165 do_unshift(state_type&, extern_type* __to, extern_type*,
1166            extern_type*& __to_next) const
1167 {
1168   __to_next = __to;
1169   return noconv;
1170 }
1171
1172 codecvt_base::result
1173 __codecvt_utf16_base<wchar_t>::
1174 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1175       const extern_type*& __from_next,
1176       intern_type* __to, intern_type* __to_end,
1177       intern_type*& __to_next) const
1178 {
1179   range<const char> from{ __from, __from_end };
1180 #if __SIZEOF_WCHAR_T__ == 2
1181   range<char16_t> to{
1182     reinterpret_cast<char16_t*>(__to),
1183     reinterpret_cast<char16_t*>(__to_end)
1184   };
1185   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1186 #elif __SIZEOF_WCHAR_T__ == 4
1187   range<char32_t> to{
1188     reinterpret_cast<char32_t*>(__to),
1189     reinterpret_cast<char32_t*>(__to_end)
1190   };
1191   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1192 #else
1193   return codecvt_base::error;
1194 #endif
1195   __from_next = from.next;
1196   __to_next = reinterpret_cast<wchar_t*>(to.next);
1197   return res;
1198 }
1199
1200 int
1201 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1202 { return 0; }
1203
1204 bool
1205 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1206 { return false; }
1207
1208 int
1209 __codecvt_utf16_base<wchar_t>::
1210 do_length(state_type&, const extern_type* __from,
1211           const extern_type* __end, size_t __max) const
1212 {
1213   auto next = reinterpret_cast<const char16_t*>(__from);
1214 #if __SIZEOF_WCHAR_T__ == 2
1215   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1216                    _M_maxcode, _M_mode);
1217 #elif __SIZEOF_WCHAR_T__ == 4
1218   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1219                    _M_maxcode, _M_mode);
1220 #endif
1221   return reinterpret_cast<const char*>(next) - __from;
1222 }
1223
1224 int
1225 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1226 { return 4; }
1227 #endif
1228
1229 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1230 // Converts from UTF-8 to UTF-16.
1231
1232 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1233
1234 codecvt_base::result
1235 __codecvt_utf8_utf16_base<char16_t>::
1236 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1237        const intern_type*& __from_next,
1238        extern_type* __to, extern_type* __to_end,
1239        extern_type*& __to_next) const
1240 {
1241   range<const char16_t> from{ __from, __from_end };
1242   range<char> to{ __to, __to_end };
1243   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1244   __from_next = from.next;
1245   __to_next = to.next;
1246   return res;
1247 }
1248
1249 codecvt_base::result
1250 __codecvt_utf8_utf16_base<char16_t>::
1251 do_unshift(state_type&, extern_type* __to, extern_type*,
1252            extern_type*& __to_next) const
1253 {
1254   __to_next = __to;
1255   return noconv;
1256 }
1257
1258 codecvt_base::result
1259 __codecvt_utf8_utf16_base<char16_t>::
1260 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1261       const extern_type*& __from_next,
1262       intern_type* __to, intern_type* __to_end,
1263       intern_type*& __to_next) const
1264 {
1265   range<const char> from{ __from, __from_end };
1266   range<char16_t> to{ __to, __to_end };
1267   codecvt_mode mode = codecvt_mode(_M_mode | (consume_header|generate_header));
1268 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1269   mode = codecvt_mode(mode | little_endian);
1270 #endif
1271   auto res = utf16_in(from, to, _M_maxcode, mode);
1272   __from_next = from.next;
1273   __to_next = to.next;
1274   return res;
1275 }
1276
1277 int
1278 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1279 { return 0; }
1280
1281 bool
1282 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1283 { return false; }
1284
1285 int
1286 __codecvt_utf8_utf16_base<char16_t>::
1287 do_length(state_type&, const extern_type* __from,
1288           const extern_type* __end, size_t __max) const
1289 {
1290   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1291   return __end - __from;
1292 }
1293
1294 int
1295 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1296 {
1297   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1298   // whereas 4 byte sequences require two 16-bit code units.
1299   return 3;
1300 }
1301
1302 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1303 // Converts from UTF-8 to UTF-16.
1304
1305 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1306
1307 codecvt_base::result
1308 __codecvt_utf8_utf16_base<char32_t>::
1309 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1310        const intern_type*& __from_next,
1311        extern_type* __to, extern_type* __to_end,
1312        extern_type*& __to_next) const
1313 {
1314   range<const char32_t> from{ __from, __from_end };
1315   range<char> to{ __to, __to_end };
1316   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1317   __from_next = from.next;
1318   __to_next = to.next;
1319   return res;
1320 }
1321
1322 codecvt_base::result
1323 __codecvt_utf8_utf16_base<char32_t>::
1324 do_unshift(state_type&, extern_type* __to, extern_type*,
1325            extern_type*& __to_next) const
1326 {
1327   __to_next = __to;
1328   return noconv;
1329 }
1330
1331 codecvt_base::result
1332 __codecvt_utf8_utf16_base<char32_t>::
1333 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1334       const extern_type*& __from_next,
1335       intern_type* __to, intern_type* __to_end,
1336       intern_type*& __to_next) const
1337 {
1338   range<const char> from{ __from, __from_end };
1339   range<char32_t> to{ __to, __to_end };
1340   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1341   __from_next = from.next;
1342   __to_next = to.next;
1343   return res;
1344 }
1345
1346 int
1347 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1348 { return 0; }
1349
1350 bool
1351 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1352 { return false; }
1353
1354 int
1355 __codecvt_utf8_utf16_base<char32_t>::
1356 do_length(state_type&, const extern_type* __from,
1357           const extern_type* __end, size_t __max) const
1358 {
1359   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1360   return __end - __from;
1361 }
1362
1363 int
1364 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1365 {
1366   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1367   // whereas 4 byte sequences require two 16-bit code units.
1368   return 3;
1369 }
1370
1371 #ifdef _GLIBCXX_USE_WCHAR_T
1372 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1373 // Converts from UTF-8 to UTF-16.
1374
1375 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1376
1377 codecvt_base::result
1378 __codecvt_utf8_utf16_base<wchar_t>::
1379 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1380        const intern_type*& __from_next,
1381        extern_type* __to, extern_type* __to_end,
1382        extern_type*& __to_next) const
1383 {
1384   range<const wchar_t> from{ __from, __from_end };
1385   range<char> to{ __to, __to_end };
1386   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1387   __from_next = from.next;
1388   __to_next = to.next;
1389   return res;
1390 }
1391
1392 codecvt_base::result
1393 __codecvt_utf8_utf16_base<wchar_t>::
1394 do_unshift(state_type&, extern_type* __to, extern_type*,
1395            extern_type*& __to_next) const
1396 {
1397   __to_next = __to;
1398   return noconv;
1399 }
1400
1401 codecvt_base::result
1402 __codecvt_utf8_utf16_base<wchar_t>::
1403 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1404       const extern_type*& __from_next,
1405       intern_type* __to, intern_type* __to_end,
1406       intern_type*& __to_next) const
1407 {
1408   range<const char> from{ __from, __from_end };
1409   range<wchar_t> to{ __to, __to_end };
1410   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1411   __from_next = from.next;
1412   __to_next = to.next;
1413   return res;
1414 }
1415
1416 int
1417 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1418 { return 0; }
1419
1420 bool
1421 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1422 { return false; }
1423
1424 int
1425 __codecvt_utf8_utf16_base<wchar_t>::
1426 do_length(state_type&, const extern_type* __from,
1427           const extern_type* __end, size_t __max) const
1428 {
1429   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1430   return __end - __from;
1431 }
1432
1433 int
1434 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1435 {
1436   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1437   // whereas 4 byte sequences require two 16-bit code units.
1438   return 3;
1439 }
1440 #endif
1441
// Explicit instantiations for the char16_t and char32_t character types.
// `inline template` is a GNU extension.
// NOTE(review): per the GCC manual it instantiates only the compiler
// support data for the class (e.g. the vtable) and not its members --
// confirm against the targeted compiler version.
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
// Ordinary explicit instantiation definitions of the by-name facets.
template class codecvt_byname<char16_t, char, mbstate_t>;
template class codecvt_byname<char32_t, char, mbstate_t>;
1446
1447 _GLIBCXX_END_NAMESPACE_VERSION
1448 }
1449 #endif // _GLIBCXX_USE_C99_STDINT_TR1