327beb6b91c0449b1a44eccb72ddab845a21d7bd
[gcc.git] / libstdc++-v3 / src / c++11 / codecvt.cc
1 // Locale support (codecvt) -*- C++ -*-
2
3 // Copyright (C) 2015-2016 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 #include <codecvt>
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::max
28
29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
30 namespace std _GLIBCXX_VISIBILITY(default)
31 {
32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
33
34 namespace
35 {
// Highest code point that can be stored in one UTF-16 code unit.
constexpr char32_t max_single_utf16_unit = 0xFFFF;

// Default upper bound on code points (U+10FFFF).
constexpr char32_t max_code_point = 0x10FFFF;

// Sentinel results of the decoding helpers below.  They are only
// distinguishable from real code points while maxcode stays below
// incomplete_mb_character (the codecvt_utf* classes enforce that on
// construction).
constexpr char32_t incomplete_mb_character = static_cast<char32_t>(-2);
constexpr char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
45
// A half-open sequence [next, end) of Elem that is consumed from the front.
// Deliberately an aggregate: the code below builds it with
// range<T> r{ first, last }.
template<typename Elem>
struct range
{
  Elem* next;	// first element not yet consumed
  Elem* end;	// one past the last element

  // Number of elements still available.
  size_t size() const { return static_cast<size_t>(end - next); }

  // Copy of the element at the front.
  Elem operator*() const { return next[0]; }

  // Step past the front element.
  range& operator++()
  {
    next += 1;
    return *this;
  }
};
58
// A multibyte sequence may start with a "header" consisting of a Byte Order
// Mark.  Each array holds exactly the bytes as they appear in the stream,
// so its size can be used as the BOM length by read_bom/write_bom.
const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
// The UTF-16 marks are two bytes long; declaring them with four elements
// would append two zero bytes to the mark.
const unsigned char utf16_bom[2] = { 0xFE, 0xFF };	// big-endian
const unsigned char utf16le_bom[2] = { 0xFF, 0xFE };	// little-endian
63
64 template<size_t N>
65 inline bool
66 write_bom(range<char>& to, const unsigned char (&bom)[N])
67 {
68 if (to.size() < N)
69 return false;
70 memcpy(to.next, bom, N);
71 to.next += N;
72 return true;
73 }
74
75 // If generate_header is set in mode write out UTF-8 BOM.
76 bool
77 write_utf8_bom(range<char>& to, codecvt_mode mode)
78 {
79 if (mode & generate_header)
80 return write_bom(to, utf8_bom);
81 return true;
82 }
83
84 // If generate_header is set in mode write out the UTF-16 BOM indicated
85 // by whether little_endian is set in mode.
86 bool
87 write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
88 {
89 if (mode & generate_header)
90 {
91 if (!to.size())
92 return false;
93 auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
94 std::memcpy(to.next, bom, 2);
95 ++to.next;
96 }
97 return true;
98 }
99
100 template<size_t N>
101 inline bool
102 read_bom(range<const char>& from, const unsigned char (&bom)[N])
103 {
104 if (from.size() >= N && !memcmp(from.next, bom, N))
105 {
106 from.next += N;
107 return true;
108 }
109 return false;
110 }
111
112 // If consume_header is set in mode update from.next to after any BOM.
113 void
114 read_utf8_bom(range<const char>& from, codecvt_mode mode)
115 {
116 if (mode & consume_header)
117 read_bom(from, utf8_bom);
118 }
119
120 // If consume_header is set in mode update from.next to after any BOM.
121 // Return little_endian iff the UTF-16LE BOM was present.
122 codecvt_mode
123 read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
124 {
125 if (mode & consume_header && from.size())
126 {
127 if (*from.next == 0xFEFF)
128 ++from.next;
129 else if (*from.next == 0xFFFE)
130 {
131 ++from.next;
132 return little_endian;
133 }
134 }
135 return {};
136 }
137
138 // Read a codepoint from a UTF-8 multibyte sequence.
139 // Updates from.next if the codepoint is not greater than maxcode.
140 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
141 char32_t
142 read_utf8_code_point(range<const char>& from, unsigned long maxcode)
143 {
144 const size_t avail = from.size();
145 if (avail == 0)
146 return incomplete_mb_character;
147 unsigned char c1 = from.next[0];
148 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
149 if (c1 < 0x80)
150 {
151 ++from.next;
152 return c1;
153 }
154 else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
155 return invalid_mb_sequence;
156 else if (c1 < 0xE0) // 2-byte sequence
157 {
158 if (avail < 2)
159 return incomplete_mb_character;
160 unsigned char c2 = from.next[1];
161 if ((c2 & 0xC0) != 0x80)
162 return invalid_mb_sequence;
163 char32_t c = (c1 << 6) + c2 - 0x3080;
164 if (c <= maxcode)
165 from.next += 2;
166 return c;
167 }
168 else if (c1 < 0xF0) // 3-byte sequence
169 {
170 if (avail < 3)
171 return incomplete_mb_character;
172 unsigned char c2 = from.next[1];
173 if ((c2 & 0xC0) != 0x80)
174 return invalid_mb_sequence;
175 if (c1 == 0xE0 && c2 < 0xA0) // overlong
176 return invalid_mb_sequence;
177 unsigned char c3 = from.next[2];
178 if ((c3 & 0xC0) != 0x80)
179 return invalid_mb_sequence;
180 char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
181 if (c <= maxcode)
182 from.next += 3;
183 return c;
184 }
185 else if (c1 < 0xF5) // 4-byte sequence
186 {
187 if (avail < 4)
188 return incomplete_mb_character;
189 unsigned char c2 = from.next[1];
190 if ((c2 & 0xC0) != 0x80)
191 return invalid_mb_sequence;
192 if (c1 == 0xF0 && c2 < 0x90) // overlong
193 return invalid_mb_sequence;
194 if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
195 return invalid_mb_sequence;
196 unsigned char c3 = from.next[2];
197 if ((c3 & 0xC0) != 0x80)
198 return invalid_mb_sequence;
199 unsigned char c4 = from.next[3];
200 if ((c4 & 0xC0) != 0x80)
201 return invalid_mb_sequence;
202 char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
203 if (c <= maxcode)
204 from.next += 4;
205 return c;
206 }
207 else // > U+10FFFF
208 return invalid_mb_sequence;
209 }
210
211 bool
212 write_utf8_code_point(range<char>& to, char32_t code_point)
213 {
214 if (code_point < 0x80)
215 {
216 if (to.size() < 1)
217 return false;
218 *to.next++ = code_point;
219 }
220 else if (code_point <= 0x7FF)
221 {
222 if (to.size() < 2)
223 return false;
224 *to.next++ = (code_point >> 6) + 0xC0;
225 *to.next++ = (code_point & 0x3F) + 0x80;
226 }
227 else if (code_point <= 0xFFFF)
228 {
229 if (to.size() < 3)
230 return false;
231 *to.next++ = (code_point >> 12) + 0xE0;
232 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
233 *to.next++ = (code_point & 0x3F) + 0x80;
234 }
235 else if (code_point <= 0x10FFFF)
236 {
237 if (to.size() < 4)
238 return false;
239 *to.next++ = (code_point >> 18) + 0xF0;
240 *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
241 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
242 *to.next++ = (code_point & 0x3F) + 0x80;
243 }
244 else
245 return false;
246 return true;
247 }
248
// Convert C between host byte order and the byte order selected by MODE
// (little-endian if little_endian is set, big-endian otherwise).
// The bytes are swapped exactly when the two orders differ.
inline char16_t
adjust_byte_order(char16_t c, codecvt_mode mode)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const bool swap_bytes = (mode & little_endian) != 0;
#else
  const bool swap_bytes = (mode & little_endian) == 0;
#endif
  return swap_bytes ? __builtin_bswap16(c) : c;
}
258
// True if C is a high (leading) surrogate, i.e. lies in U+D800..U+DBFF.
inline bool
is_high_surrogate(char32_t c)
{
  // All 1024 high surrogates share the same bits above the low ten.
  return (c >> 10) == (0xD800 >> 10);
}
265
// True if C is a low (trailing) surrogate, i.e. lies in U+DC00..U+DFFF.
inline bool
is_low_surrogate(char32_t c)
{
  // All 1024 low surrogates share the same bits above the low ten.
  return (c >> 10) == (0xDC00 >> 10);
}
272
// Combine a high and a low surrogate into the code point they encode.
// The caller is expected to have checked both with is_high_surrogate /
// is_low_surrogate; no validation happens here.
inline char32_t
surrogate_pair_to_code_point(char32_t high, char32_t low)
{
  // Ten payload bits come from each surrogate; pairs start at U+10000.
  const char32_t upper = (high - 0xD800) << 10;
  const char32_t lower = low - 0xDC00;
  return 0x10000 + upper + lower;
}
278
279 // Read a codepoint from a UTF-16 multibyte sequence.
280 // The sequence's endianness is indicated by (mode & little_endian).
281 // Updates from.next if the codepoint is not greater than maxcode.
282 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
283 char32_t
284 read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
285 codecvt_mode mode)
286 {
287 const size_t avail = from.size();
288 if (avail == 0)
289 return incomplete_mb_character;
290 int inc = 1;
291 char32_t c = adjust_byte_order(from.next[0], mode);
292 if (is_high_surrogate(c))
293 {
294 if (avail < 2)
295 return incomplete_mb_character;
296 const char16_t c2 = adjust_byte_order(from.next[1], mode);
297 if (is_low_surrogate(c2))
298 {
299 c = surrogate_pair_to_code_point(c, c2);
300 inc = 2;
301 }
302 else
303 return invalid_mb_sequence;
304 }
305 else if (is_low_surrogate(c))
306 return invalid_mb_sequence;
307 if (c <= maxcode)
308 from.next += inc;
309 return c;
310 }
311
312 template<typename C>
313 bool
314 write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
315 {
316 static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
317
318 if (codepoint < max_single_utf16_unit)
319 {
320 if (to.size() > 0)
321 {
322 *to.next = adjust_byte_order(codepoint, mode);
323 ++to.next;
324 return true;
325 }
326 }
327 else if (to.size() > 1)
328 {
329 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
330 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
331 char16_t lead = LEAD_OFFSET + (codepoint >> 10);
332 char16_t trail = 0xDC00 + (codepoint & 0x3FF);
333 to.next[0] = adjust_byte_order(lead, mode);
334 to.next[1] = adjust_byte_order(trail, mode);
335 to.next += 2;
336 return true;
337 }
338 return false;
339 }
340
341 // utf8 -> ucs4
342 codecvt_base::result
343 ucs4_in(range<const char>& from, range<char32_t>& to,
344 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
345 {
346 read_utf8_bom(from, mode);
347 while (from.size() && to.size())
348 {
349 const char32_t codepoint = read_utf8_code_point(from, maxcode);
350 if (codepoint == incomplete_mb_character)
351 return codecvt_base::partial;
352 if (codepoint > maxcode)
353 return codecvt_base::error;
354 *to.next++ = codepoint;
355 }
356 return from.size() ? codecvt_base::partial : codecvt_base::ok;
357 }
358
359 // ucs4 -> utf8
360 codecvt_base::result
361 ucs4_out(range<const char32_t>& from, range<char>& to,
362 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
363 {
364 if (!write_utf8_bom(to, mode))
365 return codecvt_base::partial;
366 while (from.size())
367 {
368 const char32_t c = from.next[0];
369 if (c > maxcode)
370 return codecvt_base::error;
371 if (!write_utf8_code_point(to, c))
372 return codecvt_base::partial;
373 ++from.next;
374 }
375 return codecvt_base::ok;
376 }
377
378 // utf16 -> ucs4
379 codecvt_base::result
380 ucs4_in(range<const char16_t>& from, range<char32_t>& to,
381 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
382 {
383 if (read_utf16_bom(from, mode) == little_endian)
384 mode = codecvt_mode(mode & little_endian);
385 while (from.size() && to.size())
386 {
387 const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
388 if (codepoint == incomplete_mb_character)
389 return codecvt_base::partial;
390 if (codepoint > maxcode)
391 return codecvt_base::error;
392 *to.next++ = codepoint;
393 }
394 return from.size() ? codecvt_base::partial : codecvt_base::ok;
395 }
396
397 // ucs4 -> utf16
398 codecvt_base::result
399 ucs4_out(range<const char32_t>& from, range<char16_t>& to,
400 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
401 {
402 if (!write_utf16_bom(to, mode))
403 return codecvt_base::partial;
404 while (from.size())
405 {
406 const char32_t c = from.next[0];
407 if (c > maxcode)
408 return codecvt_base::error;
409 if (!write_utf16_code_point(to, c, mode))
410 return codecvt_base::partial;
411 ++from.next;
412 }
413 return codecvt_base::ok;
414 }
415
416 // utf8 -> utf16
417 template<typename C>
418 codecvt_base::result
419 utf16_in(range<const char>& from, range<C>& to,
420 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
421 {
422 read_utf8_bom(from, mode);
423 while (from.size() && to.size())
424 {
425 const char* const first = from.next;
426 const char32_t codepoint = read_utf8_code_point(from, maxcode);
427 if (codepoint == incomplete_mb_character)
428 return codecvt_base::partial;
429 if (codepoint > maxcode)
430 return codecvt_base::error;
431 if (!write_utf16_code_point(to, codepoint, mode))
432 {
433 from.next = first;
434 return codecvt_base::partial;
435 }
436 }
437 return codecvt_base::ok;
438 }
439
440 // utf16 -> utf8
441 template<typename C>
442 codecvt_base::result
443 utf16_out(range<const C>& from, range<char>& to,
444 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
445 {
446 if (!write_utf8_bom(to, mode))
447 return codecvt_base::partial;
448 while (from.size())
449 {
450 char32_t c = from.next[0];
451 int inc = 1;
452 if (is_high_surrogate(c))
453 {
454 if (from.size() < 2)
455 return codecvt_base::ok; // stop converting at this point
456
457 const char32_t c2 = from.next[1];
458 if (is_low_surrogate(c2))
459 {
460 c = surrogate_pair_to_code_point(c, c2);
461 inc = 2;
462 }
463 else
464 return codecvt_base::error;
465 }
466 else if (is_low_surrogate(c))
467 return codecvt_base::error;
468 if (c > maxcode)
469 return codecvt_base::error;
470 if (!write_utf8_code_point(to, c))
471 return codecvt_base::partial;
472 from.next += inc;
473 }
474 return codecvt_base::ok;
475 }
476
477 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
478 const char*
479 utf16_span(const char* begin, const char* end, size_t max,
480 char32_t maxcode = max_code_point, codecvt_mode mode = {})
481 {
482 range<const char> from{ begin, end };
483 read_utf8_bom(from, mode);
484 size_t count = 0;
485 while (count+1 < max)
486 {
487 char32_t c = read_utf8_code_point(from, maxcode);
488 if (c > maxcode)
489 return from.next;
490 else if (c > max_single_utf16_unit)
491 ++count;
492 ++count;
493 }
494 if (count+1 == max) // take one more character if it fits in a single unit
495 read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
496 return from.next;
497 }
498
499 // utf8 -> ucs2
500 codecvt_base::result
501 ucs2_in(range<const char>& from, range<char16_t>& to,
502 char32_t maxcode = max_code_point, codecvt_mode mode = {})
503 {
504 return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
505 }
506
507 // ucs2 -> utf8
508 codecvt_base::result
509 ucs2_out(range<const char16_t>& from, range<char>& to,
510 char32_t maxcode = max_code_point, codecvt_mode mode = {})
511 {
512 return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
513 }
514
515 // ucs2 -> utf16
516 codecvt_base::result
517 ucs2_out(range<const char16_t>& from, range<char16_t>& to,
518 char32_t maxcode = max_code_point, codecvt_mode mode = {})
519 {
520 if (!write_utf16_bom(to, mode))
521 return codecvt_base::partial;
522 while (from.size() && to.size())
523 {
524 char16_t c = from.next[0];
525 if (is_high_surrogate(c))
526 return codecvt_base::error;
527 if (c > maxcode)
528 return codecvt_base::error;
529 *to.next++ = adjust_byte_order(c, mode);
530 ++from.next;
531 }
532 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
533 }
534
535 // utf16 -> ucs2
536 codecvt_base::result
537 ucs2_in(range<const char16_t>& from, range<char16_t>& to,
538 char32_t maxcode = max_code_point, codecvt_mode mode = {})
539 {
540 if (read_utf16_bom(from, mode) == little_endian)
541 mode = codecvt_mode(mode & little_endian);
542 maxcode = std::max(max_single_utf16_unit, maxcode);
543 while (from.size() && to.size())
544 {
545 const char32_t c = read_utf16_code_point(from, maxcode, mode);
546 if (c == incomplete_mb_character)
547 return codecvt_base::partial;
548 if (c > maxcode)
549 return codecvt_base::error;
550 *to.next++ = c;
551 }
552 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
553 }
554
555 const char16_t*
556 ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
557 char32_t maxcode, codecvt_mode mode)
558 {
559 range<const char16_t> from{ begin, end };
560 if (read_utf16_bom(from, mode) == little_endian)
561 mode = codecvt_mode(mode & little_endian);
562 maxcode = std::max(max_single_utf16_unit, maxcode);
563 char32_t c = 0;
564 while (max-- && c <= maxcode)
565 c = read_utf16_code_point(from, maxcode, mode);
566 return from.next;
567 }
568
569 const char*
570 ucs2_span(const char* begin, const char* end, size_t max,
571 char32_t maxcode, codecvt_mode mode)
572 {
573 range<const char> from{ begin, end };
574 read_utf8_bom(from, mode);
575 maxcode = std::max(max_single_utf16_unit, maxcode);
576 char32_t c = 0;
577 while (max-- && c <= maxcode)
578 c = read_utf8_code_point(from, maxcode);
579 return from.next;
580 }
581
582 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
583 const char*
584 ucs4_span(const char* begin, const char* end, size_t max,
585 char32_t maxcode = max_code_point, codecvt_mode mode = {})
586 {
587 range<const char> from{ begin, end };
588 read_utf8_bom(from, mode);
589 char32_t c = 0;
590 while (max-- && c <= maxcode)
591 c = read_utf8_code_point(from, maxcode);
592 return from.next;
593 }
594
595 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
596 const char16_t*
597 ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
598 char32_t maxcode = max_code_point, codecvt_mode mode = {})
599 {
600 range<const char16_t> from{ begin, end };
601 if (read_utf16_bom(from, mode) == little_endian)
602 mode = codecvt_mode(mode & little_endian);
603 char32_t c = 0;
604 while (max-- && c <= maxcode)
605 c = read_utf16_code_point(from, maxcode, mode);
606 return from.next;
607 }
608 }
609
610 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
611 // Converts from UTF-8 to UTF-16.
612
613 locale::id codecvt<char16_t, char, mbstate_t>::id;
614
615 codecvt<char16_t, char, mbstate_t>::~codecvt() { }
616
617 codecvt_base::result
618 codecvt<char16_t, char, mbstate_t>::
619 do_out(state_type&,
620 const intern_type* __from,
621 const intern_type* __from_end, const intern_type*& __from_next,
622 extern_type* __to, extern_type* __to_end,
623 extern_type*& __to_next) const
624 {
625 range<const char16_t> from{ __from, __from_end };
626 range<char> to{ __to, __to_end };
627 auto res = utf16_out(from, to);
628 __from_next = from.next;
629 __to_next = to.next;
630 return res;
631 }
632
633 codecvt_base::result
634 codecvt<char16_t, char, mbstate_t>::
635 do_unshift(state_type&, extern_type* __to, extern_type*,
636 extern_type*& __to_next) const
637 {
638 __to_next = __to;
639 return noconv; // we don't use mbstate_t for the unicode facets
640 }
641
642 codecvt_base::result
643 codecvt<char16_t, char, mbstate_t>::
644 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
645 const extern_type*& __from_next,
646 intern_type* __to, intern_type* __to_end,
647 intern_type*& __to_next) const
648 {
649 range<const char> from{ __from, __from_end };
650 range<char16_t> to{ __to, __to_end };
651 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
652 codecvt_mode mode = {};
653 #else
654 codecvt_mode mode = little_endian;
655 #endif
656 auto res = utf16_in(from, to, max_code_point, mode);
657 __from_next = from.next;
658 __to_next = to.next;
659 return res;
660 }
661
662 int
663 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
664 { return 0; }
665
666 bool
667 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
668 { return false; }
669
670 int
671 codecvt<char16_t, char, mbstate_t>::
672 do_length(state_type&, const extern_type* __from,
673 const extern_type* __end, size_t __max) const
674 {
675 __end = utf16_span(__from, __end, __max);
676 return __end - __from;
677 }
678
679 int
680 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
681 {
682 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
683 // whereas 4 byte sequences require two 16-bit code units.
684 return 3;
685 }
686
687 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
688 // Converts from UTF-8 to UTF-32 (aka UCS-4).
689
690 locale::id codecvt<char32_t, char, mbstate_t>::id;
691
692 codecvt<char32_t, char, mbstate_t>::~codecvt() { }
693
694 codecvt_base::result
695 codecvt<char32_t, char, mbstate_t>::
696 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
697 const intern_type*& __from_next,
698 extern_type* __to, extern_type* __to_end,
699 extern_type*& __to_next) const
700 {
701 range<const char32_t> from{ __from, __from_end };
702 range<char> to{ __to, __to_end };
703 auto res = ucs4_out(from, to);
704 __from_next = from.next;
705 __to_next = to.next;
706 return res;
707 }
708
709 codecvt_base::result
710 codecvt<char32_t, char, mbstate_t>::
711 do_unshift(state_type&, extern_type* __to, extern_type*,
712 extern_type*& __to_next) const
713 {
714 __to_next = __to;
715 return noconv;
716 }
717
718 codecvt_base::result
719 codecvt<char32_t, char, mbstate_t>::
720 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
721 const extern_type*& __from_next,
722 intern_type* __to, intern_type* __to_end,
723 intern_type*& __to_next) const
724 {
725 range<const char> from{ __from, __from_end };
726 range<char32_t> to{ __to, __to_end };
727 auto res = ucs4_in(from, to);
728 __from_next = from.next;
729 __to_next = to.next;
730 return res;
731 }
732
733 int
734 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
735 { return 0; }
736
737 bool
738 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
739 { return false; }
740
741 int
742 codecvt<char32_t, char, mbstate_t>::
743 do_length(state_type&, const extern_type* __from,
744 const extern_type* __end, size_t __max) const
745 {
746 __end = ucs4_span(__from, __end, __max);
747 return __end - __from;
748 }
749
750 int
751 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
752 { return 4; }
753
754 // Define members of codecvt_utf8<char16_t> base class implementation.
755 // Converts from UTF-8 to UCS-2.
756
757 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
758
759 codecvt_base::result
760 __codecvt_utf8_base<char16_t>::
761 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
762 const intern_type*& __from_next,
763 extern_type* __to, extern_type* __to_end,
764 extern_type*& __to_next) const
765 {
766 range<const char16_t> from{ __from, __from_end };
767 range<char> to{ __to, __to_end };
768 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
769 __from_next = from.next;
770 __to_next = to.next;
771 return res;
772 }
773
774 codecvt_base::result
775 __codecvt_utf8_base<char16_t>::
776 do_unshift(state_type&, extern_type* __to, extern_type*,
777 extern_type*& __to_next) const
778 {
779 __to_next = __to;
780 return noconv;
781 }
782
783 codecvt_base::result
784 __codecvt_utf8_base<char16_t>::
785 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
786 const extern_type*& __from_next,
787 intern_type* __to, intern_type* __to_end,
788 intern_type*& __to_next) const
789 {
790 range<const char> from{ __from, __from_end };
791 range<char16_t> to{ __to, __to_end };
792 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
793 __from_next = from.next;
794 __to_next = to.next;
795 return res;
796 }
797
798 int
799 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
800 { return 0; }
801
802 bool
803 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
804 { return false; }
805
806 int
807 __codecvt_utf8_base<char16_t>::
808 do_length(state_type&, const extern_type* __from,
809 const extern_type* __end, size_t __max) const
810 {
811 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
812 return __end - __from;
813 }
814
815 int
816 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
817 { return 3; }
818
819 // Define members of codecvt_utf8<char32_t> base class implementation.
820 // Converts from UTF-8 to UTF-32 (aka UCS-4).
821
822 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
823
824 codecvt_base::result
825 __codecvt_utf8_base<char32_t>::
826 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
827 const intern_type*& __from_next,
828 extern_type* __to, extern_type* __to_end,
829 extern_type*& __to_next) const
830 {
831 range<const char32_t> from{ __from, __from_end };
832 range<char> to{ __to, __to_end };
833 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
834 __from_next = from.next;
835 __to_next = to.next;
836 return res;
837 }
838
839 codecvt_base::result
840 __codecvt_utf8_base<char32_t>::
841 do_unshift(state_type&, extern_type* __to, extern_type*,
842 extern_type*& __to_next) const
843 {
844 __to_next = __to;
845 return noconv;
846 }
847
848 codecvt_base::result
849 __codecvt_utf8_base<char32_t>::
850 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
851 const extern_type*& __from_next,
852 intern_type* __to, intern_type* __to_end,
853 intern_type*& __to_next) const
854 {
855 range<const char> from{ __from, __from_end };
856 range<char32_t> to{ __to, __to_end };
857 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
858 __from_next = from.next;
859 __to_next = to.next;
860 return res;
861 }
862
863 int
864 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
865 { return 0; }
866
867 bool
868 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
869 { return false; }
870
871 int
872 __codecvt_utf8_base<char32_t>::
873 do_length(state_type&, const extern_type* __from,
874 const extern_type* __end, size_t __max) const
875 {
876 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
877 return __end - __from;
878 }
879
880 int
881 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
882 { return 4; }
883
884 #ifdef _GLIBCXX_USE_WCHAR_T
885 // Define members of codecvt_utf8<wchar_t> base class implementation.
886 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
887
888 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
889
890 codecvt_base::result
891 __codecvt_utf8_base<wchar_t>::
892 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
893 const intern_type*& __from_next,
894 extern_type* __to, extern_type* __to_end,
895 extern_type*& __to_next) const
896 {
897 range<char> to{ __to, __to_end };
898 #if __SIZEOF_WCHAR_T__ == 2
899 range<const char16_t> from{
900 reinterpret_cast<const char16_t*>(__from),
901 reinterpret_cast<const char16_t*>(__from_end)
902 };
903 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
904 #elif __SIZEOF_WCHAR_T__ == 4
905 range<const char32_t> from{
906 reinterpret_cast<const char32_t*>(__from),
907 reinterpret_cast<const char32_t*>(__from_end)
908 };
909 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
910 #else
911 return codecvt_base::error;
912 #endif
913 __from_next = reinterpret_cast<const wchar_t*>(from.next);
914 __to_next = to.next;
915 return res;
916 }
917
918 codecvt_base::result
919 __codecvt_utf8_base<wchar_t>::
920 do_unshift(state_type&, extern_type* __to, extern_type*,
921 extern_type*& __to_next) const
922 {
923 __to_next = __to;
924 return noconv;
925 }
926
927 codecvt_base::result
928 __codecvt_utf8_base<wchar_t>::
929 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
930 const extern_type*& __from_next,
931 intern_type* __to, intern_type* __to_end,
932 intern_type*& __to_next) const
933 {
934 range<const char> from{ __from, __from_end };
935 #if __SIZEOF_WCHAR_T__ == 2
936 range<char16_t> to{
937 reinterpret_cast<char16_t*>(__to),
938 reinterpret_cast<char16_t*>(__to_end)
939 };
940 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
941 #elif __SIZEOF_WCHAR_T__ == 4
942 range<char32_t> to{
943 reinterpret_cast<char32_t*>(__to),
944 reinterpret_cast<char32_t*>(__to_end)
945 };
946 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
947 #else
948 return codecvt_base::error;
949 #endif
950 __from_next = from.next;
951 __to_next = reinterpret_cast<wchar_t*>(to.next);
952 return res;
953 }
954
955 int
956 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
957 { return 0; }
958
959 bool
960 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
961 { return false; }
962
963 int
964 __codecvt_utf8_base<wchar_t>::
965 do_length(state_type&, const extern_type* __from,
966 const extern_type* __end, size_t __max) const
967 {
968 #if __SIZEOF_WCHAR_T__ == 2
969 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
970 #elif __SIZEOF_WCHAR_T__ == 4
971 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
972 #else
973 __end = __from;
974 #endif
975 return __end - __from;
976 }
977
978 int
979 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
980 { return 4; }
981 #endif
982
983 // Define members of codecvt_utf16<char16_t> base class implementation.
984 // Converts from UTF-16 to UCS-2.
985
986 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
987
988 codecvt_base::result
989 __codecvt_utf16_base<char16_t>::
990 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
991 const intern_type*& __from_next,
992 extern_type* __to, extern_type* __to_end,
993 extern_type*& __to_next) const
994 {
995 range<const char16_t> from{ __from, __from_end };
996 range<char16_t> to{
997 reinterpret_cast<char16_t*>(__to),
998 reinterpret_cast<char16_t*>(__to_end)
999 };
1000 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1001 __from_next = from.next;
1002 __to_next = reinterpret_cast<char*>(to.next);
1003 return res;
1004 }
1005
1006 codecvt_base::result
1007 __codecvt_utf16_base<char16_t>::
1008 do_unshift(state_type&, extern_type* __to, extern_type*,
1009 extern_type*& __to_next) const
1010 {
1011 __to_next = __to;
1012 return noconv;
1013 }
1014
1015 codecvt_base::result
1016 __codecvt_utf16_base<char16_t>::
1017 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1018 const extern_type*& __from_next,
1019 intern_type* __to, intern_type* __to_end,
1020 intern_type*& __to_next) const
1021 {
1022 range<const char16_t> from{
1023 reinterpret_cast<const char16_t*>(__from),
1024 reinterpret_cast<const char16_t*>(__from_end)
1025 };
1026 range<char16_t> to{ __to, __to_end };
1027 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1028 __from_next = reinterpret_cast<const char*>(from.next);
1029 __to_next = to.next;
1030 return res;
1031 }
1032
1033 int
1034 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1035 { return 1; }
1036
1037 bool
1038 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1039 { return false; }
1040
1041 int
1042 __codecvt_utf16_base<char16_t>::
1043 do_length(state_type&, const extern_type* __from,
1044 const extern_type* __end, size_t __max) const
1045 {
1046 auto next = reinterpret_cast<const char16_t*>(__from);
1047 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1048 _M_maxcode, _M_mode);
1049 return reinterpret_cast<const char*>(next) - __from;
1050 }
1051
1052 int
1053 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1054 { return 3; }
1055
1056 // Define members of codecvt_utf16<char32_t> base class implementation.
1057 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1058
1059 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1060
1061 codecvt_base::result
1062 __codecvt_utf16_base<char32_t>::
1063 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1064 const intern_type*& __from_next,
1065 extern_type* __to, extern_type* __to_end,
1066 extern_type*& __to_next) const
1067 {
1068 range<const char32_t> from{ __from, __from_end };
1069 range<char16_t> to{
1070 reinterpret_cast<char16_t*>(__to),
1071 reinterpret_cast<char16_t*>(__to_end)
1072 };
1073 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1074 __from_next = from.next;
1075 __to_next = reinterpret_cast<char*>(to.next);
1076 return res;
1077 }
1078
1079 codecvt_base::result
1080 __codecvt_utf16_base<char32_t>::
1081 do_unshift(state_type&, extern_type* __to, extern_type*,
1082 extern_type*& __to_next) const
1083 {
1084 __to_next = __to;
1085 return noconv;
1086 }
1087
1088 codecvt_base::result
1089 __codecvt_utf16_base<char32_t>::
1090 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1091 const extern_type*& __from_next,
1092 intern_type* __to, intern_type* __to_end,
1093 intern_type*& __to_next) const
1094 {
1095 range<const char16_t> from{
1096 reinterpret_cast<const char16_t*>(__from),
1097 reinterpret_cast<const char16_t*>(__from_end)
1098 };
1099 range<char32_t> to{ __to, __to_end };
1100 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1101 __from_next = reinterpret_cast<const char*>(from.next);
1102 __to_next = to.next;
1103 return res;
1104 }
1105
1106 int
1107 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1108 { return 0; }
1109
1110 bool
1111 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1112 { return false; }
1113
1114 int
1115 __codecvt_utf16_base<char32_t>::
1116 do_length(state_type&, const extern_type* __from,
1117 const extern_type* __end, size_t __max) const
1118 {
1119 auto next = reinterpret_cast<const char16_t*>(__from);
1120 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1121 _M_maxcode, _M_mode);
1122 return reinterpret_cast<const char*>(next) - __from;
1123 }
1124
1125 int
1126 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1127 { return 4; }
1128
1129 #ifdef _GLIBCXX_USE_WCHAR_T
1130 // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1132
1133 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1134
1135 codecvt_base::result
1136 __codecvt_utf16_base<wchar_t>::
1137 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1138 const intern_type*& __from_next,
1139 extern_type* __to, extern_type* __to_end,
1140 extern_type*& __to_next) const
1141 {
1142 range<char> to{ __to, __to_end };
1143 #if __SIZEOF_WCHAR_T__ == 2
1144 range<const char16_t> from{
1145 reinterpret_cast<const char16_t*>(__from),
1146 reinterpret_cast<const char16_t*>(__from_end)
1147 };
1148 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1149 #elif __SIZEOF_WCHAR_T__ == 4
1150 range<const char32_t> from{
1151 reinterpret_cast<const char32_t*>(__from),
1152 reinterpret_cast<const char32_t*>(__from_end)
1153 };
1154 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1155 #else
1156 return codecvt_base::error;
1157 #endif
1158 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1159 __to_next = to.next;
1160 return res;
1161 }
1162
1163 codecvt_base::result
1164 __codecvt_utf16_base<wchar_t>::
1165 do_unshift(state_type&, extern_type* __to, extern_type*,
1166 extern_type*& __to_next) const
1167 {
1168 __to_next = __to;
1169 return noconv;
1170 }
1171
1172 codecvt_base::result
1173 __codecvt_utf16_base<wchar_t>::
1174 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1175 const extern_type*& __from_next,
1176 intern_type* __to, intern_type* __to_end,
1177 intern_type*& __to_next) const
1178 {
1179 range<const char> from{ __from, __from_end };
1180 #if __SIZEOF_WCHAR_T__ == 2
1181 range<char16_t> to{
1182 reinterpret_cast<char16_t*>(__to),
1183 reinterpret_cast<char16_t*>(__to_end)
1184 };
1185 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1186 #elif __SIZEOF_WCHAR_T__ == 4
1187 range<char32_t> to{
1188 reinterpret_cast<char32_t*>(__to),
1189 reinterpret_cast<char32_t*>(__to_end)
1190 };
1191 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1192 #else
1193 return codecvt_base::error;
1194 #endif
1195 __from_next = from.next;
1196 __to_next = reinterpret_cast<wchar_t*>(to.next);
1197 return res;
1198 }
1199
1200 int
1201 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1202 { return 0; }
1203
1204 bool
1205 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1206 { return false; }
1207
1208 int
1209 __codecvt_utf16_base<wchar_t>::
1210 do_length(state_type&, const extern_type* __from,
1211 const extern_type* __end, size_t __max) const
1212 {
1213 auto next = reinterpret_cast<const char16_t*>(__from);
1214 #if __SIZEOF_WCHAR_T__ == 2
1215 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1216 _M_maxcode, _M_mode);
1217 #elif __SIZEOF_WCHAR_T__ == 4
1218 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1219 _M_maxcode, _M_mode);
1220 #endif
1221 return reinterpret_cast<const char*>(next) - __from;
1222 }
1223
1224 int
1225 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1226 { return 4; }
1227 #endif
1228
1229 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1230 // Converts from UTF-8 to UTF-16.
1231
1232 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1233
1234 codecvt_base::result
1235 __codecvt_utf8_utf16_base<char16_t>::
1236 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1237 const intern_type*& __from_next,
1238 extern_type* __to, extern_type* __to_end,
1239 extern_type*& __to_next) const
1240 {
1241 range<const char16_t> from{ __from, __from_end };
1242 range<char> to{ __to, __to_end };
1243 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1244 __from_next = from.next;
1245 __to_next = to.next;
1246 return res;
1247 }
1248
1249 codecvt_base::result
1250 __codecvt_utf8_utf16_base<char16_t>::
1251 do_unshift(state_type&, extern_type* __to, extern_type*,
1252 extern_type*& __to_next) const
1253 {
1254 __to_next = __to;
1255 return noconv;
1256 }
1257
1258 codecvt_base::result
1259 __codecvt_utf8_utf16_base<char16_t>::
1260 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1261 const extern_type*& __from_next,
1262 intern_type* __to, intern_type* __to_end,
1263 intern_type*& __to_next) const
1264 {
1265 range<const char> from{ __from, __from_end };
1266 range<char16_t> to{ __to, __to_end };
1267 codecvt_mode mode = codecvt_mode(_M_mode | (consume_header|generate_header));
1268 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1269 mode = codecvt_mode(mode | little_endian);
1270 #endif
1271 auto res = utf16_in(from, to, _M_maxcode, mode);
1272 __from_next = from.next;
1273 __to_next = to.next;
1274 return res;
1275 }
1276
1277 int
1278 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1279 { return 0; }
1280
1281 bool
1282 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1283 { return false; }
1284
1285 int
1286 __codecvt_utf8_utf16_base<char16_t>::
1287 do_length(state_type&, const extern_type* __from,
1288 const extern_type* __end, size_t __max) const
1289 {
1290 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1291 return __end - __from;
1292 }
1293
1294 int
1295 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1296 {
1297 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1298 // whereas 4 byte sequences require two 16-bit code units.
1299 return 3;
1300 }
1301
1302 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1303 // Converts from UTF-8 to UTF-16.
1304
1305 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1306
1307 codecvt_base::result
1308 __codecvt_utf8_utf16_base<char32_t>::
1309 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1310 const intern_type*& __from_next,
1311 extern_type* __to, extern_type* __to_end,
1312 extern_type*& __to_next) const
1313 {
1314 range<const char32_t> from{ __from, __from_end };
1315 range<char> to{ __to, __to_end };
1316 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1317 __from_next = from.next;
1318 __to_next = to.next;
1319 return res;
1320 }
1321
1322 codecvt_base::result
1323 __codecvt_utf8_utf16_base<char32_t>::
1324 do_unshift(state_type&, extern_type* __to, extern_type*,
1325 extern_type*& __to_next) const
1326 {
1327 __to_next = __to;
1328 return noconv;
1329 }
1330
1331 codecvt_base::result
1332 __codecvt_utf8_utf16_base<char32_t>::
1333 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1334 const extern_type*& __from_next,
1335 intern_type* __to, intern_type* __to_end,
1336 intern_type*& __to_next) const
1337 {
1338 range<const char> from{ __from, __from_end };
1339 range<char32_t> to{ __to, __to_end };
1340 auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1341 __from_next = from.next;
1342 __to_next = to.next;
1343 return res;
1344 }
1345
1346 int
1347 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1348 { return 0; }
1349
1350 bool
1351 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1352 { return false; }
1353
1354 int
1355 __codecvt_utf8_utf16_base<char32_t>::
1356 do_length(state_type&, const extern_type* __from,
1357 const extern_type* __end, size_t __max) const
1358 {
1359 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1360 return __end - __from;
1361 }
1362
1363 int
1364 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1365 {
1366 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1367 // whereas 4 byte sequences require two 16-bit code units.
1368 return 3;
1369 }
1370
1371 #ifdef _GLIBCXX_USE_WCHAR_T
1372 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1373 // Converts from UTF-8 to UTF-16.
1374
1375 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1376
1377 codecvt_base::result
1378 __codecvt_utf8_utf16_base<wchar_t>::
1379 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1380 const intern_type*& __from_next,
1381 extern_type* __to, extern_type* __to_end,
1382 extern_type*& __to_next) const
1383 {
1384 range<const wchar_t> from{ __from, __from_end };
1385 range<char> to{ __to, __to_end };
1386 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1387 __from_next = from.next;
1388 __to_next = to.next;
1389 return res;
1390 }
1391
1392 codecvt_base::result
1393 __codecvt_utf8_utf16_base<wchar_t>::
1394 do_unshift(state_type&, extern_type* __to, extern_type*,
1395 extern_type*& __to_next) const
1396 {
1397 __to_next = __to;
1398 return noconv;
1399 }
1400
1401 codecvt_base::result
1402 __codecvt_utf8_utf16_base<wchar_t>::
1403 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1404 const extern_type*& __from_next,
1405 intern_type* __to, intern_type* __to_end,
1406 intern_type*& __to_next) const
1407 {
1408 range<const char> from{ __from, __from_end };
1409 range<wchar_t> to{ __to, __to_end };
1410 auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1411 __from_next = from.next;
1412 __to_next = to.next;
1413 return res;
1414 }
1415
1416 int
1417 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1418 { return 0; }
1419
1420 bool
1421 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1422 { return false; }
1423
1424 int
1425 __codecvt_utf8_utf16_base<wchar_t>::
1426 do_length(state_type&, const extern_type* __from,
1427 const extern_type* __end, size_t __max) const
1428 {
1429 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1430 return __end - __from;
1431 }
1432
1433 int
1434 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1435 {
1436 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1437 // whereas 4 byte sequences require two 16-bit code units.
1438 return 3;
1439 }
1440 #endif
1441
1442 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
1443 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
1444 template class codecvt_byname<char16_t, char, mbstate_t>;
1445 template class codecvt_byname<char32_t, char, mbstate_t>;
1446
1447 _GLIBCXX_END_NAMESPACE_VERSION
1448 }
1449 #endif // _GLIBCXX_USE_C99_STDINT_TR1