2a135f67df111001ac87ff9319eab67c9ccb3c42
[gcc.git] / libstdc++-v3 / testsuite / 22_locale / codecvt_unicode_char.cc
1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
2
3 // Copyright (C) 2000, 2001 Free Software Foundation
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 2, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License along
17 // with this library; see the file COPYING. If not, write to the Free
18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19 // USA.
20
21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
22
23 #include <locale>
24 #include <testsuite_hooks.h>
25
26 using namespace std;
27
28 #ifdef _GLIBCPP_USE___ENC_TRAITS
29
30 /*
31 > how do I check that these conversions are correct?
32 Very easy. Since all the characters are from ASCII you simply
33 zero-extend the values.
34
35 drepper$ echo 'black pearl jasmine tea' | od -t x1
36 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
37 0000020 69 6e 65 20 74 65 61 0a
38
39 So the UCS-2 string is
40
41 0x0062, 0x006c, 0x0061, ...
42
43 You get the idea. With iconv() you have to take care of the
44 byte-order, though. UCS-2 can mean little- or big endian. Looking at
45 your result
46
47 > $9 = 25856
48
49 it shows that the other byte-order is used (25856 == 0x6500).
50 */
51
52
53 void
54 initialize_state(__enc_traits& state)
55 { state._M_init(); }
56
57 // Partial specialization using __enc_traits.
58 // codecvt<unicode_t, char, __enc_traits>
59 // UNICODE - UCS2 (big endian)
60 void test01()
61 {
62 typedef codecvt_base::result result;
63 typedef unsigned short unicode_t;
64 typedef unicode_t int_type;
65 typedef char ext_type;
66 typedef __enc_traits enc_type;
67 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
68 typedef char_traits<int_type> int_traits;
69 typedef char_traits<ext_type> ext_traits;
70
71 bool test = true;
72 const ext_type* e_lit = "black pearl jasmine tea";
73 int size = strlen(e_lit);
74
75 int_type i_lit_base[25] =
76 {
77 0x6200, 0x6c00, 0x6100, 0x6300, 0x6b00, 0x2000, 0x7000, 0x6500, 0x6100,
78 0x7200, 0x6c00, 0x2000, 0x6a00, 0x6100, 0x7300, 0x6d00, 0x6900, 0x6e00,
79 0x6500, 0x2000, 0x7400, 0x6500, 0x6100, 0xa000
80 };
81 const int_type* i_lit = i_lit_base;
82
83 const ext_type* efrom_next;
84 const int_type* ifrom_next;
85 ext_type* e_arr = new ext_type[size + 1];
86 ext_type* eto_next;
87 int_type* i_arr = new int_type[size + 1];
88 int_type* ito_next;
89
90 // construct a locale object with the specialized facet.
91 locale loc(locale::classic(), new unicode_codecvt);
92 // sanity check the constructed locale has the specialized facet.
93 VERIFY( has_facet<unicode_codecvt>(loc) );
94 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
95
96 // in
97 unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
98 initialize_state(state01);
99 // internal encoding is bigger because of bom
100 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
101 i_arr, i_arr + size + 1, ito_next);
102 VERIFY( r1 == codecvt_base::ok );
103 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
104 VERIFY( efrom_next == e_lit + size );
105 VERIFY( ito_next == i_arr + size );
106
107 // out
108 unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
109 initialize_state(state02);
110 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
111 e_arr, e_arr + size, eto_next);
112 VERIFY( r2 == codecvt_base::ok );
113 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
114 VERIFY( ifrom_next == i_lit + size );
115 VERIFY( eto_next == e_arr + size );
116
117 // unshift
118 ext_traits::copy(e_arr, e_lit, size);
119 unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
120 initialize_state(state03);
121 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
122 VERIFY( r3 == codecvt_base::noconv );
123 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
124 VERIFY( eto_next == e_arr );
125
126 int i = cvt.encoding();
127 VERIFY( i == 0 );
128
129 VERIFY( !cvt.always_noconv() );
130
131 unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
132 initialize_state(state04);
133 int j = cvt.length(state03, e_lit, e_lit + size, 5);
134 VERIFY( j == 5 );
135
136 int k = cvt.max_length();
137 VERIFY( k == 1 );
138
139 delete [] e_arr;
140 delete [] i_arr;
141 }
142
143 // Partial specialization using __enc_traits.
144 // codecvt<unicode_t, char, __enc_traits>
145 // UNICODE - UCS2 (little endian)
146 void test02()
147 {
148 typedef codecvt_base::result result;
149 typedef unsigned short unicode_t;
150 typedef unicode_t int_type;
151 typedef char ext_type;
152 typedef __enc_traits enc_type;
153 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
154 typedef char_traits<int_type> int_traits;
155 typedef char_traits<ext_type> ext_traits;
156
157 bool test = true;
158 const ext_type* e_lit = "black pearl jasmine tea";
159 int size = strlen(e_lit);
160
161 int_type i_lit_base[25] =
162 {
163 0x0062, 0x006c, 0x0061, 0x0063, 0x006b, 0x0020, 0x0070, 0x0065, 0x0061,
164 0x0072, 0x006c, 0x0020, 0x006a, 0x0061, 0x0073, 0x006d, 0x0069, 0x006e,
165 0x0065, 0x0020, 0x0074, 0x0065, 0x0061, 0x00a0
166 };
167 const int_type* i_lit = i_lit_base;
168
169 const ext_type* efrom_next;
170 const int_type* ifrom_next;
171 ext_type* e_arr = new ext_type[size + 1];
172 ext_type* eto_next;
173 int_type* i_arr = new int_type[size + 1];
174 int_type* ito_next;
175
176 // construct a locale object with the specialized facet.
177 locale loc(locale::classic(), new unicode_codecvt);
178 // sanity check the constructed locale has the specialized facet.
179 VERIFY( has_facet<unicode_codecvt>(loc) );
180 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
181
182 // in
183 unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
184 initialize_state(state01);
185 // internal encoding is bigger because of bom
186 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
187 i_arr, i_arr + size + 1, ito_next);
188 VERIFY( r1 == codecvt_base::ok );
189 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
190 VERIFY( efrom_next == e_lit + size );
191 VERIFY( ito_next == i_arr + size );
192
193 // out
194 unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
195 initialize_state(state02);
196 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
197 e_arr, e_arr + size, eto_next);
198 VERIFY( r2 == codecvt_base::ok );
199 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
200 VERIFY( ifrom_next == i_lit + size );
201 VERIFY( eto_next == e_arr + size );
202
203 // unshift
204 ext_traits::copy(e_arr, e_lit, size);
205 unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
206 initialize_state(state03);
207 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
208 VERIFY( r3 == codecvt_base::noconv );
209 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
210 VERIFY( eto_next == e_arr );
211
212 int i = cvt.encoding();
213 VERIFY( i == 0 );
214
215 VERIFY( !cvt.always_noconv() );
216
217 unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
218 initialize_state(state04);
219 int j = cvt.length(state03, e_lit, e_lit + size, 5);
220 VERIFY( j == 5 );
221
222 int k = cvt.max_length();
223 VERIFY( k == 1 );
224
225 delete [] e_arr;
226 delete [] i_arr;
227 }
228
229 #endif // _GLIBCPP_USE___ENC_TRAITS
230
// Test driver: runs both UCS-2 conversion tests when the __enc_traits
// extension is configured in; otherwise does nothing and returns 0.
int main()
{
  // Guarded with #ifdef, like the definitions of test01/test02, so the
  // calls are compiled exactly when the functions are (a plain #if would
  // disagree if the macro were defined as empty or as 0).
#ifdef _GLIBCPP_USE___ENC_TRAITS
  test01();
  test02();
#endif

  return 0;
}