libstdc++-v3/testsuite/22_locale/codecvt_unicode_char.cc

   1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
   2
   3 // Copyright (C) 2000, 2001 Free Software Foundation
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the
   7 // terms of the GNU General Public License as published by the
   8 // Free Software Foundation; either version 2, or (at your option)
   9 // any later version.
  10
  11 // This library is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License along
  17 // with this library; see the file COPYING.  If not, write to the Free
  18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  19 // USA.
  20
  21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
  22
  23 #include <locale>
  24 #include <testsuite_hooks.h>
  25
  26 using namespace std;
  27
  28 #ifdef _GLIBCPP_USE___ENC_TRAITS
  29
  30 /*
  31 > how do I check that these conversions are correct?
  32 Very easy.  Since all the characters are from ASCII you simply
  33 zero-extend the values.
  34
  35 drepper$ echo 'black pearl jasmine tea' | od -t x1
  36 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
  37 0000020 69 6e 65 20 74 65 61 0a
  38
  39 So the UCS-2 string is
  40
  41 0x0062, 0x006c, 0x0061, ...
  42
  43 You get the idea.  With iconv() you have to take care of the
  44 byte-order, though.  UCS-2 can mean little- or big endian.  Looking at
  45 your result
  46
  47 > $9 = 25856
  48
  49 it shows that the other byte-order is used (25856 == 0x6500).
  50 */
  51
  52
  53 void
  54 initialize_state(__enc_traits& state)
  55 { state._M_init(); }
  56
  57 // Partial specialization using __enc_traits.
  58 // codecvt<unicode_t, char, __enc_traits>
  59 // UNICODE - UCS2 (big endian)
  60 void test01()
  61 {
  62   typedef codecvt_base::result                  result;
  63   typedef unsigned short                        unicode_t;
  64   typedef unicode_t                             int_type;
  65   typedef char                                  ext_type;
  66   typedef __enc_traits                          enc_type;
  67   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
  68   typedef char_traits<int_type>                 int_traits;
  69   typedef char_traits<ext_type>                 ext_traits;
  70
  71   bool                  test = true;
  72   const ext_type*       e_lit = "black pearl jasmine tea";
  73   int                   size = strlen(e_lit);
  74
  75   int_type              i_lit_base[25] =
  76   {
  77     0x6200, 0x6c00, 0x6100, 0x6300, 0x6b00, 0x2000, 0x7000, 0x6500, 0x6100,
  78     0x7200, 0x6c00, 0x2000, 0x6a00, 0x6100, 0x7300, 0x6d00, 0x6900, 0x6e00,
  79     0x6500, 0x2000, 0x7400, 0x6500, 0x6100, 0xa000
  80   };
  81   const int_type*       i_lit = i_lit_base;
  82
  83   const ext_type*       efrom_next;
  84   const int_type*       ifrom_next;
  85   ext_type*             e_arr = new ext_type[size + 1];
  86   ext_type*             eto_next;
  87   int_type*             i_arr = new int_type[size + 1];
  88   int_type*             ito_next;
  89
  90   // construct a locale object with the specialized facet.
  91   locale                loc(locale::classic(), new unicode_codecvt);
  92   // sanity check the constructed locale has the specialized facet.
  93   VERIFY( has_facet<unicode_codecvt>(loc) );
  94   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
  95
  96   // in
  97   unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
  98   initialize_state(state01);
  99   // internal encoding is bigger because of bom
 100   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 101                      i_arr, i_arr + size + 1, ito_next);
 102   VERIFY( r1 == codecvt_base::ok );
 103   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 104   VERIFY( efrom_next == e_lit + size );
 105   VERIFY( ito_next == i_arr + size );
 106
 107   // out
 108   unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 109   initialize_state(state02);
 110   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 111                        e_arr, e_arr + size, eto_next);
 112   VERIFY( r2 == codecvt_base::ok );
 113   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 114   VERIFY( ifrom_next == i_lit + size );
 115   VERIFY( eto_next == e_arr + size );
 116
 117   // unshift
 118   ext_traits::copy(e_arr, e_lit, size);
 119   unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 120   initialize_state(state03);
 121   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 122   VERIFY( r3 == codecvt_base::noconv );
 123   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 124   VERIFY( eto_next == e_arr );
 125
 126   int i = cvt.encoding();
 127   VERIFY( i == 0 );
 128
 129   VERIFY( !cvt.always_noconv() );
 130
 131   unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 132   initialize_state(state04);
 133   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 134   VERIFY( j == 5 );
 135
 136   int k = cvt.max_length();
 137   VERIFY( k == 1 );
 138
 139   delete [] e_arr;
 140   delete [] i_arr;
 141 }
 142
 143 // Partial specialization using __enc_traits.
 144 // codecvt<unicode_t, char, __enc_traits>
 145 // UNICODE - UCS2 (little endian)
 146 void test02()
 147 {
 148   typedef codecvt_base::result                  result;
 149   typedef unsigned short                        unicode_t;
 150   typedef unicode_t                             int_type;
 151   typedef char                                  ext_type;
 152   typedef __enc_traits                          enc_type;
 153   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
 154   typedef char_traits<int_type>                 int_traits;
 155   typedef char_traits<ext_type>                 ext_traits;
 156
 157   bool                  test = true;
 158   const ext_type*       e_lit = "black pearl jasmine tea";
 159   int                   size = strlen(e_lit);
 160
 161   int_type              i_lit_base[25] =
 162   {
 163     0x0062, 0x006c, 0x0061, 0x0063, 0x006b, 0x0020, 0x0070, 0x0065, 0x0061,
 164     0x0072, 0x006c, 0x0020, 0x006a, 0x0061, 0x0073, 0x006d, 0x0069, 0x006e,
 165     0x0065, 0x0020, 0x0074, 0x0065, 0x0061, 0x00a0
 166   };
 167   const int_type*       i_lit = i_lit_base;
 168
 169   const ext_type*       efrom_next;
 170   const int_type*       ifrom_next;
 171   ext_type*             e_arr = new ext_type[size + 1];
 172   ext_type*             eto_next;
 173   int_type*             i_arr = new int_type[size + 1];
 174   int_type*             ito_next;
 175
 176   // construct a locale object with the specialized facet.
 177   locale                loc(locale::classic(), new unicode_codecvt);
 178   // sanity check the constructed locale has the specialized facet.
 179   VERIFY( has_facet<unicode_codecvt>(loc) );
 180   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
 181
 182   // in
 183   unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 184   initialize_state(state01);
 185   // internal encoding is bigger because of bom
 186   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 187                      i_arr, i_arr + size + 1, ito_next);
 188   VERIFY( r1 == codecvt_base::ok );
 189   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 190   VERIFY( efrom_next == e_lit + size );
 191   VERIFY( ito_next == i_arr + size );
 192
 193   // out
 194   unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 195   initialize_state(state02);
 196   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 197                        e_arr, e_arr + size, eto_next);
 198   VERIFY( r2 == codecvt_base::ok );
 199   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 200   VERIFY( ifrom_next == i_lit + size );
 201   VERIFY( eto_next == e_arr + size );
 202
 203   // unshift
 204   ext_traits::copy(e_arr, e_lit, size);
 205   unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 206   initialize_state(state03);
 207   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 208   VERIFY( r3 == codecvt_base::noconv );
 209   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 210   VERIFY( eto_next == e_arr );
 211
 212   int i = cvt.encoding();
 213   VERIFY( i == 0 );
 214
 215   VERIFY( !cvt.always_noconv() );
 216
 217   unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 218   initialize_state(state04);
 219   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 220   VERIFY( j == 5 );
 221
 222   int k = cvt.max_length();
 223   VERIFY( k == 1 );
 224
 225   delete [] e_arr;
 226   delete [] i_arr;
 227 }
 228
 229 #endif // _GLIBCPP_USE___ENC_TRAITS
 230
 231 int main ()
 232 {
 233 #if _GLIBCPP_USE___ENC_TRAITS
 234   test01();
 235   test02();
 236 #endif
 237
 238   return 0;
 239 }