libjava/scripts/encodings.pl

   1 # encodings.pl - Download IANA text and compute alias list.
   2 # Assumes you are running this program from gnu/gcj/convert/.
   3 # Output suitable for direct inclusion in IOConverter.java.
   4
   5 # Map IANA canonical names onto our canonical names.
   6 %map = (
   7         'ANSI_X3.4-1968' => 'ASCII',
   8         'ISO_8859-1:1987' => '8859_1',
   9         'UTF-8' => 'UTF8',
  10         'Shift_JIS' => 'SJIS',
  11         'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS',
  12         'UTF16-LE' => 'UnicodeLittle',
  13         'UTF16-BE' => 'UnicodeBig'
  14         );
  15
  16 if ($ARGV[0] eq '')
  17 {
  18     $file = 'character-sets';
  19     if (! -f $file)
  20     {
  21         # Too painful to figure out how to get Perl to do it.
  22         system 'wget -o .wget-log http://www.iana.org/assignments/character-sets';
  23     }
  24 }
  25 else
  26 {
  27     $file = $ARGV[0];
  28 }
  29
  30 # Include canonical names in the output.
  31 foreach $key (keys %map)
  32 {
  33     $output{lc ($key)} = $map{$key};
  34 }
  35
  36 open (INPUT, "< $file") || die "couldn't open $file: $!";
  37
  38 $body = 0;
  39 $current = '';
  40 while (<INPUT>)
  41 {
  42     chop;
  43     $body = 1 if /^Name:/;
  44     next unless $body;
  45
  46     if (/^$/)
  47     {
  48         $current = '';
  49         next;
  50     }
  51
  52     ($type, $name) = split (/\s+/);
  53     # Encoding names are case-insensitive.  We do all processing on
  54     # the lower-case form.
  55     my $lower = lc ($name);
  56     if ($type eq 'Name:')
  57     {
  58         $current = $map{$name};
  59         if ($current)
  60         {
  61             $output{$lower} = $current;
  62         }
  63     }
  64     elsif ($type eq 'Alias:')
  65     {
  66         # The IANA list has some ugliness.
  67         if ($name ne '' && $lower ne 'none' && $current)
  68         {
  69             $output{$lower} = $current;
  70         }
  71     }
  72 }
  73
  74 close (INPUT);
  75
  76 foreach $key (sort keys %output)
  77 {
  78     print "    hash.put (\"$key\", \"$output{$key}\");\n";
  79 }