src/util/xmlpool/gen_xmlpool.py

   1
   2 #
   3 # Usage:
   4 #     gen_xmlpool.py /path/to/t_option.h localedir lang lang lang ...
   5 #
# For each given language, this script expects to find a compiled message
# catalog at `{localedir}/{language}.gmo`.
   8 #
   9
  10 from __future__ import print_function
  11 import argparse
  12 import gettext
  13 import io
  14 import os
  15 import re
  16 import sys
  17
  18 parser = argparse.ArgumentParser()
  19 parser.add_argument('template')
  20 parser.add_argument('localedir')
  21 parser.add_argument('languages', nargs='*')
  22 args = parser.parse_args()
  23
  24 if sys.version_info < (3, 0):
  25     gettext_method = 'ugettext'
  26 else:
  27     gettext_method = 'gettext'
  28
  29 # Escape special characters in C strings
  30 def escapeCString (s):
  31     escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
  32                   '\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
  33     # " -> '' is a hack. Quotes (") aren't possible in XML attributes.
  34     # Better use Unicode characters for typographic quotes in option
  35     # descriptions and translations.
  36     i = 0
  37     r = ''
  38     while i < len(s):
  39         # Special case: escape double quote with \u201c or \u201d, depending
  40         # on whether it's an open or close quote. This is needed because plain
  41         # double quotes are not possible in XML attributes.
  42         if s[i] == '"':
  43             if i == len(s)-1 or s[i+1].isspace():
  44                 # close quote
  45                 q = u'\u201c'
  46             else:
  47                 # open quote
  48                 q = u'\u201d'
  49             r = r + q
  50         elif s[i] in escapeSeqs:
  51             r = r + escapeSeqs[s[i]]
  52         else:
  53             r = r + s[i]
  54         i = i + 1
  55     return r
  56
  57 # Expand escape sequences in C strings (needed for gettext lookup)
  58 def expandCString (s):
  59     escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
  60                   'r' : '\r', 't' : '\t', 'v' : '\v',
  61                   '"' : '"', '\\' : '\\'}
  62     i = 0
  63     escape = False
  64     hexa = False
  65     octa = False
  66     num = 0
  67     digits = 0
  68     r = u''
  69     while i < len(s):
  70         if not escape:
  71             if s[i] == '\\':
  72                 escape = True
  73             else:
  74                 r = r + s[i]
  75         elif hexa:
  76             if (s[i] >= '0' and s[i] <= '9') or \
  77                (s[i] >= 'a' and s[i] <= 'f') or \
  78                (s[i] >= 'A' and s[i] <= 'F'):
  79                 num = num * 16 + int(s[i],16)
  80                 digits = digits + 1
  81             else:
  82                 digits = 2
  83             if digits >= 2:
  84                 hexa = False
  85                 escape = False
  86                 r = r + chr(num)
  87         elif octa:
  88             if s[i] >= '0' and s[i] <= '7':
  89                 num = num * 8 + int(s[i],8)
  90                 digits = digits + 1
  91             else:
  92                 digits = 3
  93             if digits >= 3:
  94                 octa = False
  95                 escape = False
  96                 r = r + chr(num)
  97         else:
  98             if s[i] in escapeSeqs:
  99                 r = r + escapeSeqs[s[i]]
 100                 escape = False
 101             elif s[i] >= '0' and s[i] <= '7':
 102                 octa = True
 103                 num = int(s[i],8)
 104                 if num <= 3:
 105                     digits = 1
 106                 else:
 107                     digits = 2
 108             elif s[i] == 'x' or s[i] == 'X':
 109                 hexa = True
 110                 num = 0
 111                 digits = 0
 112             else:
 113                 r = r + s[i]
 114                 escape = False
 115         i = i + 1
 116     return r
 117
 118 # Expand matches. The first match is always a DESC or DESC_BEGIN match.
 119 # Subsequent matches are ENUM matches.
 120 #
 121 # DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
 122 # ENUM format:             \1 \2=gettext(" \3=<text> \4=") \5
 123 def expandMatches (matches, translations, end=None):
 124     assert len(matches) > 0
 125     nTranslations = len(translations)
 126     i = 0
 127     # Expand the description+enums for all translations
 128     for lang,trans in translations:
 129         i = i + 1
 130         # Make sure that all but the last line of a simple description
 131         # are extended with a backslash.
 132         suffix = ''
 133         if len(matches) == 1 and i < len(translations) and \
 134                not matches[0].expand (r'\7').endswith('\\'):
 135             suffix = ' \\'
 136         text = escapeCString (getattr(trans, gettext_method) (expandCString (
 137             matches[0].expand (r'\5'))))
 138         text = (matches[0].expand (r'\1' + lang + r'\3"' + text + r'"\7') + suffix)
 139
 140         # In Python 2, stdout expects encoded byte strings, or else it will
 141         # encode them with the ascii 'codec'
 142         if sys.version_info.major == 2:
 143             text = text.encode('utf-8')
 144
 145         print(text)
 146
 147         # Expand any subsequent enum lines
 148         for match in matches[1:]:
 149             text = escapeCString (getattr(trans, gettext_method) (expandCString (
 150                 match.expand (r'\3'))))
 151             text = match.expand (r'\1"' + text + r'"\5')
 152
 153             # In Python 2, stdout expects encoded byte strings, or else it will
 154             # encode them with the ascii 'codec'
 155             if sys.version_info.major == 2:
 156                 text = text.encode('utf-8')
 157
 158             print(text)
 159
 160         # Expand description end
 161         if end:
 162             print(end, end='')
 163
 164 # Compile a list of translation classes to all supported languages.
 165 # The first translation is always a NullTranslations.
 166 translations = [("en", gettext.NullTranslations())]
 167 for lang in args.languages:
 168     try:
 169         filename = os.path.join(args.localedir, '{}.gmo'.format(lang))
 170         with io.open(filename, 'rb') as f:
 171             trans = gettext.GNUTranslations(f)
 172     except (IOError, OSError):
 173         sys.stderr.write ("Warning: language '%s' not found.\n" % lang)
 174         continue
 175     translations.append ((lang, trans))
 176
 177 # Regular expressions:
 178 reLibintl_h  = re.compile (r'#\s*include\s*<libintl.h>')
 179 reDESC       = re.compile (r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 180 reDESC_BEGIN = re.compile (r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 181 reENUM       = re.compile (r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 182 reDESC_END   = re.compile (r'\s*DRI_CONF_DESC_END')
 183
 184 # Print a header
 185 print("/***********************************************************************\n" \
 186 " ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***\n" \
 187 " ***********************************************************************/")
 188
 189 # Process the options template and generate options.h with all
 190 # translations.
 191 template = io.open (args.template, mode="rt", encoding='utf-8')
 192 descMatches = []
 193 for line in template:
 194     if len(descMatches) > 0:
 195         matchENUM     = reENUM    .match (line)
 196         matchDESC_END = reDESC_END.match (line)
 197         if matchENUM:
 198             descMatches.append (matchENUM)
 199         elif matchDESC_END:
 200             expandMatches (descMatches, translations, line)
 201             descMatches = []
 202         else:
 203             sys.stderr.write (
 204                 "Warning: unexpected line inside description dropped:\n%s\n" \
 205                 % line)
 206         continue
 207     if reLibintl_h.search (line):
 208         # Ignore (comment out) #include <libintl.h>
 209         print("/* %s * commented out by gen_xmlpool.py */" % line)
 210         continue
 211     matchDESC       = reDESC      .match (line)
 212     matchDESC_BEGIN = reDESC_BEGIN.match (line)
 213     if matchDESC:
 214         assert len(descMatches) == 0
 215         expandMatches ([matchDESC], translations)
 216     elif matchDESC_BEGIN:
 217         assert len(descMatches) == 0
 218         descMatches = [matchDESC_BEGIN]
 219     else:
 220         # In Python 2, stdout expects encoded byte strings, or else it will
 221         # encode them with the ascii 'codec'
 222         if sys.version_info.major == 2:
 223            line = line.encode('utf-8')
 224
 225         print(line, end='')
 226
 227 template.close()
 228
 229 if len(descMatches) > 0:
 230     sys.stderr.write ("Warning: unterminated description at end of file.\n")
 231     expandMatches (descMatches, translations)