src/util/xmlpool/gen_xmlpool.py

   1
   2 #
   3 # Usage:
   4 #     gen_xmlpool.py /path/to/t_options.h localedir lang lang lang ...
   5 #
   6 # For each given language, this script expects to find a .mo file at
   7 # `{localedir}/{language}/LC_MESSAGES/options.mo`.
   8 #
   9
  10 from __future__ import print_function
  11
  12 import sys
  13 import gettext
  14 import re
  15
  16
  17 if sys.version_info < (3, 0):
  18     gettext_method = 'ugettext'
  19 else:
  20     gettext_method = 'gettext'
  21
  22 # Path to t_options.h
  23 template_header_path = sys.argv[1]
  24
  25 localedir = sys.argv[2]
  26
  27 # List of supported languages
  28 languages = sys.argv[3:]
  29
  30 # Escape special characters in C strings
  31 def escapeCString (s):
  32     escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
  33                   '\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
  34     # " -> '' is a hack. Quotes (") aren't possible in XML attributes.
  35     # Better use Unicode characters for typographic quotes in option
  36     # descriptions and translations.
  37     i = 0
  38     r = ''
  39     while i < len(s):
  40         # Special case: escape double quote with \u201c or \u201d, depending
  41         # on whether it's an open or close quote. This is needed because plain
  42         # double quotes are not possible in XML attributes.
  43         if s[i] == '"':
  44             if i == len(s)-1 or s[i+1].isspace():
  45                 # close quote
  46                 q = u'\u201c'
  47             else:
  48                 # open quote
  49                 q = u'\u201d'
  50             r = r + q
  51         elif s[i] in escapeSeqs:
  52             r = r + escapeSeqs[s[i]]
  53         else:
  54             r = r + s[i]
  55         i = i + 1
  56     return r
  57
  58 # Expand escape sequences in C strings (needed for gettext lookup)
  59 def expandCString (s):
  60     escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
  61                   'r' : '\r', 't' : '\t', 'v' : '\v',
  62                   '"' : '"', '\\' : '\\'}
  63     i = 0
  64     escape = False
  65     hexa = False
  66     octa = False
  67     num = 0
  68     digits = 0
  69     r = u''
  70     while i < len(s):
  71         if not escape:
  72             if s[i] == '\\':
  73                 escape = True
  74             else:
  75                 r = r + s[i]
  76         elif hexa:
  77             if (s[i] >= '0' and s[i] <= '9') or \
  78                (s[i] >= 'a' and s[i] <= 'f') or \
  79                (s[i] >= 'A' and s[i] <= 'F'):
  80                 num = num * 16 + int(s[i],16)
  81                 digits = digits + 1
  82             else:
  83                 digits = 2
  84             if digits >= 2:
  85                 hexa = False
  86                 escape = False
  87                 r = r + chr(num)
  88         elif octa:
  89             if s[i] >= '0' and s[i] <= '7':
  90                 num = num * 8 + int(s[i],8)
  91                 digits = digits + 1
  92             else:
  93                 digits = 3
  94             if digits >= 3:
  95                 octa = False
  96                 escape = False
  97                 r = r + chr(num)
  98         else:
  99             if s[i] in escapeSeqs:
 100                 r = r + escapeSeqs[s[i]]
 101                 escape = False
 102             elif s[i] >= '0' and s[i] <= '7':
 103                 octa = True
 104                 num = int(s[i],8)
 105                 if num <= 3:
 106                     digits = 1
 107                 else:
 108                     digits = 2
 109             elif s[i] == 'x' or s[i] == 'X':
 110                 hexa = True
 111                 num = 0
 112                 digits = 0
 113             else:
 114                 r = r + s[i]
 115                 escape = False
 116         i = i + 1
 117     return r
 118
 119 # Expand matches. The first match is always a DESC or DESC_BEGIN match.
 120 # Subsequent matches are ENUM matches.
 121 #
 122 # DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
 123 # ENUM format:             \1 \2=gettext(" \3=<text> \4=") \5
 124 def expandMatches (matches, translations, end=None):
 125     assert len(matches) > 0
 126     nTranslations = len(translations)
 127     i = 0
 128     # Expand the description+enums for all translations
 129     for lang,trans in translations:
 130         i = i + 1
 131         # Make sure that all but the last line of a simple description
 132         # are extended with a backslash.
 133         suffix = ''
 134         if len(matches) == 1 and i < len(translations) and \
 135                not matches[0].expand (r'\7').endswith('\\'):
 136             suffix = ' \\'
 137         text = escapeCString (getattr(trans, gettext_method) (expandCString (
 138             matches[0].expand (r'\5'))))
 139         text = (matches[0].expand (r'\1' + lang + r'\3"' + text + r'"\7') + suffix)
 140
 141         # In Python 2, stdout expects encoded byte strings, or else it will
 142         # encode them with the ascii 'codec'
 143         if sys.version_info.major == 2:
 144             text = text.encode('utf-8')
 145
 146         print(text)
 147
 148         # Expand any subsequent enum lines
 149         for match in matches[1:]:
 150             text = escapeCString (getattr(trans, gettext_method) (expandCString (
 151                 match.expand (r'\3'))))
 152             text = match.expand (r'\1"' + text + r'"\5')
 153
 154             # In Python 2, stdout expects encoded byte strings, or else it will
 155             # encode them with the ascii 'codec'
 156             if sys.version_info.major == 2:
 157                 text = text.encode('utf-8')
 158
 159             print(text)
 160
 161         # Expand description end
 162         if end:
 163             print(end, end='')
 164
 165 # Compile a list of translation classes to all supported languages.
 166 # The first translation is always a NullTranslations.
 167 translations = [("en", gettext.NullTranslations())]
 168 for lang in languages:
 169     try:
 170         trans = gettext.translation ("options", localedir, [lang])
 171     except IOError:
 172         sys.stderr.write ("Warning: language '%s' not found.\n" % lang)
 173         continue
 174     translations.append ((lang, trans))
 175
 176 # Regular expressions:
 177 reLibintl_h  = re.compile (r'#\s*include\s*<libintl.h>')
 178 reDESC       = re.compile (r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 179 reDESC_BEGIN = re.compile (r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 180 reENUM       = re.compile (r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 181 reDESC_END   = re.compile (r'\s*DRI_CONF_DESC_END')
 182
 183 # Print a header
 184 print("/***********************************************************************\n" \
 185 " ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***\n" \
 186 " ***********************************************************************/")
 187
 188 # Process the options template and generate options.h with all
 189 # translations.
 190 template = open (template_header_path, "rb")
 191 descMatches = []
 192 for line in template:
 193     line = line.decode('utf-8')
 194
 195     if len(descMatches) > 0:
 196         matchENUM     = reENUM    .match (line)
 197         matchDESC_END = reDESC_END.match (line)
 198         if matchENUM:
 199             descMatches.append (matchENUM)
 200         elif matchDESC_END:
 201             expandMatches (descMatches, translations, line)
 202             descMatches = []
 203         else:
 204             sys.stderr.write (
 205                 "Warning: unexpected line inside description dropped:\n%s\n" \
 206                 % line)
 207         continue
 208     if reLibintl_h.search (line):
 209         # Ignore (comment out) #include <libintl.h>
 210         print("/* %s * commented out by gen_xmlpool.py */" % line)
 211         continue
 212     matchDESC       = reDESC      .match (line)
 213     matchDESC_BEGIN = reDESC_BEGIN.match (line)
 214     if matchDESC:
 215         assert len(descMatches) == 0
 216         expandMatches ([matchDESC], translations)
 217     elif matchDESC_BEGIN:
 218         assert len(descMatches) == 0
 219         descMatches = [matchDESC_BEGIN]
 220     else:
 221         # In Python 2, stdout expects encoded byte strings, or else it will
 222         # encode them with the ascii 'codec'
 223         if sys.version_info.major == 2:
 224            line = line.encode('utf-8')
 225
 226         print(line, end='')
 227
 228 template.close()
 229
 230 if len(descMatches) > 0:
 231     sys.stderr.write ("Warning: unterminated description at end of file.\n")
 232     expandMatches (descMatches, translations)