src/util/xmlpool/gen_xmlpool.py

   1
   2 #
   3 # Usage:
   4 #     gen_xmlpool.py /path/to/t_option.h localedir lang lang lang ...
   5 #
# For each given language, this script expects to find a .gmo file at
# `{localedir}/{language}.gmo`.
   8 #
   9
  10 from __future__ import print_function
  11 import argparse
  12 import gettext
  13 import io
  14 import os
  15 import re
  16 import sys
  17
  18 if sys.version_info < (3, 0):
  19     gettext_method = 'ugettext'
  20 else:
  21     gettext_method = 'gettext'
  22
  23 # Escape special characters in C strings
  24 def escapeCString(s):
  25     escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
  26                   '\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
  27     # " -> '' is a hack. Quotes (") aren't possible in XML attributes.
  28     # Better use Unicode characters for typographic quotes in option
  29     # descriptions and translations.
  30     i = 0
  31     r = ''
  32     while i < len(s):
  33         # Special case: escape double quote with \u201c or \u201d, depending
  34         # on whether it's an open or close quote. This is needed because plain
  35         # double quotes are not possible in XML attributes.
  36         if s[i] == '"':
  37             if i == len(s) - 1 or s[i + 1].isspace():
  38                 # close quote
  39                 q = u'\u201c'
  40             else:
  41                 # open quote
  42                 q = u'\u201d'
  43             r = r + q
  44         elif s[i] in escapeSeqs:
  45             r = r + escapeSeqs[s[i]]
  46         else:
  47             r = r + s[i]
  48         i = i + 1
  49     return r
  50
  51 # Expand escape sequences in C strings (needed for gettext lookup)
  52 def expandCString(s):
  53     escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
  54                   'r' : '\r', 't' : '\t', 'v' : '\v',
  55                   '"' : '"', '\\' : '\\'}
  56     i = 0
  57     escape = False
  58     hexa = False
  59     octa = False
  60     num = 0
  61     digits = 0
  62     r = u''
  63     while i < len(s):
  64         if not escape:
  65             if s[i] == '\\':
  66                 escape = True
  67             else:
  68                 r = r + s[i]
  69         elif hexa:
  70             if (s[i] >= '0' and s[i] <= '9') or \
  71                (s[i] >= 'a' and s[i] <= 'f') or \
  72                (s[i] >= 'A' and s[i] <= 'F'):
  73                 num = num * 16 + int(s[i],16)
  74                 digits = digits + 1
  75             else:
  76                 digits = 2
  77             if digits >= 2:
  78                 hexa = False
  79                 escape = False
  80                 r = r + chr(num)
  81         elif octa:
  82             if s[i] >= '0' and s[i] <= '7':
  83                 num = num * 8 + int(s[i],8)
  84                 digits = digits + 1
  85             else:
  86                 digits = 3
  87             if digits >= 3:
  88                 octa = False
  89                 escape = False
  90                 r = r + chr(num)
  91         else:
  92             if s[i] in escapeSeqs:
  93                 r = r + escapeSeqs[s[i]]
  94                 escape = False
  95             elif s[i] >= '0' and s[i] <= '7':
  96                 octa = True
  97                 num = int(s[i],8)
  98                 if num <= 3:
  99                     digits = 1
 100                 else:
 101                     digits = 2
 102             elif s[i] == 'x' or s[i] == 'X':
 103                 hexa = True
 104                 num = 0
 105                 digits = 0
 106             else:
 107                 r = r + s[i]
 108                 escape = False
 109         i = i + 1
 110     return r
 111
 112 # Expand matches. The first match is always a DESC or DESC_BEGIN match.
 113 # Subsequent matches are ENUM matches.
 114 #
 115 # DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
 116 # ENUM format:             \1 \2=gettext(" \3=<text> \4=") \5
 117 def expandMatches(matches, translations, end=None):
 118     assert len(matches) > 0
 119     nTranslations = len(translations)
 120     i = 0
 121     # Expand the description+enums for all translations
 122     for lang,trans in translations:
 123         i = i + 1
 124         # Make sure that all but the last line of a simple description
 125         # are extended with a backslash.
 126         suffix = ''
 127         if len(matches) == 1 and i < len(translations) and \
 128                not matches[0].expand(r'\7').endswith('\\'):
 129             suffix = ' \\'
 130         text = escapeCString(getattr(trans, gettext_method)(expandCString(
 131             matches[0].expand (r'\5'))))
 132         text = (matches[0].expand(r'\1' + lang + r'\3"' + text + r'"\7') + suffix)
 133
 134         # In Python 2, stdout expects encoded byte strings, or else it will
 135         # encode them with the ascii 'codec'
 136         if sys.version_info.major == 2:
 137             text = text.encode('utf-8')
 138
 139         print(text)
 140
 141         # Expand any subsequent enum lines
 142         for match in matches[1:]:
 143             text = escapeCString(getattr(trans, gettext_method)(expandCString(
 144                 match.expand(r'\3'))))
 145             text = match.expand(r'\1"' + text + r'"\5')
 146
 147             # In Python 2, stdout expects encoded byte strings, or else it will
 148             # encode them with the ascii 'codec'
 149             if sys.version_info.major == 2:
 150                 text = text.encode('utf-8')
 151
 152             print(text)
 153
 154         # Expand description end
 155         if end:
 156             print(end, end='')
 157
 158 # Regular expressions:
 159 reLibintl_h = re.compile(r'#\s*include\s*<libintl.h>')
 160 reDESC = re.compile(r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 161 reDESC_BEGIN = re.compile(r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 162 reENUM = re.compile(r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
 163 reDESC_END = re.compile(r'\s*DRI_CONF_DESC_END')
 164
 165
 166 def main():
 167     parser = argparse.ArgumentParser()
 168     parser.add_argument('template')
 169     parser.add_argument('localedir')
 170     parser.add_argument('languages', nargs='*')
 171     args = parser.parse_args()
 172
 173     # Compile a list of translation classes to all supported languages.
 174     # The first translation is always a NullTranslations.
 175     translations = [("en", gettext.NullTranslations())]
 176     for lang in args.languages:
 177         try:
 178             filename = os.path.join(args.localedir, '{}.gmo'.format(lang))
 179             with io.open(filename, 'rb') as f:
 180                 trans = gettext.GNUTranslations(f)
 181         except (IOError, OSError):
 182             print("Warning: language '%s' not found." % lang, file=sys.stderr)
 183             continue
 184         translations.append((lang, trans))
 185
 186     print("/***********************************************************************\n" \
 187     " ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***\n" \
 188     " ***********************************************************************/")
 189
 190     # Process the options template and generate options.h with all
 191     # translations.
 192     template = io.open(args.template, mode="rt", encoding='utf-8')
 193     descMatches = []
 194     for line in template:
 195         if len(descMatches) > 0:
 196             matchENUM = reENUM.match(line)
 197             matchDESC_END = reDESC_END.match(line)
 198             if matchENUM:
 199                 descMatches.append(matchENUM)
 200             elif matchDESC_END:
 201                 expandMatches(descMatches, translations, line)
 202                 descMatches = []
 203             else:
 204                 print("Warning: unexpected line inside description dropped:\n", line,
 205                       file=sys.stderr)
 206             continue
 207         if reLibintl_h.search(line):
 208             # Ignore (comment out) #include <libintl.h>
 209             print("/* %s * commented out by gen_xmlpool.py */" % line)
 210             continue
 211         matchDESC = reDESC.match(line)
 212         matchDESC_BEGIN = reDESC_BEGIN.match(line)
 213         if matchDESC:
 214             assert len(descMatches) == 0
 215             expandMatches([matchDESC], translations)
 216         elif matchDESC_BEGIN:
 217             assert len(descMatches) == 0
 218             descMatches = [matchDESC_BEGIN]
 219         else:
 220             # In Python 2, stdout expects encoded byte strings, or else it will
 221             # encode them with the ascii 'codec'
 222             if sys.version_info.major == 2:
 223                line = line.encode('utf-8')
 224
 225             print(line, end='')
 226
 227     template.close()
 228
 229     if len(descMatches) > 0:
 230         print("Warning: unterminated description at end of file.", file=sys.stderr)
 231         expandMatches(descMatches, translations)
 232
 233
 234 if __name__ == '__main__':
 235     main()