ext/pybind11/tools/mkdoc.py

   1 #!/usr/bin/env python3
   2 #
   3 #  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
   4 #
   5 #  Extract documentation from C++ header files to use it in Python bindings
   6 #
   7
   8 import os
   9 import sys
  10 import platform
  11 import re
  12 import textwrap
  13
  14 from clang import cindex
  15 from clang.cindex import CursorKind
  16 from collections import OrderedDict
  17 from threading import Thread, Semaphore
  18 from multiprocessing import cpu_count
  19
  20 RECURSE_LIST = [
  21     CursorKind.TRANSLATION_UNIT,
  22     CursorKind.NAMESPACE,
  23     CursorKind.CLASS_DECL,
  24     CursorKind.STRUCT_DECL,
  25     CursorKind.ENUM_DECL,
  26     CursorKind.CLASS_TEMPLATE
  27 ]
  28
  29 PRINT_LIST = [
  30     CursorKind.CLASS_DECL,
  31     CursorKind.STRUCT_DECL,
  32     CursorKind.ENUM_DECL,
  33     CursorKind.ENUM_CONSTANT_DECL,
  34     CursorKind.CLASS_TEMPLATE,
  35     CursorKind.FUNCTION_DECL,
  36     CursorKind.FUNCTION_TEMPLATE,
  37     CursorKind.CONVERSION_FUNCTION,
  38     CursorKind.CXX_METHOD,
  39     CursorKind.CONSTRUCTOR,
  40     CursorKind.FIELD_DECL
  41 ]
  42
  43 CPP_OPERATORS = {
  44     '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
  45     '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
  46     'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
  47     '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
  48     'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
  49     '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
  50     'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
  51 }
  52
  53 CPP_OPERATORS = OrderedDict(
  54     sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
  55
  56 job_count = cpu_count()
  57 job_semaphore = Semaphore(job_count)
  58
  59 output = []
  60
  61 def d(s):
  62     return s.decode('utf8')
  63
  64
  65 def sanitize_name(name):
  66     name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
  67     for k, v in CPP_OPERATORS.items():
  68         name = name.replace('operator%s' % k, 'operator_%s' % v)
  69     name = re.sub('<.*>', '', name)
  70     name = ''.join([ch if ch.isalnum() else '_' for ch in name])
  71     name = re.sub('_$', '', re.sub('_+', '_', name))
  72     return '__doc_' + name
  73
  74
  75 def process_comment(comment):
  76     result = ''
  77
  78     # Remove C++ comment syntax
  79     leading_spaces = float('inf')
  80     for s in comment.expandtabs(tabsize=4).splitlines():
  81         s = s.strip()
  82         if s.startswith('/*'):
  83             s = s[2:].lstrip('*')
  84         elif s.endswith('*/'):
  85             s = s[:-2].rstrip('*')
  86         elif s.startswith('///'):
  87             s = s[3:]
  88         if s.startswith('*'):
  89             s = s[1:]
  90         if len(s) > 0:
  91             leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
  92         result += s + '\n'
  93
  94     if leading_spaces != float('inf'):
  95         result2 = ""
  96         for s in result.splitlines():
  97             result2 += s[leading_spaces:] + '\n'
  98         result = result2
  99
 100     # Doxygen tags
 101     cpp_group = '([\w:]+)'
 102     param_group = '([\[\w:\]]+)'
 103
 104     s = result
 105     s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
 106     s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
 107     s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
 108     s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
 109     s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
 110     s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
 111     s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
 112                r'\n\n$Parameter ``\2``:\n\n', s)
 113     s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
 114                r'\n\n$Template parameter ``\2``:\n\n', s)
 115
 116     for in_, out_ in {
 117         'return': 'Returns',
 118         'author': 'Author',
 119         'authors': 'Authors',
 120         'copyright': 'Copyright',
 121         'date': 'Date',
 122         'remark': 'Remark',
 123         'sa': 'See also',
 124         'see': 'See also',
 125         'extends': 'Extends',
 126         'throw': 'Throws',
 127         'throws': 'Throws'
 128     }.items():
 129         s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
 130
 131     s = re.sub(r'\\details\s*', r'\n\n', s)
 132     s = re.sub(r'\\brief\s*', r'', s)
 133     s = re.sub(r'\\short\s*', r'', s)
 134     s = re.sub(r'\\ref\s*', r'', s)
 135
 136     s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
 137                r"```\n\1\n```\n", s, flags=re.DOTALL)
 138
 139     # HTML/TeX tags
 140     s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
 141     s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
 142     s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
 143     s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
 144     s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
 145     s = re.sub(r'<li>', r'\n\n* ', s)
 146     s = re.sub(r'</?ul>', r'', s)
 147     s = re.sub(r'</li>', r'\n\n', s)
 148
 149     s = s.replace('``true``', '``True``')
 150     s = s.replace('``false``', '``False``')
 151
 152     # Re-flow text
 153     wrapper = textwrap.TextWrapper()
 154     wrapper.expand_tabs = True
 155     wrapper.replace_whitespace = True
 156     wrapper.drop_whitespace = True
 157     wrapper.width = 70
 158     wrapper.initial_indent = wrapper.subsequent_indent = ''
 159
 160     result = ''
 161     in_code_segment = False
 162     for x in re.split(r'(```)', s):
 163         if x == '```':
 164             if not in_code_segment:
 165                 result += '```\n'
 166             else:
 167                 result += '\n```\n\n'
 168             in_code_segment = not in_code_segment
 169         elif in_code_segment:
 170             result += x.strip()
 171         else:
 172             for y in re.split(r'(?: *\n *){2,}', x):
 173                 wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
 174                 if len(wrapped) > 0 and wrapped[0] == '$':
 175                     result += wrapped[1:] + '\n'
 176                     wrapper.initial_indent = \
 177                         wrapper.subsequent_indent = ' ' * 4
 178                 else:
 179                     if len(wrapped) > 0:
 180                         result += wrapped + '\n\n'
 181                     wrapper.initial_indent = wrapper.subsequent_indent = ''
 182     return result.rstrip().lstrip('\n')
 183
 184
 185 def extract(filename, node, prefix):
 186     if not (node.location.file is None or
 187             os.path.samefile(d(node.location.file.name), filename)):
 188         return 0
 189     if node.kind in RECURSE_LIST:
 190         sub_prefix = prefix
 191         if node.kind != CursorKind.TRANSLATION_UNIT:
 192             if len(sub_prefix) > 0:
 193                 sub_prefix += '_'
 194             sub_prefix += d(node.spelling)
 195         for i in node.get_children():
 196             extract(filename, i, sub_prefix)
 197     if node.kind in PRINT_LIST:
 198         comment = d(node.raw_comment) if node.raw_comment is not None else ''
 199         comment = process_comment(comment)
 200         sub_prefix = prefix
 201         if len(sub_prefix) > 0:
 202             sub_prefix += '_'
 203         if len(node.spelling) > 0:
 204             name = sanitize_name(sub_prefix + d(node.spelling))
 205             global output
 206             output.append((name, filename, comment))
 207
 208
 209 class ExtractionThread(Thread):
 210     def __init__(self, filename, parameters):
 211         Thread.__init__(self)
 212         self.filename = filename
 213         self.parameters = parameters
 214         job_semaphore.acquire()
 215
 216     def run(self):
 217         print('Processing "%s" ..' % self.filename, file=sys.stderr)
 218         try:
 219             index = cindex.Index(
 220                 cindex.conf.lib.clang_createIndex(False, True))
 221             tu = index.parse(self.filename, self.parameters)
 222             extract(self.filename, tu.cursor, '')
 223         finally:
 224             job_semaphore.release()
 225
 226 if __name__ == '__main__':
 227     parameters = ['-x', 'c++', '-std=c++11']
 228     filenames = []
 229
 230     if platform.system() == 'Darwin':
 231         dev_path = '/Applications/Xcode.app/Contents/Developer/'
 232         lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
 233         sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
 234         libclang = lib_dir + 'libclang.dylib'
 235
 236         if os.path.exists(libclang):
 237             cindex.Config.set_library_path(os.path.dirname(libclang))
 238
 239         if os.path.exists(sdk_dir):
 240             sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
 241             parameters.append('-isysroot')
 242             parameters.append(sysroot_dir)
 243
 244     for item in sys.argv[1:]:
 245         if item.startswith('-'):
 246             parameters.append(item)
 247         else:
 248             filenames.append(item)
 249
 250     if len(filenames) == 0:
 251         print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
 252         exit(-1)
 253
 254     print('''/*
 255   This file contains docstrings for the Python bindings.
 256   Do not edit! These were automatically extracted by mkdoc.py
 257  */
 258
 259 #define __EXPAND(x)                                      x
 260 #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
 261 #define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
 262 #define __CAT1(a, b)                                     a ## b
 263 #define __CAT2(a, b)                                     __CAT1(a, b)
 264 #define __DOC1(n1)                                       __doc_##n1
 265 #define __DOC2(n1, n2)                                   __doc_##n1##_##n2
 266 #define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
 267 #define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
 268 #define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
 269 #define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
 270 #define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
 271 #define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
 272
 273 #if defined(__GNUG__)
 274 #pragma GCC diagnostic push
 275 #pragma GCC diagnostic ignored "-Wunused-variable"
 276 #endif
 277 ''')
 278
 279     output.clear()
 280     for filename in filenames:
 281         thr = ExtractionThread(filename, parameters)
 282         thr.start()
 283
 284     print('Waiting for jobs to finish ..', file=sys.stderr)
 285     for i in range(job_count):
 286         job_semaphore.acquire()
 287
 288     name_ctr = 1
 289     name_prev = None
 290     for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))):
 291         if name == name_prev:
 292             name_ctr += 1
 293             name = name + "_%i" % name_ctr
 294         else:
 295             name_prev = name
 296             name_ctr = 1
 297         print('\nstatic const char *%s =%sR"doc(%s)doc";' %
 298               (name, '\n' if '\n' in comment else ' ', comment))
 299
 300     print('''
 301 #if defined(__GNUG__)
 302 #pragma GCC diagnostic pop
 303 #endif
 304 ''')