util/sort_includes.py

   1 #!/usr/bin/env python
   2
   3 import os
   4 import re
   5 import sys
   6
   7 from file_types import *
   8
   9 cpp_c_headers = {
  10     'assert.h' : 'cassert',
  11     'ctype.h'  : 'cctype',
  12     'errno.h'  : 'cerrno',
  13     'float.h'  : 'cfloat',
  14     'limits.h' : 'climits',
  15     'locale.h' : 'clocale',
  16     'math.h'   : 'cmath',
  17     'setjmp.h' : 'csetjmp',
  18     'signal.h' : 'csignal',
  19     'stdarg.h' : 'cstdarg',
  20     'stddef.h' : 'cstddef',
  21     'stdio.h'  : 'cstdio',
  22     'stdlib.h' : 'cstdlib',
  23     'string.h' : 'cstring',
  24     'time.h'   : 'ctime',
  25     'wchar.h'  : 'cwchar',
  26     'wctype.h' : 'cwctype',
  27 }
  28
  29 include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
  30 def include_key(line):
  31     '''Mark directories with a leading space so directories
  32     are sorted before files'''
  33
  34     match = include_re.match(line)
  35     assert match, line
  36     keyword = match.group(2)
  37     include = match.group(3)
  38
  39     # Everything but the file part needs to have a space prepended
  40     parts = include.split('/')
  41     if len(parts) == 2 and parts[0] == 'dnet':
  42         # Don't sort the dnet includes with respect to each other, but
  43         # make them sorted with respect to non dnet includes.  Python
  44         # guarantees that sorting is stable, so just clear the
  45         # basename part of the filename.
  46         parts[1] = ' '
  47     parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
  48     key = '/'.join(parts)
  49
  50     return key
  51
  52
  53 def _include_matcher(keyword="#include", delim="<>"):
  54     """Match an include statement and return a (keyword, file, extra)
  55     duple, or a touple of None values if there isn't a match."""
  56
  57     rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
  58
  59     def matcher(context, line):
  60         m = rex.match(line)
  61         return m.groups() if m else (None, ) * 3
  62
  63     return matcher
  64
  65 def _include_matcher_fname(fname, **kwargs):
  66     """Match an include of a specific file name. Any keyword arguments
  67     are forwarded to _include_matcher, which is used to match the
  68     actual include line."""
  69
  70     rex = re.compile(fname)
  71     base_matcher = _include_matcher(**kwargs)
  72
  73     def matcher(context, line):
  74         (keyword, fname, extra) = base_matcher(context, line)
  75         if fname and rex.match(fname):
  76             return (keyword, fname, extra)
  77         else:
  78             return (None, ) * 3
  79
  80     return matcher
  81
  82
  83 def _include_matcher_main():
  84     """Match a C/C++ source file's primary header (i.e., a file with
  85     the same base name, but a header extension)."""
  86
  87     base_matcher = _include_matcher(delim='""')
  88     rex = re.compile(r"^src/(.*)\.([^.]+)$")
  89     header_map = {
  90         "c" : "h",
  91         "cc" : "hh",
  92         "cpp" : "hh",
  93         }
  94     def matcher(context, line):
  95         m = rex.match(context["filename"])
  96         if not m:
  97             return (None, ) * 3
  98         base, ext = m.groups()
  99         (keyword, fname, extra) = base_matcher(context, line)
 100         try:
 101             if fname == "%s.%s" % (base, header_map[ext]):
 102                 return (keyword, fname, extra)
 103         except KeyError:
 104             pass
 105
 106         return (None, ) * 3
 107
 108     return matcher
 109
 110 class SortIncludes(object):
 111     # different types of includes for different sorting of headers
 112     # <Python.h>         - Python header needs to be first if it exists
 113     # <*.h>              - system headers (directories before files)
 114     # <*>                - STL headers
 115     # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
 116     # "*"                - M5 headers (directories before files)
 117     includes_re = (
 118         ('main', '""', _include_matcher_main()),
 119         ('python', '<>', _include_matcher_fname("^Python\.h$")),
 120         ('c', '<>', _include_matcher_fname("^.*\.h$")),
 121         ('stl', '<>', _include_matcher_fname("^\w+$")),
 122         ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
 123         ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
 124         ('swig0', '<>', _include_matcher(keyword="%import")),
 125         ('swig1', '<>', _include_matcher(keyword="%include")),
 126         ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
 127         ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
 128         )
 129
 130     block_order = (
 131         ('main', ),
 132         ('python', ),
 133         ('c', ),
 134         ('stl', ),
 135         ('cc', ),
 136         ('m5header', ),
 137         ('swig0', 'swig1', 'swig2', 'swig3', ),
 138         )
 139
 140     def __init__(self):
 141         self.block_priority = {}
 142         for prio, keys in enumerate(self.block_order):
 143             for key in keys:
 144                 self.block_priority[key] = prio
 145
 146     def reset(self):
 147         # clear all stored headers
 148         self.includes = {}
 149
 150     def dump_blocks(self, block_types):
 151         """Merge includes of from several block types into one large
 152         block of sorted includes. This is useful when we have multiple
 153         include block types (e.g., swig includes) with the same
 154         priority."""
 155
 156         includes = []
 157         for block_type in block_types:
 158             try:
 159                 includes += self.includes[block_type]
 160             except KeyError:
 161                 pass
 162
 163         return sorted(set(includes))
 164
 165     def dump_includes(self):
 166         blocks = []
 167         # Create a list of blocks in the prescribed include
 168         # order. Each entry in the list is a multi-line string with
 169         # multiple includes.
 170         for types in self.block_order:
 171             block = "\n".join(self.dump_blocks(types))
 172             if block:
 173                 blocks.append(block)
 174
 175         self.reset()
 176         return "\n\n".join(blocks)
 177
 178     def __call__(self, lines, filename, language):
 179         self.reset()
 180
 181         context = {
 182             "filename" : filename,
 183             "language" : language,
 184             }
 185
 186         def match_line(line):
 187             if not line:
 188                 return (None, line)
 189
 190             for include_type, (ldelim, rdelim), matcher in self.includes_re:
 191                 keyword, include, extra = matcher(context, line)
 192                 if keyword:
 193                     # if we've got a match, clean up the #include line,
 194                     # fix up stl headers and store it in the proper category
 195                     if include_type == 'c' and language == 'C++':
 196                         stl_inc = cpp_c_headers.get(include, None)
 197                         if stl_inc:
 198                             include = stl_inc
 199                             include_type = 'stl'
 200
 201                     return (include_type,
 202                             keyword + ' ' + ldelim + include + rdelim + extra)
 203
 204             return (None, line)
 205
 206         processing_includes = False
 207         for line in lines:
 208             include_type, line = match_line(line)
 209             if include_type:
 210                 try:
 211                     self.includes[include_type].append(line)
 212                 except KeyError:
 213                     self.includes[include_type] = [ line ]
 214
 215                 processing_includes = True
 216             elif processing_includes and not line.strip():
 217                 # Skip empty lines while processing includes
 218                 pass
 219             elif processing_includes:
 220                 # We are now exiting an include block
 221                 processing_includes = False
 222
 223                 # Output pending includes, a new line between, and the
 224                 # current l.
 225                 yield self.dump_includes()
 226                 yield ''
 227                 yield line
 228             else:
 229                 # We are not in an include block, so just emit the line
 230                 yield line
 231
 232         # We've reached EOF, so dump any pending includes
 233         if processing_includes:
 234             yield self.dump_includes()
 235
 236
 237
 238 # default language types to try to apply our sorting rules to
 239 default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
 240
 241 def options():
 242     import optparse
 243     options = optparse.OptionParser()
 244     add_option = options.add_option
 245     add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
 246                default=','.join(default_dir_ignore),
 247                help="ignore directories")
 248     add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
 249                default=','.join(default_file_ignore),
 250                help="ignore files")
 251     add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
 252                default=','.join(default_languages),
 253                help="languages")
 254     add_option('-n', '--dry-run', action='store_true',
 255                help="don't overwrite files")
 256
 257     return options
 258
 259 def parse_args(parser):
 260     opts,args = parser.parse_args()
 261
 262     opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
 263     opts.file_ignore = frozenset(opts.file_ignore.split(','))
 264     opts.languages = frozenset(opts.languages.split(','))
 265
 266     return opts,args
 267
 268 if __name__ == '__main__':
 269     parser = options()
 270     opts, args = parse_args(parser)
 271
 272     for base in args:
 273         for filename,language in find_files(base, languages=opts.languages,
 274                 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
 275             if opts.dry_run:
 276                 print "%s: %s" % (filename, language)
 277             else:
 278                 update_file(filename, filename, language, SortIncludes())