util/sort_includes.py

   1 #!/usr/bin/env python
   2
   3 import os
   4 import re
   5 import sys
   6
   7 from file_types import *
   8
   9 cpp_c_headers = {
  10     'assert.h' : 'cassert',
  11     'ctype.h'  : 'cctype',
  12     'errno.h'  : 'cerrno',
  13     'float.h'  : 'cfloat',
  14     'limits.h' : 'climits',
  15     'locale.h' : 'clocale',
  16     'math.h'   : 'cmath',
  17     'setjmp.h' : 'csetjmp',
  18     'signal.h' : 'csignal',
  19     'stdarg.h' : 'cstdarg',
  20     'stddef.h' : 'cstddef',
  21     'stdio.h'  : 'cstdio',
  22     'stdlib.h' : 'cstdlib',
  23     'string.h' : 'cstring',
  24     'time.h'   : 'ctime',
  25     'wchar.h'  : 'cwchar',
  26     'wctype.h' : 'cwctype',
  27 }
  28
  29 include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
  30 def include_key(line):
  31     '''Mark directories with a leading space so directories
  32     are sorted before files'''
  33
  34     match = include_re.match(line)
  35     assert match, line
  36     keyword = match.group(2)
  37     include = match.group(3)
  38
  39     # Everything but the file part needs to have a space prepended
  40     parts = include.split('/')
  41     if len(parts) == 2 and parts[0] == 'dnet':
  42         # Don't sort the dnet includes with respect to each other, but
  43         # make them sorted with respect to non dnet includes.  Python
  44         # guarantees that sorting is stable, so just clear the
  45         # basename part of the filename.
  46         parts[1] = ' '
  47     parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
  48     key = '/'.join(parts)
  49
  50     return key
  51
  52 class SortIncludes(object):
  53     # different types of includes for different sorting of headers
  54     # <Python.h>         - Python header needs to be first if it exists
  55     # <*.h>              - system headers (directories before files)
  56     # <*>                - STL headers
  57     # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
  58     # "*"                - M5 headers (directories before files)
  59     includes_re = (
  60         ('python', '<>', r'^(#include)[ \t]+<(Python.*\.h)>(.*)'),
  61         ('c', '<>', r'^(#include)[ \t]<(.+\.h)>(.*)'),
  62         ('stl', '<>', r'^(#include)[ \t]+<([0-9A-z_]+)>(.*)'),
  63         ('cc', '<>', r'^(#include)[ \t]+<([0-9A-z_]+\.(hh|hxx|hpp|H))>(.*)'),
  64         ('m5cc', '""', r'^(#include)[ \t]"(.+\.h{1,2})"(.*)'),
  65         ('swig0', '<>', r'^(%import)[ \t]<(.+)>(.*)'),
  66         ('swig1', '<>', r'^(%include)[ \t]<(.+)>(.*)'),
  67         ('swig2', '""', r'^(%import)[ \t]"(.+)"(.*)'),
  68         ('swig3', '""', r'^(%include)[ \t]"(.+)"(.*)'),
  69         )
  70
  71     # compile the regexes
  72     includes_re = tuple((a, b, re.compile(c)) for a,b,c in includes_re)
  73
  74     def __init__(self):
  75         self.reset()
  76
  77     def reset(self):
  78         # clear all stored headers
  79         self.includes = {}
  80         for include_type,_,_ in self.includes_re:
  81             self.includes[include_type] = []
  82
  83     def dump_block(self):
  84         '''dump the includes'''
  85         first = True
  86         for include,_,_ in self.includes_re:
  87             if not self.includes[include]:
  88                 continue
  89
  90             if not first:
  91                 # print a newline between groups of
  92                 # include types
  93                 yield ''
  94             first = False
  95
  96             # print out the includes in the current group
  97             # and sort them according to include_key()
  98             prev = None
  99             for l in sorted(self.includes[include],
 100                             key=include_key):
 101                 if l != prev:
 102                     yield l
 103                 prev = l
 104
 105     def __call__(self, lines, filename, language):
 106         leading_blank = False
 107         blanks = 0
 108         block = False
 109
 110         for line in lines:
 111             if not line:
 112                 blanks += 1
 113                 if not block:
 114                     # if we're not in an include block, spit out the
 115                     # newline otherwise, skip it since we're going to
 116                     # control newlines withinin include block
 117                     yield ''
 118                 continue
 119
 120             # Try to match each of the include types
 121             for include_type,(ldelim,rdelim),include_re in self.includes_re:
 122                 match = include_re.match(line)
 123                 if not match:
 124                     continue
 125
 126                 # if we've got a match, clean up the #include line,
 127                 # fix up stl headers and store it in the proper category
 128                 groups = match.groups()
 129                 keyword = groups[0]
 130                 include = groups[1]
 131                 extra = groups[-1]
 132                 if include_type == 'c' and language == 'C++':
 133                     stl_inc = cpp_c_headers.get(include, None)
 134                     if stl_inc:
 135                         include = stl_inc
 136                         include_type = 'stl'
 137
 138                 line = keyword + ' ' + ldelim + include + rdelim + extra
 139
 140                 self.includes[include_type].append(line)
 141
 142                 # We've entered a block, don't keep track of blank
 143                 # lines while in a block
 144                 block = True
 145                 blanks = 0
 146                 break
 147             else:
 148                 # this line did not match a #include
 149                 assert not include_re.match(line)
 150
 151                 # if we're not in a block and we didn't match an include
 152                 # to enter a block, just emit the line and continue
 153                 if not block:
 154                     yield line
 155                     continue
 156
 157                 # We've exited an include block.
 158                 for block_line in self.dump_block():
 159                     yield block_line
 160
 161                 # if there are any newlines after the include block,
 162                 # emit a single newline (removing extras)
 163                 if blanks and block:
 164                     yield ''
 165
 166                 blanks = 0
 167                 block = False
 168                 self.reset()
 169
 170                 # emit the line that ended the block
 171                 yield line
 172
 173         if block:
 174             # We've exited an include block.
 175             for block_line in self.dump_block():
 176                 yield block_line
 177
 178
 179
 180 # default language types to try to apply our sorting rules to
 181 default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
 182
 183 def options():
 184     import optparse
 185     options = optparse.OptionParser()
 186     add_option = options.add_option
 187     add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
 188                default=','.join(default_dir_ignore),
 189                help="ignore directories")
 190     add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
 191                default=','.join(default_file_ignore),
 192                help="ignore files")
 193     add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
 194                default=','.join(default_languages),
 195                help="languages")
 196     add_option('-n', '--dry-run', action='store_true',
 197                help="don't overwrite files")
 198
 199     return options
 200
 201 def parse_args(parser):
 202     opts,args = parser.parse_args()
 203
 204     opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
 205     opts.file_ignore = frozenset(opts.file_ignore.split(','))
 206     opts.languages = frozenset(opts.languages.split(','))
 207
 208     return opts,args
 209
 210 if __name__ == '__main__':
 211     parser = options()
 212     opts, args = parse_args(parser)
 213
 214     for base in args:
 215         for filename,language in find_files(base, languages=opts.languages,
 216                 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
 217             if opts.dry_run:
 218                 print "%s: %s" % (filename, language)
 219             else:
 220                 update_file(filename, filename, language, SortIncludes())