util/sort_includes.py

   1 #!/usr/bin/env python
   2
   3 # Copyright (c) 2011 The Hewlett-Packard Development Company
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions are
   8 # met: redistributions of source code must retain the above copyright
   9 # notice, this list of conditions and the following disclaimer;
  10 # redistributions in binary form must reproduce the above copyright
  11 # notice, this list of conditions and the following disclaimer in the
  12 # documentation and/or other materials provided with the distribution;
  13 # neither the name of the copyright holders nor the names of its
  14 # contributors may be used to endorse or promote products derived from
  15 # this software without specific prior written permission.
  16 #
  17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # Authors: Nathan Binkert
  30
  31 import os
  32 import re
  33 import sys
  34
  35 from file_types import *
  36
  37 cpp_c_headers = {
  38     'assert.h' : 'cassert',
  39     'ctype.h'  : 'cctype',
  40     'errno.h'  : 'cerrno',
  41     'float.h'  : 'cfloat',
  42     'limits.h' : 'climits',
  43     'locale.h' : 'clocale',
  44     'math.h'   : 'cmath',
  45     'setjmp.h' : 'csetjmp',
  46     'signal.h' : 'csignal',
  47     'stdarg.h' : 'cstdarg',
  48     'stddef.h' : 'cstddef',
  49     'stdio.h'  : 'cstdio',
  50     'stdlib.h' : 'cstdlib',
  51     'string.h' : 'cstring',
  52     'time.h'   : 'ctime',
  53     'wchar.h'  : 'cwchar',
  54     'wctype.h' : 'cwctype',
  55 }
  56
  57 include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
  58 def include_key(line):
  59     '''Mark directories with a leading space so directories
  60     are sorted before files'''
  61
  62     match = include_re.match(line)
  63     assert match, line
  64     keyword = match.group(2)
  65     include = match.group(3)
  66
  67     # Everything but the file part needs to have a space prepended
  68     parts = include.split('/')
  69     if len(parts) == 2 and parts[0] == 'dnet':
  70         # Don't sort the dnet includes with respect to each other, but
  71         # make them sorted with respect to non dnet includes.  Python
  72         # guarantees that sorting is stable, so just clear the
  73         # basename part of the filename.
  74         parts[1] = ' '
  75     parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
  76     key = '/'.join(parts)
  77
  78     return key
  79
  80
  81 def _include_matcher(keyword="#include", delim="<>"):
  82     """Match an include statement and return a (keyword, file, extra)
  83     duple, or a touple of None values if there isn't a match."""
  84
  85     rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
  86
  87     def matcher(context, line):
  88         m = rex.match(line)
  89         return m.groups() if m else (None, ) * 3
  90
  91     return matcher
  92
  93 def _include_matcher_fname(fname, **kwargs):
  94     """Match an include of a specific file name. Any keyword arguments
  95     are forwarded to _include_matcher, which is used to match the
  96     actual include line."""
  97
  98     rex = re.compile(fname)
  99     base_matcher = _include_matcher(**kwargs)
 100
 101     def matcher(context, line):
 102         (keyword, fname, extra) = base_matcher(context, line)
 103         if fname and rex.match(fname):
 104             return (keyword, fname, extra)
 105         else:
 106             return (None, ) * 3
 107
 108     return matcher
 109
 110
 111 def _include_matcher_main():
 112     """Match a C/C++ source file's primary header (i.e., a file with
 113     the same base name, but a header extension)."""
 114
 115     base_matcher = _include_matcher(delim='""')
 116     rex = re.compile(r"^src/(.*)\.([^.]+)$")
 117     header_map = {
 118         "c" : "h",
 119         "cc" : "hh",
 120         "cpp" : "hh",
 121         }
 122     def matcher(context, line):
 123         m = rex.match(context["filename"])
 124         if not m:
 125             return (None, ) * 3
 126         base, ext = m.groups()
 127         (keyword, fname, extra) = base_matcher(context, line)
 128         try:
 129             if fname == "%s.%s" % (base, header_map[ext]):
 130                 return (keyword, fname, extra)
 131         except KeyError:
 132             pass
 133
 134         return (None, ) * 3
 135
 136     return matcher
 137
 138 class SortIncludes(object):
 139     # different types of includes for different sorting of headers
 140     # <Python.h>         - Python header needs to be first if it exists
 141     # <*.h>              - system headers (directories before files)
 142     # <*>                - STL headers
 143     # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
 144     # "*"                - M5 headers (directories before files)
 145     includes_re = (
 146         ('main', '""', _include_matcher_main()),
 147         ('python', '<>', _include_matcher_fname("^Python\.h$")),
 148         ('c', '<>', _include_matcher_fname("^.*\.h$")),
 149         ('stl', '<>', _include_matcher_fname("^\w+$")),
 150         ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
 151         ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
 152         ('swig0', '<>', _include_matcher(keyword="%import")),
 153         ('swig1', '<>', _include_matcher(keyword="%include")),
 154         ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
 155         ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
 156         )
 157
 158     block_order = (
 159         ('main', ),
 160         ('python', ),
 161         ('c', ),
 162         ('stl', ),
 163         ('cc', ),
 164         ('m5header', ),
 165         ('swig0', 'swig1', 'swig2', 'swig3', ),
 166         )
 167
 168     def __init__(self):
 169         self.block_priority = {}
 170         for prio, keys in enumerate(self.block_order):
 171             for key in keys:
 172                 self.block_priority[key] = prio
 173
 174     def reset(self):
 175         # clear all stored headers
 176         self.includes = {}
 177
 178     def dump_blocks(self, block_types):
 179         """Merge includes of from several block types into one large
 180         block of sorted includes. This is useful when we have multiple
 181         include block types (e.g., swig includes) with the same
 182         priority."""
 183
 184         includes = []
 185         for block_type in block_types:
 186             try:
 187                 includes += self.includes[block_type]
 188             except KeyError:
 189                 pass
 190
 191         return sorted(set(includes))
 192
 193     def dump_includes(self):
 194         blocks = []
 195         # Create a list of blocks in the prescribed include
 196         # order. Each entry in the list is a multi-line string with
 197         # multiple includes.
 198         for types in self.block_order:
 199             block = "\n".join(self.dump_blocks(types))
 200             if block:
 201                 blocks.append(block)
 202
 203         self.reset()
 204         return "\n\n".join(blocks)
 205
 206     def __call__(self, lines, filename, language):
 207         self.reset()
 208
 209         context = {
 210             "filename" : filename,
 211             "language" : language,
 212             }
 213
 214         def match_line(line):
 215             if not line:
 216                 return (None, line)
 217
 218             for include_type, (ldelim, rdelim), matcher in self.includes_re:
 219                 keyword, include, extra = matcher(context, line)
 220                 if keyword:
 221                     # if we've got a match, clean up the #include line,
 222                     # fix up stl headers and store it in the proper category
 223                     if include_type == 'c' and language == 'C++':
 224                         stl_inc = cpp_c_headers.get(include, None)
 225                         if stl_inc:
 226                             include = stl_inc
 227                             include_type = 'stl'
 228
 229                     return (include_type,
 230                             keyword + ' ' + ldelim + include + rdelim + extra)
 231
 232             return (None, line)
 233
 234         processing_includes = False
 235         for line in lines:
 236             include_type, line = match_line(line)
 237             if include_type:
 238                 try:
 239                     self.includes[include_type].append(line)
 240                 except KeyError:
 241                     self.includes[include_type] = [ line ]
 242
 243                 processing_includes = True
 244             elif processing_includes and not line.strip():
 245                 # Skip empty lines while processing includes
 246                 pass
 247             elif processing_includes:
 248                 # We are now exiting an include block
 249                 processing_includes = False
 250
 251                 # Output pending includes, a new line between, and the
 252                 # current l.
 253                 yield self.dump_includes()
 254                 yield ''
 255                 yield line
 256             else:
 257                 # We are not in an include block, so just emit the line
 258                 yield line
 259
 260         # We've reached EOF, so dump any pending includes
 261         if processing_includes:
 262             yield self.dump_includes()
 263
 264
 265
 266 # default language types to try to apply our sorting rules to
 267 default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
 268
 269 def options():
 270     import optparse
 271     options = optparse.OptionParser()
 272     add_option = options.add_option
 273     add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
 274                default=','.join(default_dir_ignore),
 275                help="ignore directories")
 276     add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
 277                default=','.join(default_file_ignore),
 278                help="ignore files")
 279     add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
 280                default=','.join(default_languages),
 281                help="languages")
 282     add_option('-n', '--dry-run', action='store_true',
 283                help="don't overwrite files")
 284
 285     return options
 286
 287 def parse_args(parser):
 288     opts,args = parser.parse_args()
 289
 290     opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
 291     opts.file_ignore = frozenset(opts.file_ignore.split(','))
 292     opts.languages = frozenset(opts.languages.split(','))
 293
 294     return opts,args
 295
 296 if __name__ == '__main__':
 297     parser = options()
 298     opts, args = parse_args(parser)
 299
 300     for base in args:
 301         for filename,language in find_files(base, languages=opts.languages,
 302                 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
 303             if opts.dry_run:
 304                 print "%s: %s" % (filename, language)
 305             else:
 306                 update_file(filename, filename, language, SortIncludes())