util/style/sort_includes.py

   1 #!/usr/bin/env python2.7
   2 #
   3 # Copyright (c) 2014-2015 ARM Limited
   4 # All rights reserved
   5 #
   6 # The license below extends only to copyright in the software and shall
   7 # not be construed as granting a license to any other intellectual
   8 # property including but not limited to intellectual property relating
   9 # to a hardware implementation of the functionality of the software
  10 # licensed hereunder.  You may use the software subject to the license
  11 # terms below provided that you ensure that this notice is replicated
  12 # unmodified and in its entirety in all distributions of the software,
  13 # modified or unmodified, in source code or in binary form.
  14 #
  15 # Copyright (c) 2011 The Hewlett-Packard Development Company
  16 # All rights reserved.
  17 #
  18 # Redistribution and use in source and binary forms, with or without
  19 # modification, are permitted provided that the following conditions are
  20 # met: redistributions of source code must retain the above copyright
  21 # notice, this list of conditions and the following disclaimer;
  22 # redistributions in binary form must reproduce the above copyright
  23 # notice, this list of conditions and the following disclaimer in the
  24 # documentation and/or other materials provided with the distribution;
  25 # neither the name of the copyright holders nor the names of its
  26 # contributors may be used to endorse or promote products derived from
  27 # this software without specific prior written permission.
  28 #
  29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40
  41 import os
  42 import re
  43 import sys
  44
  45 from file_types import *
  46
  47 cpp_c_headers = {
  48     'assert.h' : 'cassert',
  49     'ctype.h'  : 'cctype',
  50     'errno.h'  : 'cerrno',
  51     'float.h'  : 'cfloat',
  52     'limits.h' : 'climits',
  53     'locale.h' : 'clocale',
  54     'math.h'   : 'cmath',
  55     'setjmp.h' : 'csetjmp',
  56     'signal.h' : 'csignal',
  57     'stdarg.h' : 'cstdarg',
  58     'stddef.h' : 'cstddef',
  59     'stdio.h'  : 'cstdio',
  60     'stdlib.h' : 'cstdlib',
  61     'string.h' : 'cstring',
  62     'time.h'   : 'ctime',
  63     'wchar.h'  : 'cwchar',
  64     'wctype.h' : 'cwctype',
  65 }
  66
  67 include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
  68 def include_key(line):
  69     '''Mark directories with a leading space so directories
  70     are sorted before files'''
  71
  72     match = include_re.match(line)
  73     assert match, line
  74     keyword = match.group(2)
  75     include = match.group(3)
  76
  77     # Everything but the file part needs to have a space prepended
  78     parts = include.split('/')
  79     if len(parts) == 2 and parts[0] == 'dnet':
  80         # Don't sort the dnet includes with respect to each other, but
  81         # make them sorted with respect to non dnet includes.  Python
  82         # guarantees that sorting is stable, so just clear the
  83         # basename part of the filename.
  84         parts[1] = ' '
  85     parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
  86     key = '/'.join(parts)
  87
  88     return key
  89
  90
  91 def _include_matcher(keyword="#include", delim="<>"):
  92     """Match an include statement and return a (keyword, file, extra)
  93     duple, or a touple of None values if there isn't a match."""
  94
  95     rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
  96
  97     def matcher(context, line):
  98         m = rex.match(line)
  99         return m.groups() if m else (None, ) * 3
 100
 101     return matcher
 102
 103 def _include_matcher_fname(fname, **kwargs):
 104     """Match an include of a specific file name. Any keyword arguments
 105     are forwarded to _include_matcher, which is used to match the
 106     actual include line."""
 107
 108     rex = re.compile(fname)
 109     base_matcher = _include_matcher(**kwargs)
 110
 111     def matcher(context, line):
 112         (keyword, fname, extra) = base_matcher(context, line)
 113         if fname and rex.match(fname):
 114             return (keyword, fname, extra)
 115         else:
 116             return (None, ) * 3
 117
 118     return matcher
 119
 120
 121 def _include_matcher_main():
 122     """Match a C/C++ source file's primary header (i.e., a file with
 123     the same base name, but a header extension)."""
 124
 125     base_matcher = _include_matcher(delim='""')
 126     rex = re.compile(r"^src/(.*)\.([^.]+)$")
 127     header_map = {
 128         "c" : "h",
 129         "cc" : "hh",
 130         "cpp" : "hh",
 131         }
 132     def matcher(context, line):
 133         m = rex.match(context["filename"])
 134         if not m:
 135             return (None, ) * 3
 136         base, ext = m.groups()
 137         (keyword, fname, extra) = base_matcher(context, line)
 138         try:
 139             if fname == "%s.%s" % (base, header_map[ext]):
 140                 return (keyword, fname, extra)
 141         except KeyError:
 142             pass
 143
 144         return (None, ) * 3
 145
 146     return matcher
 147
 148 class SortIncludes(object):
 149     # different types of includes for different sorting of headers
 150     # <Python.h>         - Python header needs to be first if it exists
 151     # <*.h>              - system headers (directories before files)
 152     # <*>                - STL headers
 153     # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
 154     # "*"                - M5 headers (directories before files)
 155     includes_re = (
 156         ('main', '""', _include_matcher_main()),
 157         ('python', '<>', _include_matcher_fname("^Python\.h$")),
 158         ('pybind', '""', _include_matcher_fname("^pybind11/.*\.h$",
 159                                                 delim='""')),
 160         ('m5shared', '<>', _include_matcher_fname("^gem5/")),
 161         ('c', '<>', _include_matcher_fname("^.*\.h$")),
 162         ('stl', '<>', _include_matcher_fname("^\w+$")),
 163         ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
 164         ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
 165         ('swig0', '<>', _include_matcher(keyword="%import")),
 166         ('swig1', '<>', _include_matcher(keyword="%include")),
 167         ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
 168         ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
 169         )
 170
 171     block_order = (
 172         ('python', ),
 173         ('pybind', ),
 174         ('main', ),
 175         ('c', ),
 176         ('stl', ),
 177         ('cc', ),
 178         ('m5shared', ),
 179         ('m5header', ),
 180         ('swig0', 'swig1', 'swig2', 'swig3', ),
 181         )
 182
 183     def __init__(self):
 184         self.block_priority = {}
 185         for prio, keys in enumerate(self.block_order):
 186             for key in keys:
 187                 self.block_priority[key] = prio
 188
 189     def reset(self):
 190         # clear all stored headers
 191         self.includes = {}
 192
 193     def dump_blocks(self, block_types):
 194         """Merge includes of from several block types into one large
 195         block of sorted includes. This is useful when we have multiple
 196         include block types (e.g., swig includes) with the same
 197         priority."""
 198
 199         includes = []
 200         for block_type in block_types:
 201             try:
 202                 includes += self.includes[block_type]
 203             except KeyError:
 204                 pass
 205
 206         return sorted(set(includes))
 207
 208     def dump_includes(self):
 209         includes = []
 210         for types in self.block_order:
 211             block = self.dump_blocks(types)
 212             if includes and block:
 213                 includes.append("")
 214             includes += block
 215
 216         self.reset()
 217         return includes
 218
 219     def __call__(self, lines, filename, language):
 220         self.reset()
 221
 222         context = {
 223             "filename" : filename,
 224             "language" : language,
 225             }
 226
 227         def match_line(line):
 228             if not line:
 229                 return (None, line)
 230
 231             for include_type, (ldelim, rdelim), matcher in self.includes_re:
 232                 keyword, include, extra = matcher(context, line)
 233                 if keyword:
 234                     # if we've got a match, clean up the #include line,
 235                     # fix up stl headers and store it in the proper category
 236                     if include_type == 'c' and language == 'C++':
 237                         stl_inc = cpp_c_headers.get(include, None)
 238                         if stl_inc:
 239                             include = stl_inc
 240                             include_type = 'stl'
 241
 242                     return (include_type,
 243                             keyword + ' ' + ldelim + include + rdelim + extra)
 244
 245             return (None, line)
 246
 247         processing_includes = False
 248         for line in lines:
 249             include_type, line = match_line(line)
 250             if include_type:
 251                 try:
 252                     self.includes[include_type].append(line)
 253                 except KeyError:
 254                     self.includes[include_type] = [ line ]
 255
 256                 processing_includes = True
 257             elif processing_includes and not line.strip():
 258                 # Skip empty lines while processing includes
 259                 pass
 260             elif processing_includes:
 261                 # We are now exiting an include block
 262                 processing_includes = False
 263
 264                 # Output pending includes, a new line between, and the
 265                 # current l.
 266                 for include in self.dump_includes():
 267                     yield include
 268                 yield ''
 269                 yield line
 270             else:
 271                 # We are not in an include block, so just emit the line
 272                 yield line
 273
 274         # We've reached EOF, so dump any pending includes
 275         if processing_includes:
 276             for include in self.dump_includes():
 277                 yield include
 278
 279 # default language types to try to apply our sorting rules to
 280 default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
 281
 282 def options():
 283     import optparse
 284     options = optparse.OptionParser()
 285     add_option = options.add_option
 286     add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
 287                default=','.join(default_dir_ignore),
 288                help="ignore directories")
 289     add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
 290                default=','.join(default_file_ignore),
 291                help="ignore files")
 292     add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
 293                default=','.join(default_languages),
 294                help="languages")
 295     add_option('-n', '--dry-run', action='store_true',
 296                help="don't overwrite files")
 297
 298     return options
 299
 300 def parse_args(parser):
 301     opts,args = parser.parse_args()
 302
 303     opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
 304     opts.file_ignore = frozenset(opts.file_ignore.split(','))
 305     opts.languages = frozenset(opts.languages.split(','))
 306
 307     return opts,args
 308
 309 if __name__ == '__main__':
 310     parser = options()
 311     opts, args = parse_args(parser)
 312
 313     for base in args:
 314         for filename,language in find_files(base, languages=opts.languages,
 315                 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
 316             if opts.dry_run:
 317                 print "%s: %s" % (filename, language)
 318             else:
 319                 update_file(filename, filename, language, SortIncludes())