util/style/sort_includes.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright (c) 2014-2015 ARM Limited
   4 # All rights reserved
   5 #
   6 # The license below extends only to copyright in the software and shall
   7 # not be construed as granting a license to any other intellectual
   8 # property including but not limited to intellectual property relating
   9 # to a hardware implementation of the functionality of the software
  10 # licensed hereunder.  You may use the software subject to the license
  11 # terms below provided that you ensure that this notice is replicated
  12 # unmodified and in its entirety in all distributions of the software,
  13 # modified or unmodified, in source code or in binary form.
  14 #
  15 # Copyright (c) 2011 The Hewlett-Packard Development Company
  16 # All rights reserved.
  17 #
  18 # Redistribution and use in source and binary forms, with or without
  19 # modification, are permitted provided that the following conditions are
  20 # met: redistributions of source code must retain the above copyright
  21 # notice, this list of conditions and the following disclaimer;
  22 # redistributions in binary form must reproduce the above copyright
  23 # notice, this list of conditions and the following disclaimer in the
  24 # documentation and/or other materials provided with the distribution;
  25 # neither the name of the copyright holders nor the names of its
  26 # contributors may be used to endorse or promote products derived from
  27 # this software without specific prior written permission.
  28 #
  29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 #
  41 # Authors: Nathan Binkert
  42 #          Andreas Sandberg
  43
  44 import os
  45 import re
  46 import sys
  47
  48 from file_types import *
  49
  50 cpp_c_headers = {
  51     'assert.h' : 'cassert',
  52     'ctype.h'  : 'cctype',
  53     'errno.h'  : 'cerrno',
  54     'float.h'  : 'cfloat',
  55     'limits.h' : 'climits',
  56     'locale.h' : 'clocale',
  57     'math.h'   : 'cmath',
  58     'setjmp.h' : 'csetjmp',
  59     'signal.h' : 'csignal',
  60     'stdarg.h' : 'cstdarg',
  61     'stddef.h' : 'cstddef',
  62     'stdio.h'  : 'cstdio',
  63     'stdlib.h' : 'cstdlib',
  64     'string.h' : 'cstring',
  65     'time.h'   : 'ctime',
  66     'wchar.h'  : 'cwchar',
  67     'wctype.h' : 'cwctype',
  68 }
  69
  70 include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
  71 def include_key(line):
  72     '''Mark directories with a leading space so directories
  73     are sorted before files'''
  74
  75     match = include_re.match(line)
  76     assert match, line
  77     keyword = match.group(2)
  78     include = match.group(3)
  79
  80     # Everything but the file part needs to have a space prepended
  81     parts = include.split('/')
  82     if len(parts) == 2 and parts[0] == 'dnet':
  83         # Don't sort the dnet includes with respect to each other, but
  84         # make them sorted with respect to non dnet includes.  Python
  85         # guarantees that sorting is stable, so just clear the
  86         # basename part of the filename.
  87         parts[1] = ' '
  88     parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
  89     key = '/'.join(parts)
  90
  91     return key
  92
  93
  94 def _include_matcher(keyword="#include", delim="<>"):
  95     """Match an include statement and return a (keyword, file, extra)
  96     duple, or a touple of None values if there isn't a match."""
  97
  98     rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
  99
 100     def matcher(context, line):
 101         m = rex.match(line)
 102         return m.groups() if m else (None, ) * 3
 103
 104     return matcher
 105
 106 def _include_matcher_fname(fname, **kwargs):
 107     """Match an include of a specific file name. Any keyword arguments
 108     are forwarded to _include_matcher, which is used to match the
 109     actual include line."""
 110
 111     rex = re.compile(fname)
 112     base_matcher = _include_matcher(**kwargs)
 113
 114     def matcher(context, line):
 115         (keyword, fname, extra) = base_matcher(context, line)
 116         if fname and rex.match(fname):
 117             return (keyword, fname, extra)
 118         else:
 119             return (None, ) * 3
 120
 121     return matcher
 122
 123
 124 def _include_matcher_main():
 125     """Match a C/C++ source file's primary header (i.e., a file with
 126     the same base name, but a header extension)."""
 127
 128     base_matcher = _include_matcher(delim='""')
 129     rex = re.compile(r"^src/(.*)\.([^.]+)$")
 130     header_map = {
 131         "c" : "h",
 132         "cc" : "hh",
 133         "cpp" : "hh",
 134         }
 135     def matcher(context, line):
 136         m = rex.match(context["filename"])
 137         if not m:
 138             return (None, ) * 3
 139         base, ext = m.groups()
 140         (keyword, fname, extra) = base_matcher(context, line)
 141         try:
 142             if fname == "%s.%s" % (base, header_map[ext]):
 143                 return (keyword, fname, extra)
 144         except KeyError:
 145             pass
 146
 147         return (None, ) * 3
 148
 149     return matcher
 150
 151 class SortIncludes(object):
 152     # different types of includes for different sorting of headers
 153     # <Python.h>         - Python header needs to be first if it exists
 154     # <*.h>              - system headers (directories before files)
 155     # <*>                - STL headers
 156     # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
 157     # "*"                - M5 headers (directories before files)
 158     includes_re = (
 159         ('main', '""', _include_matcher_main()),
 160         ('python', '<>', _include_matcher_fname("^Python\.h$")),
 161         ('c', '<>', _include_matcher_fname("^.*\.h$")),
 162         ('stl', '<>', _include_matcher_fname("^\w+$")),
 163         ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
 164         ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
 165         ('swig0', '<>', _include_matcher(keyword="%import")),
 166         ('swig1', '<>', _include_matcher(keyword="%include")),
 167         ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
 168         ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
 169         )
 170
 171     block_order = (
 172         ('python', ),
 173         ('main', ),
 174         ('c', ),
 175         ('stl', ),
 176         ('cc', ),
 177         ('m5header', ),
 178         ('swig0', 'swig1', 'swig2', 'swig3', ),
 179         )
 180
 181     def __init__(self):
 182         self.block_priority = {}
 183         for prio, keys in enumerate(self.block_order):
 184             for key in keys:
 185                 self.block_priority[key] = prio
 186
 187     def reset(self):
 188         # clear all stored headers
 189         self.includes = {}
 190
 191     def dump_blocks(self, block_types):
 192         """Merge includes of from several block types into one large
 193         block of sorted includes. This is useful when we have multiple
 194         include block types (e.g., swig includes) with the same
 195         priority."""
 196
 197         includes = []
 198         for block_type in block_types:
 199             try:
 200                 includes += self.includes[block_type]
 201             except KeyError:
 202                 pass
 203
 204         return sorted(set(includes))
 205
 206     def dump_includes(self):
 207         includes = []
 208         for types in self.block_order:
 209             block = self.dump_blocks(types)
 210             if includes and block:
 211                 includes.append("")
 212             includes += block
 213
 214         self.reset()
 215         return includes
 216
 217     def __call__(self, lines, filename, language):
 218         self.reset()
 219
 220         context = {
 221             "filename" : filename,
 222             "language" : language,
 223             }
 224
 225         def match_line(line):
 226             if not line:
 227                 return (None, line)
 228
 229             for include_type, (ldelim, rdelim), matcher in self.includes_re:
 230                 keyword, include, extra = matcher(context, line)
 231                 if keyword:
 232                     # if we've got a match, clean up the #include line,
 233                     # fix up stl headers and store it in the proper category
 234                     if include_type == 'c' and language == 'C++':
 235                         stl_inc = cpp_c_headers.get(include, None)
 236                         if stl_inc:
 237                             include = stl_inc
 238                             include_type = 'stl'
 239
 240                     return (include_type,
 241                             keyword + ' ' + ldelim + include + rdelim + extra)
 242
 243             return (None, line)
 244
 245         processing_includes = False
 246         for line in lines:
 247             include_type, line = match_line(line)
 248             if include_type:
 249                 try:
 250                     self.includes[include_type].append(line)
 251                 except KeyError:
 252                     self.includes[include_type] = [ line ]
 253
 254                 processing_includes = True
 255             elif processing_includes and not line.strip():
 256                 # Skip empty lines while processing includes
 257                 pass
 258             elif processing_includes:
 259                 # We are now exiting an include block
 260                 processing_includes = False
 261
 262                 # Output pending includes, a new line between, and the
 263                 # current l.
 264                 for include in self.dump_includes():
 265                     yield include
 266                 yield ''
 267                 yield line
 268             else:
 269                 # We are not in an include block, so just emit the line
 270                 yield line
 271
 272         # We've reached EOF, so dump any pending includes
 273         if processing_includes:
 274             for include in self.dump_includes():
 275                 yield include
 276
 277 # default language types to try to apply our sorting rules to
 278 default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
 279
 280 def options():
 281     import optparse
 282     options = optparse.OptionParser()
 283     add_option = options.add_option
 284     add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
 285                default=','.join(default_dir_ignore),
 286                help="ignore directories")
 287     add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
 288                default=','.join(default_file_ignore),
 289                help="ignore files")
 290     add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
 291                default=','.join(default_languages),
 292                help="languages")
 293     add_option('-n', '--dry-run', action='store_true',
 294                help="don't overwrite files")
 295
 296     return options
 297
 298 def parse_args(parser):
 299     opts,args = parser.parse_args()
 300
 301     opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
 302     opts.file_ignore = frozenset(opts.file_ignore.split(','))
 303     opts.languages = frozenset(opts.languages.split(','))
 304
 305     return opts,args
 306
 307 if __name__ == '__main__':
 308     parser = options()
 309     opts, args = parse_args(parser)
 310
 311     for base in args:
 312         for filename,language in find_files(base, languages=opts.languages,
 313                 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
 314             if opts.dry_run:
 315                 print "%s: %s" % (filename, language)
 316             else:
 317                 update_file(filename, filename, language, SortIncludes())