f2f929c09b41c84c618d7fc0283e6b085ba6212f
[gem5.git] / util / style / sort_includes.py
1 #!/usr/bin/env python2.7
2 #
3 # Copyright (c) 2014-2015 ARM Limited
4 # All rights reserved
5 #
6 # The license below extends only to copyright in the software and shall
7 # not be construed as granting a license to any other intellectual
8 # property including but not limited to intellectual property relating
9 # to a hardware implementation of the functionality of the software
10 # licensed hereunder. You may use the software subject to the license
11 # terms below provided that you ensure that this notice is replicated
12 # unmodified and in its entirety in all distributions of the software,
13 # modified or unmodified, in source code or in binary form.
14 #
15 # Copyright (c) 2011 The Hewlett-Packard Development Company
16 # All rights reserved.
17 #
18 # Redistribution and use in source and binary forms, with or without
19 # modification, are permitted provided that the following conditions are
20 # met: redistributions of source code must retain the above copyright
21 # notice, this list of conditions and the following disclaimer;
22 # redistributions in binary form must reproduce the above copyright
23 # notice, this list of conditions and the following disclaimer in the
24 # documentation and/or other materials provided with the distribution;
25 # neither the name of the copyright holders nor the names of its
26 # contributors may be used to endorse or promote products derived from
27 # this software without specific prior written permission.
28 #
29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
41 import os
42 import re
43 import sys
44
45 from file_types import *
46
# Map each C standard library header, as spelled in C code, to the C++
# wrapper header that replaces it. Every C++ name is the C name with
# the '.h' suffix dropped and a 'c' prefix added, so the table is
# generated from the bare header stems.
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
)
66
# Matches a C/C++ '#include' or a SWIG '%include' / '%import' line.
# Group 2 is the keyword and group 3 the included path. Both wildcards
# are non-greedy so that the first delimited name on the line is the one
# captured, even when a trailing comment contains more quotes or angle
# brackets.
include_re = re.compile(r'([#%])(include|import).*?[<"](.*?)[">]')

def include_key(line):
    '''Return a sort key for an include line that puts directories
    before files.

    Every path component except the last is given a leading space.
    A space compares lower than any other printable character, so
    includes that live in a directory sort ahead of plain files at
    the same level.

    line -- an include/import line that include_re recognises; any
            other input trips the assertion below.
    '''

    match = include_re.match(line)
    # Kept as an assertion so the failure type seen by existing users
    # does not change; the offending line is used as the message.
    assert match, line
    include = match.group(3)

    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # keep them sorted with respect to non-dnet includes. Sorting
        # is stable, so blanking the file name makes all dnet keys
        # equal and preserves their original relative order.
        parts[1] = ' '

    # Everything but the file part gets a space prepended.
    parts[0:-1] = [' ' + s for s in parts[0:-1]]
    return '/'.join(parts)
89
90
91 def _include_matcher(keyword="#include", delim="<>"):
92 """Match an include statement and return a (keyword, file, extra)
93 duple, or a touple of None values if there isn't a match."""
94
95 rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
96
97 def matcher(context, line):
98 m = rex.match(line)
99 return m.groups() if m else (None, ) * 3
100
101 return matcher
102
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname is a regular expression that is applied with re.match, i.e.
    anchored at the start, to the included file name. All keyword
    arguments are handed on to _include_matcher, which recognises the
    include line itself.

    The returned callable takes (context, line) and returns a
    (keyword, file, extra) triple when both the line and the file name
    match, or a triple of None values otherwise."""

    name_rex = re.compile(fname)
    line_matcher = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        keyword, included, extra = line_matcher(context, line)
        # A line that is not an include at all yields included=None.
        if not included or not name_rex.match(included):
            return no_match
        return (keyword, included, extra)

    return matcher
119
120
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header.

    The primary header is the quoted include whose path equals the
    path of the file being processed, taken relative to 'src/', with
    the source extension replaced by the matching header extension.

    The returned callable takes (context, line), reads the name of the
    file being processed from context["filename"], and returns a
    (keyword, file, extra) triple or a triple of None values."""

    # Source extension -> extension of the corresponding header.
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    quoted_matcher = _include_matcher(delim='""')
    no_match = (None, ) * 3

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            # Not below src/ or no extension: no primary header.
            return no_match

        stem, ext = source.groups()
        keyword, included, extra = quoted_matcher(context, line)
        if ext in header_ext and \
                included == "%s.%s" % (stem, header_ext[ext]):
            return (keyword, included, extra)

        return no_match

    return matcher
147
class SortIncludes(object):
    """Regroup and sort the include blocks of a source file.

    An instance is called as sorter(lines, filename, language) and is
    a generator over the rewritten lines. Consecutive include lines
    (blank lines between them are ignored) form an include block; each
    block is re-emitted grouped by category in block_order, with one
    empty line between non-empty groups and one after the block."""

    # Include categories. For every line the matchers are tried in
    # this order and the first one that matches decides the category.
    # Each entry is (category, delimiters used when the line is
    # re-emitted, matcher).
    #
    #   main     - "foo.hh" when processing src/.../foo.cc
    #   python   - <Python.h>
    #   pybind   - "pybind11/*.h"
    #   m5shared - <gem5/*>
    #   c        - <*.h>
    #   stl      - <*> consisting of word characters only
    #   cc       - <*.(hh|hxx|hpp|H)>
    #   m5header - "*.h" and "*.hh"
    #   swig0-3  - SWIG %import / %include with <> or ""
    #
    # The patterns are raw strings so that '\.' and '\w' reach the
    # regex engine without relying on Python passing unknown string
    # escapes through unchanged.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('pybind', '""', _include_matcher_fname(r"^pybind11/.*\.h$",
                                                delim='""')),
        ('m5shared', '<>', _include_matcher_fname(r"^gem5/")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Output order of the categories. Categories that share a tuple
    # are merged into a single sorted group.
    block_order = (
        ('python', ),
        ('pybind', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5shared', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
    )

    def __init__(self):
        # category -> index of its group in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create self.includes up front so the dump methods are usable
        # on a fresh instance as well.
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        # category -> list of cleaned-up include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes of several block types into one sorted
        list. This is useful when multiple include block types (e.g.,
        the swig includes) share the same priority.

        Duplicate lines are dropped and the result is ordered by plain
        string comparison of the complete include lines."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes as a list of lines, grouped
        in block_order with an empty string between non-empty groups,
        and clear the collected state."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def _match_line(self, context, line):
        """Classify a single line.

        Returns (category, normalised include line) for an include and
        (None, line) for anything else, including empty lines."""

        if not line:
            return (None, line)

        for include_type, (ldelim, rdelim), matcher in self.includes_re:
            keyword, include, extra = matcher(context, line)
            if not keyword:
                continue

            # In C++ files, replace C library headers that have a C++
            # wrapper (<stdio.h> -> <cstdio>) and file them with the
            # STL headers.
            if include_type == 'c' and context["language"] == 'C++':
                stl_inc = cpp_c_headers.get(include)
                if stl_inc:
                    include = stl_inc
                    include_type = 'stl'

            # Rebuild the line with exactly one space after the
            # keyword; text after the closing delimiter is kept.
            return (include_type,
                    keyword + ' ' + ldelim + include + rdelim + extra)

        return (None, line)

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the file's lines
        filename -- name of the file; used to find its primary header
        language -- language of the file; 'C++' enables the rewrite of
                    C library headers to their C++ equivalents"""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
        }

        processing_includes = False
        for line in lines:
            include_type, line = self._match_line(context, line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, an empty line, and then the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
278
# Languages whose files are processed unless --languages says otherwise
default_languages = frozenset(['C', 'C++', 'isa', 'python', 'scons', 'swig'])

def options():
    """Create the command line parser of this script.

    Returns an optparse.OptionParser with the comma separated list
    options -d/--dir_ignore, -f/--file_ignore and -l/--languages, and
    the flag -n/--dry-run. The list options default to the module
    level default_* sets joined with commas."""

    import optparse

    parser = optparse.OptionParser()

    # The list valued options differ only in the values tabulated here.
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_name, long_name, metavar, default, help_text in list_options:
        parser.add_option(short_name, long_name, metavar=metavar,
                          type='string', default=','.join(default),
                          help=help_text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
299
def parse_args(parser):
    """Parse the command line and split the list valued options.

    parser -- object whose parse_args() returns an (options, args)
              pair, such as the parser built by options()

    The dir_ignore, file_ignore and languages attributes of the
    options object are replaced by frozensets of their comma separated
    items. Returns the (options, args) pair."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        items = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(items))

    return opts, args
308
# Script entry point: sort the includes of every matching file below
# each path given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file;
        # presumably they are supplied by the file_types star import.
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Only report what would be processed. A single
                # parenthesised argument prints the same text under
                # Python 2.7 and is also valid syntax in Python 3.
                print("%s: %s" % (filename, language))
            else:
                # Source and destination are the same file, and a
                # fresh sorter is used for every file.
                update_file(filename, filename, language, SortIncludes())
318 else:
319 update_file(filename, filename, language, SortIncludes())