a35b0846807e394b4defff315170813e69080c54
[gem5.git] / util / style / sort_includes.py
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2014-2015 ARM Limited
4 # All rights reserved
5 #
6 # The license below extends only to copyright in the software and shall
7 # not be construed as granting a license to any other intellectual
8 # property including but not limited to intellectual property relating
9 # to a hardware implementation of the functionality of the software
10 # licensed hereunder. You may use the software subject to the license
11 # terms below provided that you ensure that this notice is replicated
12 # unmodified and in its entirety in all distributions of the software,
13 # modified or unmodified, in source code or in binary form.
14 #
15 # Copyright (c) 2011 The Hewlett-Packard Development Company
16 # All rights reserved.
17 #
18 # Redistribution and use in source and binary forms, with or without
19 # modification, are permitted provided that the following conditions are
20 # met: redistributions of source code must retain the above copyright
21 # notice, this list of conditions and the following disclaimer;
22 # redistributions in binary form must reproduce the above copyright
23 # notice, this list of conditions and the following disclaimer in the
24 # documentation and/or other materials provided with the distribution;
25 # neither the name of the copyright holders nor the names of its
26 # contributors may be used to endorse or promote products derived from
27 # this software without specific prior written permission.
28 #
29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #
41 # Authors: Nathan Binkert
42 # Andreas Sandberg
43
44 import os
45 import re
46 import sys
47
48 from file_types import *
49
# Map each C standard library header to its C++ counterpart. The C++
# name is always the C header's stem with a leading 'c' and no '.h'
# suffix (e.g. 'stdio.h' -> 'cstdio'), so the table is derived from
# the list of stems.
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert',
        'ctype',
        'errno',
        'float',
        'limits',
        'locale',
        'math',
        'setjmp',
        'signal',
        'stdarg',
        'stddef',
        'stdio',
        'stdlib',
        'string',
        'time',
        'wchar',
        'wctype',
    )
)
69
# Matches both C preprocessor ('#include') and SWIG ('%include' /
# '%import') statements; group 3 is the text between the delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path is prefixed with a
    space so that, when keys are compared as strings, paths inside
    directories sort before plain files.

    Raises AssertionError (carrying the offending line) if the line is
    not an include/import statement.'''

    match = include_re.match(line)
    if match is None:
        # Raised explicitly rather than with an assert statement so
        # the check is still performed when Python runs with -O.
        raise AssertionError(line)

    # Everything but the file part needs to have a space prepended
    parts = match.group(3).split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [' ' + s for s in parts[0:-1]]

    return '/'.join(parts)
92
93
94 def _include_matcher(keyword="#include", delim="<>"):
95 """Match an include statement and return a (keyword, file, extra)
96 duple, or a touple of None values if there isn't a match."""
97
98 rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100 def matcher(context, line):
101 m = rex.match(line)
102 return m.groups() if m else (None, ) * 3
103
104 return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname is a regular expression that the included file name must
    match from its start. All keyword arguments are passed on to
    _include_matcher, which does the matching of the include statement
    itself.

    The returned function has the same (context, line) ->
    (keyword, file, extra) interface as the one produced by
    _include_matcher, and yields a triple of None values when either
    the statement or the file name does not match."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        # An empty or missing file name can never be a match.
        if not included or not name_rex.match(included):
            return no_match
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header.

    The primary header is the quoted include whose path equals the
    source file's path below 'src/' with the source extension replaced
    by the corresponding header extension. The source file name is
    taken from context["filename"]; files that are not below 'src/' or
    whose extension has no known header extension never match."""

    quoted_include = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header.
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }
    no_match = (None, ) * 3

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return no_match

        base, ext = source.groups()
        keyword, fname, extra = quoted_include(context, line)
        if ext in header_ext and fname == "%s.%s" % (base, header_ext[ext]):
            return (keyword, fname, extra)

        return no_match

    return matcher
150
class SortIncludes(object):
    """Callable that regroups and sorts the include statements of a file.

    Calling an instance with the lines of a file returns a generator
    producing the rewritten lines: every run of include statements is
    replaced by the same statements split into categories, with each
    category sorted and categories separated by an empty line."""

    # Include categories as (name, delimiters, matcher) entries. The
    # matchers are tried in this order and the first one that matches
    # decides the category, so 'main' must come before 'm5header'.
    # <Python.h>         - Python header
    # <*.h>              - system headers
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ headers
    # "*"                - M5 headers
    #
    # The patterns are raw strings so that '\.' and '\w' reach the
    # regular expression engine as written instead of being treated
    # as (invalid) string escape sequences.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the categories are written out. Categories that
    # share a tuple are merged into a single sorted block.
    block_order = (
        ('python', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map each category name to the index of its output block.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists even before the first call,
        # so the dump methods can be used on a fresh instance.
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes of several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns the collected lines of all block_types with duplicates
        removed, sorted as plain strings."""

        includes = []
        for block_type in block_types:
            includes.extend(self.includes.get(block_type, []))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes in output order and reset.

        The result is a list of lines made up of the non-empty blocks
        in block_order order, with one empty string between
        consecutive blocks."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the file's lines
        filename -- name of the file, made available to the matchers
        language -- language of the file; for 'C++', C standard
                    library headers are replaced by their C++
                    counterparts (see cpp_c_headers)

        Lines outside of include blocks are passed through unchanged.
        Blank lines inside and directly after an include block are
        dropped; a single empty line is emitted between the block and
        the first line following it."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (include type, normalized line) for an include
            statement, or (None, line) for any other line."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
276
# Languages whose files get their includes sorted unless the user
# selects a different set on the command line.
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
279
def options():
    """Create the command line parser for this script.

    Returns an optparse.OptionParser with the directory, file and
    language filter options (each a comma separated list, defaulting
    to the corresponding default_* set) and the dry-run flag."""

    import optparse

    parser = optparse.OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
297
def parse_args(parser):
    """Parse the command line with parser and split the list options.

    The comma separated dir_ignore, file_ignore and languages option
    values are each replaced by a frozenset of their elements.

    Returns the (options, positional arguments) pair."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))

    return opts, args
306
# Script entry point: sort the includes of every matching file found
# below the paths given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # find_files and update_file are not defined in this file;
    # presumably they are provided by the file_types star import.
    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                                             file_ignore=opts.file_ignore,
                                             dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Only report what would be processed. The parenthesised
                # single-argument form prints the same text under both
                # Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                # Source and destination are the same file name. A fresh
                # sorter is used for every file.
                update_file(filename, filename, language, SortIncludes())