style: Update the style checker to handle new include order
[gem5.git] / util / sort_includes.py
1 #!/usr/bin/env python
2
3 import os
4 import re
5 import sys
6
7 from file_types import *
8
# Stems of the C standard library headers that have a C++ <c...> wrapper.
_c_header_stems = (
    'assert', 'ctype', 'errno', 'float', 'limits', 'locale', 'math',
    'setjmp', 'signal', 'stdarg', 'stddef', 'stdio', 'stdlib', 'string',
    'time', 'wchar', 'wctype',
)

# Maps each C header name ('<stem>.h') to its C++ counterpart ('c<stem>').
cpp_c_headers = dict((stem + '.h', 'c' + stem) for stem in _c_header_stems)
28
# Matches '#include', '#import', '%include' and '%import' lines. Both
# quantifiers are lazy so the file name is taken from the first delimiter
# pair on the line; delimiter characters inside a trailing comment (e.g.
# "// std::map<int, int>") can therefore not be mistaken for the file name.
include_re = re.compile(r'([#%])(include|import).*?[<"](.*?)[">]')

def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path is marked with a
    leading space so that directories are sorted before files.

    line -- the include/import line; it has to match include_re.

    Returns the included path with the directory components marked.
    Raises AssertionError (carrying the line) if the line is not an
    include or import line.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '

    # Everything but the file part needs to have a space prepended
    parts[:-1] = [' ' + part for part in parts[:-1]]

    return '/'.join(parts)
51
52
def _include_matcher(keyword="#include", delim="<>"):
    """Build a function that matches include statements.

    keyword -- the directive to look for. It is interpolated into the
               regular expression as-is.
    delim   -- two character string holding the opening and the closing
               delimiter that surround the file name.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) triple, where extra is whatever follows the
    closing delimiter, or a triple of None values if there isn't a
    match. The context argument is not used by this matcher."""

    # The file name is captured lazily so that it ends at the first
    # closing delimiter. A greedy capture would swallow a trailing comment
    # that happens to contain the delimiter, e.g.
    # '#include <map> // std::map<int, int>'.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, re.escape(delim[0]), re.escape(delim[1])))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher
64
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname is a regular expression that is matched against the start of
    the included file name. All keyword arguments are handed on to
    _include_matcher, which does the matching of the include line itself.

    The returned function takes (context, line) and returns the
    (keyword, file, extra) triple of the include, or a triple of None
    values if the line is not an include or names a different file."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        # Reject both non-include lines and includes of other files
        if not included or not name_rex.match(included):
            return no_match

        return (keyword, included, extra)

    return matcher
81
82
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file.

    The primary header is the double-quoted include whose name equals the
    source file's path below src/ with the source extension replaced by
    the corresponding header extension.

    The returned function takes (context, line), reads the source file
    name from context["filename"], and returns the (keyword, file, extra)
    triple of the include or a triple of None values."""

    # Source file extension -> extension of the matching header
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    quoted_include = _include_matcher(delim='""')
    no_match = (None, ) * 3

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            # Not a file below src/, or a file without an extension
            return no_match

        base, ext = source.groups()
        keyword, included, extra = quoted_include(context, line)

        # Source types without a known header extension have no main header
        if ext not in header_ext:
            return no_match

        if included != "%s.%s" % (base, header_ext[ext]):
            return no_match

        return (keyword, included, extra)

    return matcher
109
class SortIncludes(object):
    """Callable that rewrites the include blocks of a file in sorted order.

    Calling an instance with (lines, filename, language) returns a
    generator over the lines of the file in which every run of recognised
    include statements has been replaced by sorted blocks without
    duplicates. The blocks follow block_order and are separated from each
    other by an empty line."""

    # different types of includes for different sorting of headers
    # "<source name>.h[h]" - the source file's own header comes first
    # <Python.h>           - Python header needs to be next if it exists
    # <*.h>                - system headers
    # <*>                  - STL headers
    # <*.(hh|hxx|hpp|H)>   - C++ Headers
    # "*"                  - M5 headers
    # %import / %include   - swig directives
    #
    # Each entry is (block type, output delimiters, matcher). The first
    # matcher that recognises a line decides its block type, so the order
    # of the entries matters.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Order in which the blocks are written out. Block types that share a
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
    )

    def __init__(self):
        # Map every block type to the position of its block in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists so that the dump methods can be
        # used on an instance that has not been called yet
        self.reset()

    def reset(self):
        """Forget all includes collected so far."""
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns the collected include lines of the given block types as a
        list in plain string order with duplicates removed."""

        includes = []
        for block_type in block_types:
            # Block types without any collected include contribute nothing
            includes.extend(self.includes.get(block_type, ()))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes as a single string and forget
        them. The string holds one include per line and an empty line
        between consecutive blocks; it is empty if nothing was
        collected."""

        blocks = []
        # Create a list of blocks in the prescribed include
        # order. Each entry in the list is a multi-line string with
        # multiple includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable over the lines of the file.
        filename -- name of the file; handed to the matchers, which use
                    it to recognise the file's main header.
        language -- language of the file; for 'C++' the C headers listed
                    in cpp_c_headers are replaced by their C++ names.

        Empty lines inside an include block are dropped. A sorted block is
        yielded as one multi-line string, followed by an empty line when
        further lines come after it."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
        }

        def match_line(line):
            # Returns (block type, cleaned up include line) for an include
            # and (None, unchanged line) for anything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)

                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()
235
236
237
# language types that our sorting rules are applied to by default
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
240
def options():
    """Create the command line parser of the script.

    Returns an optparse.OptionParser that knows the -d/--dir_ignore,
    -f/--file_ignore, -l/--languages and -n/--dry-run options. The first
    three take a comma separated list whose default is built from
    default_dir_ignore, default_file_ignore and default_languages."""

    import optparse

    parser = optparse.OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
258
def parse_args(parser):
    """Parse the command line with the given parser.

    The dir_ignore, file_ignore and languages options are given as comma
    separated strings; each of them is replaced by a frozenset of its
    elements.

    Returns the (options, positional arguments) pair of the parser."""

    opts, args = parser.parse_args()

    # Turn every comma separated list into a frozenset of its elements
    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))

    return opts, args
267
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # Every positional argument is a base path that is searched for files
    for base in args:
        found = find_files(base, languages=opts.languages,
                           file_ignore=opts.file_ignore,
                           dir_ignore=opts.dir_ignore)
        for filename, language in found:
            if not opts.dry_run:
                # The same name is passed as both file arguments
                update_file(filename, filename, language, SortIncludes())
                continue

            # Dry run: only report what was found
            print("%s: %s" % (filename, language))