copyright: add copyright missing from files I created
[gem5.git] / util / sort_includes.py
1 #!/usr/bin/env python
2
3 # Copyright (c) 2011 The Hewlett-Packard Development Company
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met: redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer;
10 # redistributions in binary form must reproduce the above copyright
11 # notice, this list of conditions and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution;
13 # neither the name of the copyright holders nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # Authors: Nathan Binkert
30
31 import os
32 import re
33 import sys
34
35 from file_types import *
36
# Map each C standard library header to the C++ wrapper header that
# replaces it: 'NAME.h' becomes 'cNAME' (e.g. 'stdio.h' -> 'cstdio').
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
)
56
# Matches C/C++ '#include' lines and '%include' / '%import' lines.  Both
# wildcards are non-greedy so that the *first* delimited name on the line
# is captured, even when a trailing comment contains quotes or angle
# brackets of its own.
include_re = re.compile(r'([#%])(include|import).*?[<"](.*?)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading space,
    so that directories are sorted before files when keys are compared.

    line -- text beginning with an include/import directive

    Returns the key as a string.  An assertion fails (reporting the
    line) if the line is not recognized as an include.
    '''

    match = include_re.match(line)
    assert match, line
    parts = match.group(3).split('/')

    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '

    # Everything but the file part needs to have a space prepended
    parts[:-1] = [' ' + part for part in parts[:-1]]

    return '/'.join(parts)
79
80
81 def _include_matcher(keyword="#include", delim="<>"):
82 """Match an include statement and return a (keyword, file, extra)
83 duple, or a touple of None values if there isn't a match."""
84
85 rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
86
87 def matcher(context, line):
88 m = rex.match(line)
89 return m.groups() if m else (None, ) * 3
90
91 return matcher
92
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname is a regular expression that is applied (with re.match) to
    the included file name.  All keyword arguments are handed on to
    _include_matcher, which takes care of matching the include line
    itself."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        # Reject lines that are not includes at all as well as includes
        # whose file name does not fit the pattern.
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
109
110
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file,
    i.e., a quoted include of the file that has the same base name as
    the source file, but a header extension."""

    # source file extension -> extension of its primary header
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }
    # Splits 'src/<base>.<ext>'; files outside of src/ never match.
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    quoted_matcher = _include_matcher(delim='""')
    no_match = (None, ) * 3

    def matcher(context, line):
        m = source_rex.match(context["filename"])
        if not m:
            return no_match

        base, ext = m.groups()
        keyword, fname, extra = quoted_matcher(context, line)

        # Source files with an unknown extension have no primary header.
        if ext not in header_ext:
            return no_match
        if fname != "%s.%s" % (base, header_ext[ext]):
            return no_match

        return (keyword, fname, extra)

    return matcher
137
class SortIncludes(object):
    """Group and sort the include statements of a source file.

    An instance is callable.  Calling it with the lines of a file
    returns a generator that yields the lines back, with every run of
    include statements replaced by sorted, de-duplicated blocks that
    are separated from each other by an empty line.
    """

    # Different types of includes for different sorting of headers.
    # Each entry is (block type, output delimiters, matcher); the
    # entries are tried in order and the first matcher that accepts a
    # line decides its block type.
    #   "<base>.<hdr ext>"   - primary header of the source file
    #   <Python.h>           - Python header
    #   <*.h>                - system headers
    #   <*>                  - STL headers
    #   <*.(hh|hxx|hpp|H)>   - C++ headers
    #   "*.h" / "*.hh"       - M5 headers
    #   %import / %include   - swig statements
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Order in which the blocks are emitted.  Block types that share a
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
    )

    def __init__(self):
        # Map every block type to the position of its group in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create the (empty) include storage right away so that
        # dump_blocks() and dump_includes() can be used before the
        # instance has been called for the first time.
        self.reset()

    def reset(self):
        """Clear all stored headers."""
        # block type -> list of cleaned up include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the include lines with duplicates
        removed."""

        includes = []
        for block_type in block_types:
            # Block types that did not occur simply contribute nothing.
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as one string and forget them.

        Blocks appear in the order given by block_order; the lines of a
        block are joined by newlines and the blocks are separated by
        one empty line."""

        blocks = []
        # Create a list of blocks in the prescribed include
        # order. Each entry in the list is a multi-line string with
        # multiple includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Yield the given lines with their include blocks sorted.

        lines -- iterable of source lines (presumably without trailing
                 newlines, since blocks are joined with newlines here;
                 confirm against the caller)
        filename -- name of the file; used to recognize the primary
                    header of the file
        language -- language of the file; for 'C++', C library headers
                    listed in cpp_c_headers are replaced by their C++
                    counterparts
        """
        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
        }

        def match_line(line):
            # Returns (block type, normalized include line) for an
            # include statement and (None, line) for everything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # if we've got a match, clean up the #include line,
                # fix up stl headers and store it in the proper category
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include, None)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()
263
264
265
# language types that our sorting rules are applied to by default
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
268
def options():
    """Create the command line parser of this script.

    Returns an optparse.OptionParser that knows the comma separated
    list options -d/--dir_ignore, -f/--file_ignore and -l/--languages
    as well as the flag -n/--dry-run."""
    import optparse

    parser = optparse.OptionParser()

    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
286
def parse_args(parser):
    """Parse the command line with the given parser.

    The comma separated values of the dir_ignore, file_ignore and
    languages options are converted into frozensets of their items.

    Returns the (options, positional arguments) pair."""
    opts, args = parser.parse_args()

    # Replace each raw comma separated string by the set of its items.
    for name in ('dir_ignore', 'file_ignore', 'languages'):
        raw_value = getattr(opts, name)
        setattr(opts, name, frozenset(raw_value.split(',')))

    return opts, args
295
# Script entry point: sort the includes of all files found below the
# paths given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # Every positional argument is a base path handed to find_files
    # (not defined in this file; presumably provided by the
    # file_types star import -- confirm).
    for base in args:
        for filename, language in find_files(
                base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # A single parenthesized argument is valid both as a
                # Python 2 print statement and as a Python 3 print()
                # call, and prints the same text in either case.
                print("%s: %s" % (filename, language))
            else:
                # The same name is passed twice, so the file is
                # presumably rewritten in place; a fresh sorter is used
                # for every file.
                update_file(filename, filename, language, SortIncludes())