style: Update the style checker to handle new include order
[gem5.git] / util / style.py
1 #! /usr/bin/env python
2 # Copyright (c) 2014 ARM Limited
3 # All rights reserved
4 #
5 # The license below extends only to copyright in the software and shall
6 # not be construed as granting a license to any other intellectual
7 # property including but not limited to intellectual property relating
8 # to a hardware implementation of the functionality of the software
9 # licensed hereunder. You may use the software subject to the license
10 # terms below provided that you ensure that this notice is replicated
11 # unmodified and in its entirety in all distributions of the software,
12 # modified or unmodified, in source code or in binary form.
13 #
14 # Copyright (c) 2006 The Regents of The University of Michigan
15 # Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16 # All rights reserved.
17 #
18 # Redistribution and use in source and binary forms, with or without
19 # modification, are permitted provided that the following conditions are
20 # met: redistributions of source code must retain the above copyright
21 # notice, this list of conditions and the following disclaimer;
22 # redistributions in binary form must reproduce the above copyright
23 # notice, this list of conditions and the following disclaimer in the
24 # documentation and/or other materials provided with the distribution;
25 # neither the name of the copyright holders nor the names of its
26 # contributors may be used to endorse or promote products derived from
27 # this software without specific prior written permission.
28 #
29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #
41 # Authors: Nathan Binkert
42
43 import heapq
44 import os
45 import re
46 import sys
47
48 from os.path import dirname, join as joinpath
49 from itertools import count
50 from mercurial import bdiff, mdiff, commands
51
52 current_dir = dirname(__file__)
53 sys.path.insert(0, current_dir)
54 sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56 from m5.util import neg_inf, pos_inf, Region, Regions
57 import sort_includes
58 from file_types import lang_type
59
# Region set covering every line number; used as the default 'regions'
# argument of the verifiers so that whole files are examined.
all_regions = Regions(Region(neg_inf, pos_inf))

# Distance between tab stops used when measuring or expanding tabs.
tabsize = 8

# Run of spaces/tabs at the start, respectively the end, of a line.
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')

# A control keyword followed by any amount of blank space and '(' ...
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# ... and the accepted spelling with exactly one space before '('.
good_control = re.compile(r'\b(if|while|for) [(]')

# Language names for which validate() performs its format checks.
format_types = set(['C', 'C++'])
69
70
def re_ignore(expr):
    """Build an ignore-rule predicate from a regular expression.

    The returned function takes a file name and returns the result of
    matching the compiled expression against the beginning of that name:
    a match object on success, None otherwise.
    """
    pattern = re.compile(expr)

    def matches(fname):
        return pattern.match(fname)

    return matches
79
# Predicates that decide whether a file is exempt from the style
# rules. Each one is called with the file name relative to the
# repository root (without a leading slash); the file is excluded as
# soon as any of them returns a true value.
style_ignores = [
    # External projects are unlikely to follow the gem5 coding
    # convention, so they are left alone.
    re_ignore("^ext/"),
]
90
def check_ignores(fname):
    """Return True if at least one rule in style_ignores accepts fname,
    False otherwise."""

    return any(rule(fname) for rule in style_ignores)
99
100
def modified_regions(old_data, new_data):
    """Return the regions of new_data that differ from old_data.

    bdiff.blocks() yields the blocks that *match* between the two texts
    as (old_begin, old_end, new_begin, new_end) tuples. The gaps between
    consecutive matches, measured on the new side, are collected into a
    Regions object and returned.
    """
    regions = Regions()
    # Start tracking at offset 0 instead of at the first matching block.
    # Otherwise a change at the very top of the file, which lies before
    # the first match, would never be reported.
    beg = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg != fbeg:
            # Everything between the end of the previous match and the
            # start of this one is new or changed.
            regions.append(beg, fbeg)
        beg = fend
    return regions
109
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent(s).

    The file context for fname is looked up in wctx. With one parent
    the result is the set of regions modified relative to that parent;
    with two parents only regions modified relative to both are kept.
    For any other number of parents the whole file is reported.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()
    contents = fctx.data()

    if len(parents) not in (1, 2):
        whole_file = Regions()
        whole_file.append(0, len(mdiff.splitnewlines(contents)))
        return whole_file

    changed = modified_regions(parents[0].data(), contents)
    if len(parents) == 2:
        # only the lines that are new in both
        changed &= modified_regions(parents[1].data(), contents)

    return changed
127
class UserInterface(object):
    """Base class for the front ends used to talk to the user.

    Subclasses supply do_prompt(prompt, results, default), which asks a
    single question and returns the reply.
    """

    def __init__(self, verbose=False, auto=False):
        self.verbose = verbose
        self.auto = auto

    def prompt(self, prompt, results, default):
        """Ask until the reply is one of 'results' and return it.

        When self.auto is set, no question is asked and self.auto itself
        is returned as the answer.
        """
        if self.auto:
            return self.auto

        answer = self.do_prompt(prompt, results, default)
        while answer not in results:
            answer = self.do_prompt(prompt, results, default)
        return answer
141
class MercurialUI(UserInterface):
    """User interface that forwards to the 'ui' object it is given."""

    def __init__(self, ui, *args, **kwargs):
        # Remaining arguments (verbose, auto) go to UserInterface.
        self.ui = ui
        super(MercurialUI, self).__init__(*args, **kwargs)

    def do_prompt(self, prompt, results, default):
        """Ask through the wrapped ui; 'results' is not forwarded."""
        reply = self.ui.prompt(prompt, default=default)
        return reply

    def write(self, string):
        """Emit string through the wrapped ui."""
        self.ui.write(string)
152
class StdioUI(UserInterface):
    """User interface working directly on the process' standard streams."""

    def do_prompt(self, prompt, results, default):
        """Read one reply; an empty reply selects 'default'."""
        reply = raw_input(prompt)
        if reply:
            return reply
        return default

    def write(self, string):
        """Write string to standard output without adding a newline."""
        sys.stdout.write(string)
159
class Verifier(object):
    """Base class for line-oriented style checks on repository files.

    Subclasses are expected to provide:
      languages  -- collection of language names the check applies to
      test_name  -- name of the check, used in messages
      check_line(line) -- true value if the line is acceptable
      fix_line(line)   -- corrected version of the line
    """

    def __init__(self, ui, repo):
        self.ui = ui
        self.repo = repo

    def __getattr__(self, attr):
        # Only called when regular attribute lookup fails.
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            # Create the working context on first use and cache it on
            # the instance so __getattr__ is not consulted again.
            try:
                wctx = self.repo.workingctx()
            except AttributeError:
                # The repository object has no workingctx() method.
                from mercurial import context
                wctx = context.workingctx(self.repo)
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open a repository-relative file.

        Returns the file object, or None (after printing a message) if
        the file could not be opened.
        """
        filename = self.repo.wjoin(filename)

        try:
            # Opening a file reports failure as IOError on Python 2, so
            # catching OSError alone would let the error escape.
            return open(filename, mode)
        except (IOError, OSError):
            msg = sys.exc_info()[1]
            print('could not open file %s: %s' % (filename, msg))
            return None

    def skip(self, filename):
        """Return True if the check does not apply to filename."""
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the
        # location pointed to is a directory, skip it. If the location is
        # a file inside the gem5 directory, it will be checked as a file,
        # so the symlink can be skipped. If the location is a file outside
        # gem5, we don't want to check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Report every offending line of filename that lies in regions.

        Returns the number of offending lines. A file that cannot be
        opened counts as having none; open() has already reported it.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the line itself, without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite filename in place, passing every line that lies in
        regions through fix_line() and copying the others unchanged."""
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)
                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check filename and, on errors, let 'prompt' decide what to do.

        prompt is called as prompt(filename, self.fix, regions). Returns
        True if it returned a true value, False in every other case.
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
240
241
class Whitespace(Verifier):
    """Verifier rejecting tabs in the indentation and trailing blanks."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False for a line with a tab in its leading whitespace
        or with trailing whitespace, True otherwise."""
        indent = lead.search(line)
        if indent is not None and '\t' in indent.group(1):
            return False

        return trail.search(line) is None

    def fix_line(self, line):
        """Return line with its leading tabs expanded to spaces and its
        trailing whitespace replaced by a single newline."""
        indent = lead.search(line)
        if indent is not None:
            # Walk the leading blanks, tracking the column they reach;
            # a tab advances to the next multiple of tabsize.
            column = 0
            for char in indent.group(1):
                if char == '\t':
                    column += tabsize - column % tabsize
                else:
                    column += 1

            line = ' ' * column + line[indent.end():]

        return line.rstrip() + '\n'
271
class SortedIncludes(Verifier):
    """Verifier comparing a file with the output of
    sort_includes.SortIncludes for that file."""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Return 'lines' (newline-stripped, non-empty) as re-ordered by
        the include sorter."""
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting the includes would change any part of the
        file that lies within regions, 0 otherwise."""
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)
        new = ''.join(line + '\n'
                      for line in self._sorted_lines(filename, lines))

        # Only differences overlapping the requested regions count.
        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite filename with its includes sorted.

        The whole file is rewritten; 'regions' is accepted for interface
        compatibility with Verifier.fix() but is not consulted.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                # Nothing to sort, and lines[0] below would not exist.
                return

            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
320
def linelen(line):
    """Return the display width of line, with each tab advancing to the
    next multiple of tabsize."""
    if '\t' not in line:
        return len(line)

    width = 0
    for char in line:
        if char == '\t':
            width += tabsize - width % tabsize
        else:
            width += 1

    return width
334
class ValidationStats(object):
    """Counters for the kinds of violation found by validate()."""

    def __init__(self):
        # lines over 79 chars / of those, lines of exactly 80 chars
        self.toolong = 0
        self.toolong80 = 0
        # tabs used to indent / whitespace at the end of a line
        self.leadtabs = 0
        self.trailwhite = 0
        # bad spacing after if/while/for
        self.badcontrol = 0
        # carriage returns
        self.cret = 0

    def dump(self):
        """Print a summary of all counters."""
        report = '''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
'''
        print(report % (self.toolong, self.toolong80, self.trailwhite,
                        self.leadtabs, self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return the first non-zero counter, so the object is true
        exactly when some violation was counted."""
        leading = (self.toolong, self.toolong80, self.leadtabs,
                   self.trailwhite, self.badcontrol)
        for value in leading:
            if value:
                return value
        return self.cret
357
def validate(filename, stats, verbose, exit_code):
    """Check the formatting of one file and record violations in stats.

    Files whose language (as reported by lang_type) is not listed in
    format_types are ignored.

    filename  -- path of the file to check
    stats     -- ValidationStats object whose counters are incremented
    verbose   -- > 0 reports files that cannot be opened, > 1 reports
                 each violation, > 2 also prints the offending line
    exit_code -- if not None, the process exits with this code at the
                 first problem found
    """
    language = lang_type(filename)
    if language not in format_types:
        return

    # The control-statement spacing rule is only applied to C++.
    # NOTE(review): the original referenced an undefined 'cpp' flag;
    # deriving it from the detected language is an assumption based on
    # the "for c++" comment below -- confirm the intended scope.
    cpp = language == 'C++'

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        # Opening a file reports failure as IOError on Python 2.
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        f.close()
422
423
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # Returns True if the user chose to abort, False otherwise.

    opt_fix_white = opts.get('fix_white', False)
    opt_all = opts.get('all', False)
    opt_no_ignore = opts.get('no_ignore', False)

    # Do not hand the boolean --fix-white flag to the UI as its 'auto'
    # answer: UserInterface.prompt() returns 'auto' verbatim, and True
    # is neither 'a' nor 'f', so every violation would be silently
    # ignored instead of fixed.
    ui = MercurialUI(hgui, hgui.verbose)

    def prompt(name, func, regions=all_regions):
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(name, regions)

        return False

    def no_prompt(name, func, regions=all_regions):
        # Fix without asking; never aborts.
        func(name, regions)
        return False

    # --fix-white only concerns whitespace; include sorting still asks.
    if opt_fix_white:
        prompt_white = no_prompt
    else:
        prompt_white = prompt

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, pats, opts)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)
    if not opt_all:
        try:
            wctx = repo.workingctx()
        except AttributeError:
            # The repository object has no workingctx() method.
            from mercurial import context
            wctx = context.workingctx(repo)

        # Added files are checked completely, modified files only in
        # the regions that changed.
        files = [(fn, all_regions) for fn in added] + \
                [(fn, modregions(wctx, fn)) for fn in modified]
    else:
        files = [(fn, all_regions) for fn in added + modified + clean]

    whitespace = Whitespace(ui, repo)
    sorted_includes = SortedIncludes(ui, repo)
    for fname, mod_regions in files:
        if not opt_no_ignore and check_ignores(fname):
            continue

        if whitespace.apply(fname, prompt_white, mod_regions):
            return True

        if sorted_includes.apply(fname, prompt, mod_regions):
            return True

    return False
491
def do_check_format(hgui, repo, **args):
    """Run validate() on all modified and added files of repo.

    If any violation is counted, the statistics are printed and the
    user is asked whether to ignore them or abort. Returns True to
    abort, False otherwise.

    An optional 'auto' keyword argument is handed to the user interface;
    when true it is returned as the answer instead of asking.
    """
    # 'auto' was previously read without ever being defined, so every
    # call failed with a NameError before checking anything.
    auto = args.get('auto', False)
    ui = MercurialUI(hgui, hgui.verbose, auto)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        validate(joinpath(repo.root, f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if result == 'a':
            return True

    return False
510
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is 'pretxncommit' or
    'pre-qrefresh'."""
    if hooktype in ('pretxncommit', 'pre-qrefresh'):
        return

    raise AttributeError("This hook is not meant for %s" % hooktype)
515
def check_style(ui, repo, hooktype, **kwargs):
    """Hook entry point running do_check_style() without options.

    Returns the result of do_check_style(); if that raises, the
    traceback is printed and True is returned.
    """
    check_hook(hooktype)
    hook_args = {}

    try:
        outcome = do_check_style(ui, repo, **hook_args)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True

    return outcome
526
def check_format(ui, repo, hooktype, **kwargs):
    """Hook entry point running do_check_format() without options.

    Returns the result of do_check_format(); if that raises, the
    traceback is printed and True is returned.
    """
    check_hook(hooktype)
    hook_args = {}

    try:
        outcome = do_check_format(ui, repo, **hook_args)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True

    return outcome
537
try:
    from mercurial.i18n import _
except ImportError:
    # No i18n module available: use the strings untranslated.
    def _(arg):
        return arg

# Command table: maps each command name to a tuple of
# (function, option list, synopsis).
cmdtable = {
    '^m5style': (
        do_check_style,
        [
            ('w', 'fix-white', False, _("automatically fix whitespace")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
            ('', 'no-ignore', False, _("ignore the style ignore list")),
        ] + commands.walkopts,
        _('hg m5style [-a] [FILE]...'),
    ),
    '^m5format': (
        do_check_format,
        [],
        _('hg m5format [FILE]...'),
    ),
}
558
# Stand-alone command line use:
#   fixwhite  [-t <tabsize>] <path> ...  rewrite files with fixed whitespace
#   chkwhite  [-n] [-v] <path> ...       report whitespace violations
#   chkformat [-n] [-v] <path> ...       report format violations
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Unknown option or missing argument: show the command's usage.
        sys.exit(usage)

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            # Module-level setting read by Whitespace.fix_line().
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    if command in ('fixwhite', 'chkwhite'):
        # The fixwhite()/checkwhite() helpers this script used to call
        # do not exist in this file. The per-line methods of the
        # Whitespace verifier do the same work and need no repository,
        # so an instance without one is sufficient here.
        whitespace = Whitespace(StdioUI(), None)

    if command == 'fixwhite':
        for filename in args:
            f = open(filename, 'r+')
            try:
                lines = list(f)
                f.seek(0)
                f.truncate()
                for line in lines:
                    f.write(whitespace.fix_line(line))
            finally:
                f.close()
    elif command == 'chkwhite':
        for filename in args:
            f = open(filename, 'r')
            try:
                for num, line in enumerate(f):
                    if whitespace.check_line(line):
                        continue
                    print('invalid whitespace: %s:%d' % (filename, num + 1))
                    if verbose:
                        print('>>%s<<' % line[:-1])
            finally:
                f.close()
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()