util/style.py

   1 #! /usr/bin/env python
   2 # Copyright (c) 2014 ARM Limited
   3 # All rights reserved
   4 #
   5 # The license below extends only to copyright in the software and shall
   6 # not be construed as granting a license to any other intellectual
   7 # property including but not limited to intellectual property relating
   8 # to a hardware implementation of the functionality of the software
   9 # licensed hereunder.  You may use the software subject to the license
  10 # terms below provided that you ensure that this notice is replicated
  11 # unmodified and in its entirety in all distributions of the software,
  12 # modified or unmodified, in source code or in binary form.
  13 #
  14 # Copyright (c) 2006 The Regents of The University of Michigan
  15 # Copyright (c) 2007,2011 The Hewlett-Packard Development Company
  16 # All rights reserved.
  17 #
  18 # Redistribution and use in source and binary forms, with or without
  19 # modification, are permitted provided that the following conditions are
  20 # met: redistributions of source code must retain the above copyright
  21 # notice, this list of conditions and the following disclaimer;
  22 # redistributions in binary form must reproduce the above copyright
  23 # notice, this list of conditions and the following disclaimer in the
  24 # documentation and/or other materials provided with the distribution;
  25 # neither the name of the copyright holders nor the names of its
  26 # contributors may be used to endorse or promote products derived from
  27 # this software without specific prior written permission.
  28 #
  29 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 #
  41 # Authors: Nathan Binkert
  42
  43 import heapq
  44 import os
  45 import re
  46 import sys
  47
  48 from os.path import dirname, join as joinpath
  49 from itertools import count
  50 from mercurial import bdiff, mdiff, commands
  51
  52 current_dir = dirname(__file__)
  53 sys.path.insert(0, current_dir)
  54 sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
  55
  56 from m5.util import neg_inf, pos_inf, Region, Regions
  57 import sort_includes
  58 from file_types import lang_type
  59
# Region set running from neg_inf to pos_inf; used below as the default
# "regions" argument so that every line of a file is considered.
all_regions = Regions(Region(neg_inf, pos_inf))

# Column multiple that a tab character advances to when tabs are
# expanded (Whitespace.fix_line) or measured (linelen).
tabsize = 8
# Run of spaces/tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the very end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by any amount of blanks and an open paren.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# if/while/for followed by exactly one space and an open paren.
good_control = re.compile(r'\b(if|while|for) [(]')

# lang_type() results for which validate() performs its checks.
format_types = set(('C', 'C++'))
  69
  70
  71 def re_ignore(expr):
  72     """Helper function to create regular expression ignore file
  73     matcher functions"""
  74
  75     rex = re.compile(expr)
  76     def match_re(fname):
  77         return rex.match(fname)
  78     return match_re
  79
# Functions that are called to determine whether a file should be
# excluded from the style matching rules. Each function is called with
# the file name relative to the repository root (without a leading
# slash) as its argument. A file is excluded if any function in the
# list returns a true value.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    re_ignore("^ext/"),
]
  90
  91 def check_ignores(fname):
  92     """Check if a file name matches any of the ignore rules"""
  93
  94     for rule in style_ignores:
  95         if rule(fname):
  96             return True
  97
  98     return False
  99
 100
 101 def modified_regions(old_data, new_data):
 102     regions = Regions()
 103     beg = None
 104     for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
 105         if beg is not None and beg != fbeg:
 106             regions.append(beg, fbeg)
 107         beg = fend
 108     return regions
 109
 110 def modregions(wctx, fname):
 111     fctx = wctx.filectx(fname)
 112     pctx = fctx.parents()
 113
 114     file_data = fctx.data()
 115     lines = mdiff.splitnewlines(file_data)
 116     if len(pctx) in (1, 2):
 117         mod_regions = modified_regions(pctx[0].data(), file_data)
 118         if len(pctx) == 2:
 119             m2 = modified_regions(pctx[1].data(), file_data)
 120             # only the lines that are new in both
 121             mod_regions &= m2
 122     else:
 123         mod_regions = Regions()
 124         mod_regions.append(0, len(lines))
 125
 126     return mod_regions
 127
 128 class UserInterface(object):
 129     def __init__(self, verbose=False, auto=False):
 130         self.auto = auto
 131         self.verbose = verbose
 132
 133     def prompt(self, prompt, results, default):
 134         if self.auto:
 135             return self.auto
 136
 137         while True:
 138             result = self.do_prompt(prompt, results, default)
 139             if result in results:
 140                 return result
 141
class MercurialUI(UserInterface):
    """User interface that forwards prompts and output to the ui
    object it is constructed with."""

    def __init__(self, ui, *args, **kwargs):
        # Remaining arguments (verbose, auto) go to UserInterface.
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.ui = ui

    def do_prompt(self, prompt, results, default):
        # 'results' is not forwarded; UserInterface.prompt validates
        # the answer against it.
        return self.ui.prompt(prompt, default=default)

    def write(self, string):
        self.ui.write(string)
 152
class StdioUI(UserInterface):
    """User interface that prompts with raw_input and writes to
    sys.stdout."""

    def do_prompt(self, prompt, results, default):
        # An empty answer selects the default.
        return raw_input(prompt) or default

    def write(self, string):
        sys.stdout.write(string)
 159
 160 class Verifier(object):
 161     def __init__(self, ui, repo):
 162         self.ui = ui
 163         self.repo = repo
 164
 165     def __getattr__(self, attr):
 166         if attr in ('prompt', 'write'):
 167             return getattr(self.ui, attr)
 168
 169         if attr == 'wctx':
 170             try:
 171                 wctx = repo.workingctx()
 172             except:
 173                 from mercurial import context
 174                 wctx = context.workingctx(repo)
 175             self.wctx = wctx
 176             return wctx
 177
 178         raise AttributeError
 179
 180     def open(self, filename, mode):
 181         filename = self.repo.wjoin(filename)
 182
 183         try:
 184             f = file(filename, mode)
 185         except OSError, msg:
 186             print 'could not open file %s: %s' % (filename, msg)
 187             return None
 188
 189         return f
 190
 191     def skip(self, filename):
 192         filename = self.repo.wjoin(filename)
 193
 194         # We never want to handle symlinks, so always skip them: If the location
 195         # pointed to is a directory, skip it. If the location is a file inside
 196         # the gem5 directory, it will be checked as a file, so symlink can be
 197         # skipped. If the location is a file outside gem5, we don't want to
 198         # check it anyway.
 199         if os.path.islink(filename):
 200             return True
 201         return lang_type(filename) not in self.languages
 202
 203     def check(self, filename, regions=all_regions):
 204         f = self.open(filename, 'r')
 205
 206         errors = 0
 207         for num,line in enumerate(f):
 208             if num not in regions:
 209                 continue
 210             if not self.check_line(line):
 211                 self.write("invalid %s in %s:%d\n" % \
 212                                (self.test_name, filename, num + 1))
 213                 if self.ui.verbose:
 214                     self.write(">>%s<<\n" % line[-1])
 215                 errors += 1
 216         return errors
 217
 218     def fix(self, filename, regions=all_regions):
 219         f = self.open(filename, 'r+')
 220
 221         lines = list(f)
 222
 223         f.seek(0)
 224         f.truncate()
 225
 226         for i,line in enumerate(lines):
 227             if i in regions:
 228                 line = self.fix_line(line)
 229
 230             f.write(line)
 231         f.close()
 232
 233     def apply(self, filename, prompt, regions=all_regions):
 234         if not self.skip(filename):
 235             errors = self.check(filename, regions)
 236             if errors:
 237                 if prompt(filename, self.fix, regions):
 238                     return True
 239         return False
 240
 241
 242 class Whitespace(Verifier):
 243     languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
 244     test_name = 'whitespace'
 245     def check_line(self, line):
 246         match = lead.search(line)
 247         if match and match.group(1).find('\t') != -1:
 248             return False
 249
 250         match = trail.search(line)
 251         if match:
 252             return False
 253
 254         return True
 255
 256     def fix_line(self, line):
 257         if lead.search(line):
 258             newline = ''
 259             for i,c in enumerate(line):
 260                 if c == ' ':
 261                     newline += ' '
 262                 elif c == '\t':
 263                     newline += ' ' * (tabsize - len(newline) % tabsize)
 264                 else:
 265                     newline += line[i:]
 266                     break
 267
 268             line = newline
 269
 270         return line.rstrip() + '\n'
 271
 272 class SortedIncludes(Verifier):
 273     languages = sort_includes.default_languages
 274     def __init__(self, *args, **kwargs):
 275         super(SortedIncludes, self).__init__(*args, **kwargs)
 276         self.sort_includes = sort_includes.SortIncludes()
 277
 278     def check(self, filename, regions=all_regions):
 279         f = self.open(filename, 'r')
 280
 281         lines = [ l.rstrip('\n') for l in f.xreadlines() ]
 282         old = ''.join(line + '\n' for line in lines)
 283         f.close()
 284
 285         if len(lines) == 0:
 286             return 0
 287
 288         language = lang_type(filename, lines[0])
 289         sort_lines = list(self.sort_includes(lines, filename, language))
 290         new = ''.join(line + '\n' for line in sort_lines)
 291
 292         mod = modified_regions(old, new)
 293         modified = mod & regions
 294
 295         if modified:
 296             self.write("invalid sorting of includes in %s\n" % (filename))
 297             if self.ui.verbose:
 298                 for start, end in modified.regions:
 299                     self.write("bad region [%d, %d)\n" % (start, end))
 300             return 1
 301
 302         return 0
 303
 304     def fix(self, filename, regions=all_regions):
 305         f = self.open(filename, 'r+')
 306
 307         old = f.readlines()
 308         lines = [ l.rstrip('\n') for l in old ]
 309         language = lang_type(filename, lines[0])
 310         sort_lines = list(self.sort_includes(lines, filename, language))
 311         new = ''.join(line + '\n' for line in sort_lines)
 312
 313         f.seek(0)
 314         f.truncate()
 315
 316         for i,line in enumerate(sort_lines):
 317             f.write(line)
 318             f.write('\n')
 319         f.close()
 320
 321 def linelen(line):
 322     tabs = line.count('\t')
 323     if not tabs:
 324         return len(line)
 325
 326     count = 0
 327     for c in line:
 328         if c == '\t':
 329             count += tabsize - count % tabsize
 330         else:
 331             count += 1
 332
 333     return count
 334
 335 class ValidationStats(object):
 336     def __init__(self):
 337         self.toolong = 0
 338         self.toolong80 = 0
 339         self.leadtabs = 0
 340         self.trailwhite = 0
 341         self.badcontrol = 0
 342         self.cret = 0
 343
 344     def dump(self):
 345         print '''\
 346 %d violations of lines over 79 chars. %d of which are 80 chars exactly.
 347 %d cases of whitespace at the end of a line.
 348 %d cases of tabs to indent.
 349 %d bad parens after if/while/for.
 350 %d carriage returns found.
 351 ''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
 352        self.badcontrol, self.cret)
 353
 354     def __nonzero__(self):
 355         return self.toolong or self.toolong80 or self.leadtabs or \
 356                self.trailwhite or self.badcontrol or self.cret
 357
 358 def validate(filename, stats, verbose, exit_code):
 359     if lang_type(filename) not in format_types:
 360         return
 361
 362     def msg(lineno, line, message):
 363         print '%s:%d>' % (filename, lineno + 1), message
 364         if verbose > 2:
 365             print line
 366
 367     def bad():
 368         if exit_code is not None:
 369             sys.exit(exit_code)
 370
 371     try:
 372         f = file(filename, 'r')
 373     except OSError:
 374         if verbose > 0:
 375             print 'could not open file %s' % filename
 376         bad()
 377         return
 378
 379     for i,line in enumerate(f):
 380         line = line.rstrip('\n')
 381
 382         # no carriage returns
 383         if line.find('\r') != -1:
 384             self.cret += 1
 385             if verbose > 1:
 386                 msg(i, line, 'carriage return found')
 387             bad()
 388
 389         # lines max out at 79 chars
 390         llen = linelen(line)
 391         if llen > 79:
 392             stats.toolong += 1
 393             if llen == 80:
 394                 stats.toolong80 += 1
 395             if verbose > 1:
 396                 msg(i, line, 'line too long (%d chars)' % llen)
 397             bad()
 398
 399         # no tabs used to indent
 400         match = lead.search(line)
 401         if match and match.group(1).find('\t') != -1:
 402             stats.leadtabs += 1
 403             if verbose > 1:
 404                 msg(i, line, 'using tabs to indent')
 405             bad()
 406
 407         # no trailing whitespace
 408         if trail.search(line):
 409             stats.trailwhite +=1
 410             if verbose > 1:
 411                 msg(i, line, 'trailing whitespace')
 412             bad()
 413
 414         # for c++, exactly one space betwen if/while/for and (
 415         if cpp:
 416             match = any_control.search(line)
 417             if match and not good_control.search(line):
 418                 stats.badcontrol += 1
 419                 if verbose > 1:
 420                     msg(i, line, 'improper spacing after %s' % match.group(1))
 421                 bad()
 422
 423
 424 def do_check_style(hgui, repo, *pats, **opts):
 425     """check files for proper m5 style guidelines
 426
 427     Without an argument, checks all modified and added files for gem5
 428     coding style violations. A list of files can be specified to limit
 429     the checker to a subset of the repository. The style rules are
 430     normally applied on a diff of the repository state (i.e., added
 431     files are checked in their entirety while only modifications of
 432     modified files are checked).
 433
 434     The --all option can be specified to include clean files and check
 435     modified files in their entirety.
 436     """
 437     from mercurial import mdiff, util
 438
 439     opt_fix_white = opts.get('fix_white', False)
 440     opt_all = opts.get('all', False)
 441     opt_no_ignore = opts.get('no_ignore', False)
 442     ui = MercurialUI(hgui, hgui.verbose, opt_fix_white)
 443
 444     def prompt(name, func, regions=all_regions):
 445         result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
 446         if result == 'a':
 447             return True
 448         elif result == 'f':
 449             func(name, regions)
 450
 451         return False
 452
 453
 454     # Import the match (repository file name matching helper)
 455     # function. Different versions of Mercurial keep it in different
 456     # modules and implement them differently.
 457     try:
 458         from mercurial import scmutil
 459         m = scmutil.match(repo[None], pats, opts)
 460     except ImportError:
 461         from mercurial import cmdutil
 462         m = cmdutil.match(repo, pats, opts)
 463
 464     modified, added, removed, deleted, unknown, ignore, clean = \
 465         repo.status(match=m, clean=opt_all)
 466     if not opt_all:
 467         try:
 468             wctx = repo.workingctx()
 469         except:
 470             from mercurial import context
 471             wctx = context.workingctx(repo)
 472
 473         files = [ (fn, all_regions) for fn in added ] + \
 474             [ (fn,  modregions(wctx, fn)) for fn in modified ]
 475     else:
 476         files = [ (fn, all_regions) for fn in added + modified + clean ]
 477
 478     whitespace = Whitespace(ui, repo)
 479     sorted_includes = SortedIncludes(ui, repo)
 480     for fname, mod_regions in files:
 481         if not opt_no_ignore and check_ignores(fname):
 482             continue
 483
 484         if whitespace.apply(fname, prompt, mod_regions):
 485             return True
 486
 487         if sorted_includes.apply(fname, prompt, mod_regions):
 488             return True
 489
 490     return False
 491
 492 def do_check_format(hgui, repo, **args):
 493     ui = MercurialUI(hgui, hgui.verbose, auto)
 494
 495     modified, added, removed, deleted, unknown, ignore, clean = repo.status()
 496
 497     verbose = 0
 498     stats = ValidationStats()
 499     for f in modified + added:
 500         validate(joinpath(repo.root, f), stats, verbose, None)
 501
 502     if stats:
 503         stats.dump()
 504         result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
 505                            'ai', 'a')
 506         if result == 'a':
 507             return True
 508
 509     return False
 510
 511 def check_hook(hooktype):
 512     if hooktype not in ('pretxncommit', 'pre-qrefresh'):
 513         raise AttributeError, \
 514               "This hook is not meant for %s" % hooktype
 515
 516 def check_style(ui, repo, hooktype, **kwargs):
 517     check_hook(hooktype)
 518     args = {}
 519
 520     try:
 521         return do_check_style(ui, repo, **args)
 522     except Exception, e:
 523         import traceback
 524         traceback.print_exc()
 525         return True
 526
 527 def check_format(ui, repo, hooktype, **kwargs):
 528     check_hook(hooktype)
 529     args = {}
 530
 531     try:
 532         return do_check_format(ui, repo, **args)
 533     except Exception, e:
 534         import traceback
 535         traceback.print_exc()
 536         return True
 537
# Use Mercurial's translation function for the option help strings
# when it can be imported; otherwise fall back to an identity
# function so the strings are used untranslated.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg
 543
# Command table: maps each command name to a tuple of
# (function, list of option tuples, synopsis string).
cmdtable = {
    '^m5style' : (
        do_check_style, [
            ('w', 'fix-white', False, _("automatically fix whitespace")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
            ('', 'no-ignore', False, _("ignore the style ignore list")),
            ] +  commands.walkopts,
        _('hg m5style [-a] [FILE]...')),
    '^m5format' :
    ( do_check_format,
      [ ],
      _('hg m5format [FILE]...')),
}

 558
 559 if __name__ == '__main__':
 560     import getopt
 561
 562     progname = sys.argv[0]
 563     if len(sys.argv) < 2:
 564         sys.exit('usage: %s <command> [<command args>]' % progname)
 565
 566     fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
 567     chkformat_usage = '%s chkformat <path> [...] \n' % progname
 568     chkwhite_usage = '%s chkwhite <path> [...] \n' % progname
 569
 570     command = sys.argv[1]
 571     if command == 'fixwhite':
 572         flags = 't:'
 573         usage = fixwhite_usage
 574     elif command == 'chkwhite':
 575         flags = 'nv'
 576         usage = chkwhite_usage
 577     elif command == 'chkformat':
 578         flags = 'nv'
 579         usage = chkformat_usage
 580     else:
 581         sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)
 582
 583     opts, args = getopt.getopt(sys.argv[2:], flags)
 584
 585     code = 1
 586     verbose = 1
 587     for opt,arg in opts:
 588         if opt == '-n':
 589             code = None
 590         if opt == '-t':
 591             tabsize = int(arg)
 592         if opt == '-v':
 593             verbose += 1
 594
 595     if command == 'fixwhite':
 596         for filename in args:
 597             fixwhite(filename, tabsize)
 598     elif command == 'chkwhite':
 599         for filename in args:
 600             for line,num in checkwhite(filename):
 601                 print 'invalid whitespace: %s:%d' % (filename, num)
 602                 if verbose:
 603                     print '>>%s<<' % line[:-1]
 604     elif command == 'chkformat':
 605         stats = ValidationStats()
 606         for filename in args:
 607             validate(filename, stats=stats, verbose=verbose, exit_code=code)
 608
 609         if verbose > 0:
 610             stats.dump()
 611     else:
 612         sys.exit("command '%s' not found" % command)