From: Andreas Sandberg Date: Wed, 30 Mar 2016 14:30:32 +0000 (+0100) Subject: style: Refactor the style checker as a Python package X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2580fcd9d79e2be5933c2575ce1a6eb26380f8a5;p=gem5.git style: Refactor the style checker as a Python package Refactor the style checker into a Python module that can be reused by command line tools that integrate with git. In particular: * Create a style package in util * Move style validators from style.py to the style/validators.py. * Move style verifiers from style.py to the style/verifiers.py. * Move utility functions (sort_includes, region handling, file_types) into the style package * Move generic code from style.py to style/style.py. Signed-off-by: Andreas Sandberg Reviewed-by: Curtis Dunham Reviewed-by: Steve Reinhardt --HG-- rename : util/style.py => util/hgstyle.py rename : util/sort_includes.py => util/style/sort_includes.py extra : rebase_source : ad6cf9b9a18c48350dfc7b7c77bea6c5344fb53c --- diff --git a/src/python/m5/util/__init__.py b/src/python/m5/util/__init__.py index 3ff69d851..a832f0e93 100644 --- a/src/python/m5/util/__init__.py +++ b/src/python/m5/util/__init__.py @@ -40,7 +40,6 @@ from multidict import multidict from orderdict import orderdict from smartdict import SmartDict from sorteddict import SortedDict -from region import neg_inf, pos_inf, Region, Regions # panic() should be called when something happens that should never # ever happen regardless of what the user does (i.e., an acutal m5 diff --git a/src/python/m5/util/region.py b/src/python/m5/util/region.py deleted file mode 100644 index 247c397fe..000000000 --- a/src/python/m5/util/region.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (c) 2006 Nathan Binkert -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -class _neg_inf(object): - '''This object always compares less than any other object''' - def __repr__(self): return '' - def __lt__(self, other): return type(self) != type(other) - def __le__(self, other): return True - def __gt__(self, other): return False - def __ge__(self, other): return type(self) == type(other) - def __eq__(self, other): return type(self) == type(other) - def __ne__(self, other): return type(self) != type(other) -neg_inf = _neg_inf() - -class _pos_inf(object): - '''This object always compares greater than any other object''' - def __repr__(self): return '' - def __lt__(self, other): return False - def __le__(self, other): return type(self) == type(other) - def __gt__(self, other): return type(self) != type(other) - def __ge__(self, other): return True - def __eq__(self, other): return type(self) == type(other) - def __ne__(self, other): return type(self) != type(other) -pos_inf = _pos_inf() - -class Region(tuple): - '''A region (range) of [start, end). - This includes utility functions to compare overlap of regions.''' - def __new__(cls, *args): - if len(args) == 1: - arg = args[0] - if isinstance(arg, Region): - return arg - args = tuple(arg) - - if len(args) != 2: - raise AttributeError, \ - "Only one or two arguments allowed, %d provided" % (alen, ) - - return tuple.__new__(cls, args) - - def __repr__(self): - return 'Region(%s, %s)' % (self[0], self[1]) - - @property - def start(self): - return self[0] - - @property - def end(self): - return self[1] - - def __contains__(self, other): - '''other is - region: True if self and other is fully contained within self. - pos: True if other is within the region''' - if isinstance(other, tuple): - return self[0] <= other[0] and self[1] >= other[1] - return self[0] <= other and other < self[1] - - def __eq__(self, other): - '''other is - region: True if self and other are identical. 
- pos: True if other is within the region''' - if isinstance(other, tuple): - return self[0] == other[0] and self[1] == other[1] - return self[0] <= other and other < self[1] - - # @param self is a region. - # @param other is a region. - # @return if self and other are not identical. - def __ne__(self, other): - '''other is - region: true if they are not identical - pos: True if other is not in the region''' - if isinstance(other, tuple): - return self[0] != other[0] or self[1] != other[1] - return other < self[0] or self[1] <= other - - # @param self is a region. - # @param other is a region. - # @return if self is less than other and does not overlap self. - def __lt__(self, other): - "self completely left of other (cannot overlap)" - if isinstance(other, tuple): - return self[1] <= other[0] - return self[1] <= other - - # @param self is a region. - # @param other is a region. - # @return if self is less than other. self may overlap other, - # but not extend beyond the _end of other. - def __le__(self, other): - "self extends to the left of other (can overlap)" - if isinstance(other, tuple): - return self[0] <= other[0] - return self[0] <= other - - # @param self is a region. - # @param other is a region. - # @return if self is greater than other and does not overlap other. - def __gt__(self, other): - "self is completely right of other (cannot overlap)" - if isinstance(other, tuple): - return self[0] >= other[1] - return self[0] > other - - # @param self is a region. - # @param other is a region. - # @return if self is greater than other. self may overlap other, - # but not extend beyond the beginning of other. - def __ge__(self, other): - "self ex_ends beyond other to the right (can overlap)" - if isinstance(other, tuple): - return self[1] >= other[1] - return self[1] > other - -class Regions(object): - '''A set of regions (ranges). Basically a region with holes. 
- Includes utility functions to merge regions and figure out if - something is in one of the regions.''' - def __init__(self, *args): - self.regions = [] - self.extend(*args) - - def copy(self): - copy = Regions() - copy.regions.extend(self.regions) - return copy - - def append(self, *args): - self.regions.append(Region(*args)) - - def extend(self, *args): - self.regions.extend(Region(a) for a in args) - - def __contains__(self, position): - for region in self.regions: - if position in region: - return True - - return False - - def __len__(self): - return len(self.regions) - - def __iand__(self, other): - A = self.regions - B = other.regions - R = [] - - i = 0 - j = 0 - while i < len(self) and j < len(other): - a = A[i] - b = B[j] - if a[1] <= b[0]: - # A is completely before B. Skip A - i += 1 - elif a[0] <= b[0]: - if a[1] <= b[1]: - # A and B overlap with B not left of A and A not right of B - R.append(Region(b[0], a[1])) - - # Advance A because nothing is left - i += 1 - - if a[1] == b[1]: - # Advance B too - j += 1 - else: - # A and B overlap with B completely within the bounds of A - R.append(Region(b[0], b[1])) - - # Advance only B because some of A may still be useful - j += 1 - elif b[1] <= a[0]: - # B is completely before A. Skip B. 
- j += 1 - else: - assert b[0] < a[0] - if b[1] <= a[1]: - # A and B overlap with A not left of B and B not right of A - R.append(Region(a[0], b[1])) - - # Advance B because nothing is left - j += 1 - - if a[1] == b[1]: - # Advance A too - i += 1 - else: - # A and B overlap with A completely within the bounds of B - R.append(Region(a[0], a[1])) - - # Advance only A because some of B may still be useful - i += 1 - - self.regions = R - return self - - def __and__(self, other): - result = self.copy() - result &= other - return result - - def __repr__(self): - return 'Regions(%s)' % ([(r[0], r[1]) for r in self.regions], ) - -if __name__ == '__main__': - x = Regions(*((i, i + 1) for i in xrange(0,30,2))) - y = Regions(*((i, i + 4) for i in xrange(0,30,5))) - z = Region(6,7) - n = Region(9,10) - - def test(left, right): - print "%s == %s: %s" % (left, right, left == right) - print "%s != %s: %s" % (left, right, left != right) - print "%s < %s: %s" % (left, right, left < right) - print "%s <= %s: %s" % (left, right, left <= right) - print "%s > %s: %s" % (left, right, left > right) - print "%s >= %s: %s" % (left, right, left >= right) - print - - test(neg_inf, neg_inf) - test(neg_inf, pos_inf) - test(pos_inf, neg_inf) - test(pos_inf, pos_inf) - - test(neg_inf, 0) - test(neg_inf, -11111) - test(neg_inf, 11111) - - test(0, neg_inf) - test(-11111, neg_inf) - test(11111, neg_inf) - - test(pos_inf, 0) - test(pos_inf, -11111) - test(pos_inf, 11111) - - test(0, pos_inf) - test(-11111, pos_inf) - test(11111, pos_inf) - - print x - print y - print x & y - print z - - print 4 in x - print 4 in z - print 5 not in x - print 6 not in z - print z in y - print n in y, n not in y diff --git a/util/file_types.py b/util/file_types.py index b10e274f3..47041ad01 100644 --- a/util/file_types.py +++ b/util/file_types.py @@ -26,179 +26,3 @@ # # Authors: Nathan Binkert -import os - -# lanuage type for each file extension -lang_types = { - '.c' : "C", - '.cl' : "C", - '.h' : "C", - '.cc' : 
"C++", - '.hh' : "C++", - '.cxx' : "C++", - '.hxx' : "C++", - '.cpp' : "C++", - '.hpp' : "C++", - '.C' : "C++", - '.H' : "C++", - '.i' : "swig", - '.py' : "python", - '.pl' : "perl", - '.pm' : "perl", - '.s' : "asm", - '.S' : "asm", - '.l' : "lex", - '.ll' : "lex", - '.y' : "yacc", - '.yy' : "yacc", - '.isa' : "isa", - '.sh' : "shell", - '.slicc' : "slicc", - '.sm' : "slicc", - '.awk' : "awk", - '.el' : "lisp", - '.txt' : "text", - '.tex' : "tex", - '.mk' : "make", - } - -# languages based on file prefix -lang_prefixes = ( - ('SCons', 'scons'), - ('Make', 'make'), - ('make', 'make'), - ('Doxyfile', 'doxygen'), - ) - -# languages based on #! line of first file -hash_bang = ( - ('python', 'python'), - ('perl', 'perl'), - ('sh', 'shell'), - ) - -# the list of all languages that we detect -all_languages = frozenset(lang_types.itervalues()) -all_languages |= frozenset(lang for start,lang in lang_prefixes) -all_languages |= frozenset(lang for start,lang in hash_bang) - -def lang_type(filename, firstline=None, openok=True): - '''identify the language of a given filename and potentially the - firstline of the file. If the firstline of the file is not - provided and openok is True, open the file and read the first line - if necessary''' - - basename = os.path.basename(filename) - name,extension = os.path.splitext(basename) - - # first try to detect language based on file extension - try: - return lang_types[extension] - except KeyError: - pass - - # now try to detect language based on file prefix - for start,lang in lang_prefixes: - if basename.startswith(start): - return lang - - # if a first line was not provided but the file is ok to open, - # grab the first line of the file. - if firstline is None and openok: - handle = file(filename, 'r') - firstline = handle.readline() - handle.close() - - # try to detect language based on #! 
in first line - if firstline and firstline.startswith('#!'): - for string,lang in hash_bang: - if firstline.find(string) > 0: - return lang - - # sorry, we couldn't detect the language - return None - -# directories and files to ignore by default -default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext')) -default_file_ignore = frozenset(('parsetab.py', )) - -def find_files(base, languages=all_languages, - dir_ignore=default_dir_ignore, - file_ignore=default_file_ignore): - '''find all files in a directory and its subdirectories based on a - set of languages, ignore directories specified in dir_ignore and - files specified in file_ignore''' - if base[-1] != '/': - base += '/' - - def update_dirs(dirs): - '''strip the ignored directories out of the provided list''' - index = len(dirs) - 1 - for i,d in enumerate(reversed(dirs)): - if d in dir_ignore: - del dirs[index - i] - - # walk over base - for root,dirs,files in os.walk(base): - root = root.replace(base, '', 1) - - # strip ignored directories from the list - update_dirs(dirs) - - for filename in files: - if filename in file_ignore: - # skip ignored files - continue - - # try to figure out the language of the specified file - fullpath = os.path.join(base, root, filename) - language = lang_type(fullpath) - - # if the file is one of the langauges that we want return - # its name and the language - if language in languages: - yield fullpath, language - -def update_file(dst, src, language, mutator): - '''update a file of the specified language with the provided - mutator generator. If inplace is provided, update the file in - place and return the handle to the updated file. 
If inplace is - false, write the updated file to cStringIO''' - - # if the source and destination are the same, we're updating in place - inplace = dst == src - - if isinstance(src, str): - # if a filename was provided, open the file - if inplace: - mode = 'r+' - else: - mode = 'r' - src = file(src, mode) - - orig_lines = [] - - # grab all of the lines of the file and strip them of their line ending - old_lines = list(line.rstrip('\r\n') for line in src.xreadlines()) - new_lines = list(mutator(old_lines, src.name, language)) - - for line in src.xreadlines(): - line = line - - if inplace: - # if we're updating in place and the file hasn't changed, do nothing - if old_lines == new_lines: - return - - # otherwise, truncate the file and seek to the beginning. - dst = src - dst.truncate(0) - dst.seek(0) - elif isinstance(dst, str): - # if we're not updating in place and a destination file name - # was provided, create a file object - dst = file(dst, 'w') - - for line in new_lines: - dst.write(line) - dst.write('\n') diff --git a/util/hgstyle.py b/util/hgstyle.py index fd40e781a..ccb04a94f 100755 --- a/util/hgstyle.py +++ b/util/hgstyle.py @@ -42,62 +42,20 @@ # Authors: Nathan Binkert # Steve Reinhardt -import heapq -import os -import re import sys +import os +from os.path import join as joinpath -from os.path import dirname, join as joinpath -from itertools import count -from mercurial import bdiff, mdiff, commands - -current_dir = dirname(__file__) +current_dir = os.path.dirname(__file__) sys.path.insert(0, current_dir) -sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python')) - -from m5.util import neg_inf, pos_inf, Region, Regions -import sort_includes -from file_types import lang_type - -all_regions = Regions(Region(neg_inf, pos_inf)) - -tabsize = 8 -lead = re.compile(r'^([ \t]+)') -trail = re.compile(r'([ \t]+)$') -any_control = re.compile(r'\b(if|while|for)([ \t]*)\(') - -format_types = set(('C', 'C++')) +from style.verifiers import all_verifiers +from 
style.validators import all_validators +from style.file_types import lang_type +from style.style import MercurialUI, check_ignores +from style.region import * -def re_ignore(expr): - """Helper function to create regular expression ignore file - matcher functions""" - - rex = re.compile(expr) - def match_re(fname): - return rex.match(fname) - return match_re - -# This list contains a list of functions that are called to determine -# if a file should be excluded from the style matching rules or -# not. The functions are called with the file name relative to the -# repository root (without a leading slash) as their argument. A file -# is excluded if any function in the list returns true. -style_ignores = [ - # Ignore external projects as they are unlikely to follow the gem5 - # coding convention. - re_ignore("^ext/"), -] - -def check_ignores(fname): - """Check if a file name matches any of the ignore rules""" - - for rule in style_ignores: - if rule(fname): - return True - - return False - +from mercurial import bdiff, mdiff, commands def modified_regions(old_data, new_data): regions = Regions() @@ -126,375 +84,12 @@ def modregions(wctx, fname): return mod_regions -class UserInterface(object): - def __init__(self, verbose=False): - self.verbose = verbose - - def prompt(self, prompt, results, default): - while True: - result = self.do_prompt(prompt, results, default) - if result in results: - return result - -class MercurialUI(UserInterface): - def __init__(self, ui, *args, **kwargs): - super(MercurialUI, self).__init__(*args, **kwargs) - self.ui = ui - - def do_prompt(self, prompt, results, default): - return self.ui.prompt(prompt, default=default) - - def write(self, string): - self.ui.write(string) - -class StdioUI(UserInterface): - def do_prompt(self, prompt, results, default): - return raw_input(prompt) or default - - def write(self, string): - sys.stdout.write(string) - - -class Verifier(object): - """Base class for style verifier objects - - Subclasses must 
define these class attributes: - languages = set of strings identifying applicable languages - test_name = long descriptive name of test, will be used in - messages such as "error in " or "invalid " - opt_name = short name used to generate command-line options to - control the test (--fix-, --ignore-, etc.) - """ - - def __init__(self, ui, repo, opts): - self.ui = ui - self.repo = repo - # opt_name must be defined as a class attribute of derived classes. - # Check test-specific opts first as these have precedence. - self.opt_fix = opts.get('fix_' + self.opt_name, False) - self.opt_ignore = opts.get('ignore_' + self.opt_name, False) - self.opt_skip = opts.get('skip_' + self.opt_name, False) - # If no test-specific opts were set, then set based on "-all" opts. - if not (self.opt_fix or self.opt_ignore or self.opt_skip): - self.opt_fix = opts.get('fix_all', False) - self.opt_ignore = opts.get('ignore_all', False) - self.opt_skip = opts.get('skip_all', False) - - def __getattr__(self, attr): - if attr in ('prompt', 'write'): - return getattr(self.ui, attr) - - if attr == 'wctx': - try: - wctx = repo.workingctx() - except: - from mercurial import context - wctx = context.workingctx(repo) - self.wctx = wctx - return wctx - - raise AttributeError - - def open(self, filename, mode): - filename = self.repo.wjoin(filename) - - try: - f = file(filename, mode) - except OSError, msg: - print 'could not open file %s: %s' % (filename, msg) - return None - - return f - - def skip(self, filename): - filename = self.repo.wjoin(filename) - - # We never want to handle symlinks, so always skip them: If the location - # pointed to is a directory, skip it. If the location is a file inside - # the gem5 directory, it will be checked as a file, so symlink can be - # skipped. If the location is a file outside gem5, we don't want to - # check it anyway. 
- if os.path.islink(filename): - return True - return lang_type(filename) not in self.languages - - def check(self, filename, regions=all_regions): - """Check specified regions of file 'filename'. - - Line-by-line checks can simply provide a check_line() method - that returns True if the line is OK and False if it has an - error. Verifiers that need a multi-line view (like - SortedIncludes) must override this entire function. - - Returns a count of errors (0 if none), though actual non-zero - count value is not currently used anywhere. - """ - - f = self.open(filename, 'r') - - errors = 0 - for num,line in enumerate(f): - if num not in regions: - continue - line = line.rstrip('\n') - if not self.check_line(line): - self.write("invalid %s in %s:%d\n" % \ - (self.test_name, filename, num + 1)) - if self.ui.verbose: - self.write(">>%s<<\n" % line[:-1]) - errors += 1 - return errors - - def fix(self, filename, regions=all_regions): - """Fix specified regions of file 'filename'. - - Line-by-line fixes can simply provide a fix_line() method that - returns the fixed line. Verifiers that need a multi-line view - (like SortedIncludes) must override this entire function. - """ - - f = self.open(filename, 'r+') - - lines = list(f) - - f.seek(0) - f.truncate() - - for i,line in enumerate(lines): - if i in regions: - line = self.fix_line(line) - - f.write(line) - f.close() - - - def apply(self, filename, regions=all_regions): - """Possibly apply to specified regions of file 'filename'. - - Verifier is skipped if --skip- option was provided or if - file is not of an applicable type. Otherwise file is checked - and error messages printed. Errors are fixed or ignored if - the corresponding --fix- or --ignore- options were - provided. If neither, the user is prompted for an action. - - Returns True to abort, False otherwise. 
- """ - if not (self.opt_skip or self.skip(filename)): - errors = self.check(filename, regions) - if errors and not self.opt_ignore: - if self.opt_fix: - self.fix(filename, regions) - else: - result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?", - 'aif', 'a') - if result == 'f': - self.fix(filename, regions) - elif result == 'a': - return True # abort - - return False - - -class Whitespace(Verifier): - """Check whitespace. - - Specifically: - - No tabs used for indent - - No trailing whitespace - """ - - languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons')) - test_name = 'whitespace' - opt_name = 'white' - - def check_line(self, line): - match = lead.search(line) - if match and match.group(1).find('\t') != -1: - return False - - match = trail.search(line) - if match: - return False - - return True - - def fix_line(self, line): - if lead.search(line): - newline = '' - for i,c in enumerate(line): - if c == ' ': - newline += ' ' - elif c == '\t': - newline += ' ' * (tabsize - len(newline) % tabsize) - else: - newline += line[i:] - break - - line = newline - - return line.rstrip() + '\n' - - -class ControlSpace(Verifier): - """Check for exactly one space after if/while/for""" - - languages = set(('C', 'C++')) - test_name = 'spacing after if/while/for' - opt_name = 'control' - - def check_line(self, line): - match = any_control.search(line) - return not (match and match.group(2) != " ") - - def fix_line(self, line): - new_line = any_control.sub(r'\1 (', line) - return new_line - - -class SortedIncludes(Verifier): - """Check for proper sorting of include statements""" - languages = sort_includes.default_languages - test_name = 'include file order' - opt_name = 'include' - - def __init__(self, *args, **kwargs): - super(SortedIncludes, self).__init__(*args, **kwargs) - self.sort_includes = sort_includes.SortIncludes() - - def check(self, filename, regions=all_regions): - f = self.open(filename, 'r') - - lines = [ l.rstrip('\n') for l in f.xreadlines() ] 
- old = ''.join(line + '\n' for line in lines) - f.close() - - if len(lines) == 0: - return 0 - - language = lang_type(filename, lines[0]) - sort_lines = list(self.sort_includes(lines, filename, language)) - new = ''.join(line + '\n' for line in sort_lines) - - mod = modified_regions(old, new) - modified = mod & regions - - if modified: - self.write("invalid sorting of includes in %s\n" % (filename)) - if self.ui.verbose: - for start, end in modified.regions: - self.write("bad region [%d, %d)\n" % (start, end)) - return 1 - - return 0 - - def fix(self, filename, regions=all_regions): - f = self.open(filename, 'r+') - - old = f.readlines() - lines = [ l.rstrip('\n') for l in old ] - language = lang_type(filename, lines[0]) - sort_lines = list(self.sort_includes(lines, filename, language)) - new = ''.join(line + '\n' for line in sort_lines) - - f.seek(0) - f.truncate() - - for i,line in enumerate(sort_lines): - f.write(line) - f.write('\n') - f.close() - - -def linelen(line): - tabs = line.count('\t') - if not tabs: - return len(line) - - count = 0 - for c in line: - if c == '\t': - count += tabsize - count % tabsize - else: - count += 1 - - return count - -class LineLength(Verifier): - languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons')) - test_name = 'line length' - opt_name = 'length' - - def check_line(self, line): - return linelen(line) <= 78 - - def fix(self, filename, regions=all_regions): - self.write("Warning: cannot automatically fix overly long lines.\n") - - -class BoolCompare(Verifier): - languages = set(('C', 'C++', 'python')) - test_name = 'boolean comparison' - opt_name = 'boolcomp' - - regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b') - - def check_line(self, line): - return self.regex.search(line) == None - - def fix_line(self, line): - match = self.regex.search(line) - if match: - if match.group(1) in ('true', 'True'): - line = self.regex.sub('', line) - else: - self.write("Warning: cannot automatically fix " - "comparisons with 
false/False.\n") - return line - - -# list of all verifier classes -all_verifiers = [ - Whitespace, - ControlSpace, - LineLength, - BoolCompare, - SortedIncludes -] - -class ValidationStats(object): - def __init__(self): - self.toolong = 0 - self.toolong80 = 0 - self.leadtabs = 0 - self.trailwhite = 0 - self.badcontrol = 0 - self.cret = 0 - - def dump(self): - print '''\ -%d violations of lines over 79 chars. %d of which are 80 chars exactly. -%d cases of whitespace at the end of a line. -%d cases of tabs to indent. -%d bad parens after if/while/for. -%d carriage returns found. -''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs, - self.badcontrol, self.cret) - - def __nonzero__(self): - return self.toolong or self.toolong80 or self.leadtabs or \ - self.trailwhite or self.badcontrol or self.cret - -def validate(filename, stats, verbose, exit_code): +def validate(filename, verbose, exit_code): lang = lang_type(filename) - if lang not in format_types: + if lang not in ('C', 'C++'): return - def msg(lineno, line, message): - print '%s:%d>' % (filename, lineno + 1), message - if verbose > 2: - print line - def bad(): if exit_code is not None: sys.exit(exit_code) @@ -505,51 +100,18 @@ def validate(filename, stats, verbose, exit_code): if verbose > 0: print 'could not open file %s' % filename bad() - return + return None - for i,line in enumerate(f): + vals = [ v(filename, verbose=(verbose > 1), language=lang) + for v in all_validators ] + + for i, line in enumerate(f): line = line.rstrip('\n') + for v in vals: + v.validate_line(i, line) + - # no carriage returns - if line.find('\r') != -1: - self.cret += 1 - if verbose > 1: - msg(i, line, 'carriage return found') - bad() - - # lines max out at 79 chars - llen = linelen(line) - if llen > 79: - stats.toolong += 1 - if llen == 80: - stats.toolong80 += 1 - if verbose > 1: - msg(i, line, 'line too long (%d chars)' % llen) - bad() - - # no tabs used to indent - match = lead.search(line) - if match and 
match.group(1).find('\t') != -1: - stats.leadtabs += 1 - if verbose > 1: - msg(i, line, 'using tabs to indent') - bad() - - # no trailing whitespace - if trail.search(line): - stats.trailwhite +=1 - if verbose > 1: - msg(i, line, 'trailing whitespace') - bad() - - # for c++, exactly one space betwen if/while/for and ( - if lang == 'C++': - match = any_control.search(line) - if match and match.group(2) != " ": - stats.badcontrol += 1 - if verbose > 1: - msg(i, line, 'improper spacing after %s' % match.group(1)) - bad() + return vals def _modified_regions(repo, patterns, **kwargs): @@ -627,11 +189,11 @@ def do_check_style(hgui, repo, *pats, **opts): ui = MercurialUI(hgui, verbose=hgui.verbose) # instantiate varifier objects - verifiers = [v(ui, repo, opts) for v in all_verifiers] + verifiers = [v(ui, opts, base=repo.root) for v in all_verifiers] for fname, mod_regions in _modified_regions(repo, pats, **opts): for verifier in verifiers: - if verifier.apply(fname, mod_regions): + if verifier.apply(joinpath(repo.root, fname), mod_regions): return True return False @@ -653,11 +215,13 @@ def do_check_format(hgui, repo, *pats, **opts): verbose = 0 for fname, mod_regions in _modified_regions(repo, pats, **opts): - stats = ValidationStats() - validate(joinpath(repo.root, fname), stats, verbose, None) - if stats: + vals = validate(joinpath(repo.root, fname), verbose, None) + if vals is None: + return True + elif any([not v for v in vals]): print "%s:" % fname - stats.dump() + for v in vals: + v.dump() result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?", 'ai', 'a') if result == 'a': @@ -744,9 +308,10 @@ if __name__ == '__main__': args = parser.parse_args() - stats = ValidationStats() for filename in args.file: - validate(filename, stats=stats, verbose=args.verbose, exit_code=1) + vals = validate(filename, verbose=args.verbose, + exit_code=1) - if args.verbose > 0: - stats.dump() + if args.verbose > 0 and vals is not None: + for v in vals: + v.dump() diff --git 
a/util/sort_includes.py b/util/sort_includes.py deleted file mode 100644 index 334d9e29e..000000000 --- a/util/sort_includes.py +++ /dev/null @@ -1,317 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2014-2015 ARM Limited -# All rights reserved -# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. -# -# Copyright (c) 2011 The Hewlett-Packard Development Company -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Authors: Nathan Binkert -# Andreas Sandberg - -import os -import re -import sys - -from file_types import * - -cpp_c_headers = { - 'assert.h' : 'cassert', - 'ctype.h' : 'cctype', - 'errno.h' : 'cerrno', - 'float.h' : 'cfloat', - 'limits.h' : 'climits', - 'locale.h' : 'clocale', - 'math.h' : 'cmath', - 'setjmp.h' : 'csetjmp', - 'signal.h' : 'csignal', - 'stdarg.h' : 'cstdarg', - 'stddef.h' : 'cstddef', - 'stdio.h' : 'cstdio', - 'stdlib.h' : 'cstdlib', - 'string.h' : 'cstring', - 'time.h' : 'ctime', - 'wchar.h' : 'cwchar', - 'wctype.h' : 'cwctype', -} - -include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]') -def include_key(line): - '''Mark directories with a leading space so directories - are sorted before files''' - - match = include_re.match(line) - assert match, line - keyword = match.group(2) - include = match.group(3) - - # Everything but the file part needs to have a space prepended - parts = include.split('/') - if len(parts) == 2 and parts[0] == 'dnet': - # Don't sort the dnet includes with respect to each other, but - # make them sorted with respect to non dnet includes. Python - # guarantees that sorting is stable, so just clear the - # basename part of the filename. 
- parts[1] = ' ' - parts[0:-1] = [ ' ' + s for s in parts[0:-1] ] - key = '/'.join(parts) - - return key - - -def _include_matcher(keyword="#include", delim="<>"): - """Match an include statement and return a (keyword, file, extra) - duple, or a touple of None values if there isn't a match.""" - - rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1])) - - def matcher(context, line): - m = rex.match(line) - return m.groups() if m else (None, ) * 3 - - return matcher - -def _include_matcher_fname(fname, **kwargs): - """Match an include of a specific file name. Any keyword arguments - are forwarded to _include_matcher, which is used to match the - actual include line.""" - - rex = re.compile(fname) - base_matcher = _include_matcher(**kwargs) - - def matcher(context, line): - (keyword, fname, extra) = base_matcher(context, line) - if fname and rex.match(fname): - return (keyword, fname, extra) - else: - return (None, ) * 3 - - return matcher - - -def _include_matcher_main(): - """Match a C/C++ source file's primary header (i.e., a file with - the same base name, but a header extension).""" - - base_matcher = _include_matcher(delim='""') - rex = re.compile(r"^src/(.*)\.([^.]+)$") - header_map = { - "c" : "h", - "cc" : "hh", - "cpp" : "hh", - } - def matcher(context, line): - m = rex.match(context["filename"]) - if not m: - return (None, ) * 3 - base, ext = m.groups() - (keyword, fname, extra) = base_matcher(context, line) - try: - if fname == "%s.%s" % (base, header_map[ext]): - return (keyword, fname, extra) - except KeyError: - pass - - return (None, ) * 3 - - return matcher - -class SortIncludes(object): - # different types of includes for different sorting of headers - # - Python header needs to be first if it exists - # <*.h> - system headers (directories before files) - # <*> - STL headers - # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files) - # "*" - M5 headers (directories before files) - includes_re = ( - ('main', '""', 
_include_matcher_main()), - ('python', '<>', _include_matcher_fname("^Python\.h$")), - ('c', '<>', _include_matcher_fname("^.*\.h$")), - ('stl', '<>', _include_matcher_fname("^\w+$")), - ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")), - ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')), - ('swig0', '<>', _include_matcher(keyword="%import")), - ('swig1', '<>', _include_matcher(keyword="%include")), - ('swig2', '""', _include_matcher(keyword="%import", delim='""')), - ('swig3', '""', _include_matcher(keyword="%include", delim='""')), - ) - - block_order = ( - ('main', ), - ('python', ), - ('c', ), - ('stl', ), - ('cc', ), - ('m5header', ), - ('swig0', 'swig1', 'swig2', 'swig3', ), - ) - - def __init__(self): - self.block_priority = {} - for prio, keys in enumerate(self.block_order): - for key in keys: - self.block_priority[key] = prio - - def reset(self): - # clear all stored headers - self.includes = {} - - def dump_blocks(self, block_types): - """Merge includes of from several block types into one large - block of sorted includes. 
This is useful when we have multiple - include block types (e.g., swig includes) with the same - priority.""" - - includes = [] - for block_type in block_types: - try: - includes += self.includes[block_type] - except KeyError: - pass - - return sorted(set(includes)) - - def dump_includes(self): - includes = [] - for types in self.block_order: - block = self.dump_blocks(types) - if includes and block: - includes.append("") - includes += block - - self.reset() - return includes - - def __call__(self, lines, filename, language): - self.reset() - - context = { - "filename" : filename, - "language" : language, - } - - def match_line(line): - if not line: - return (None, line) - - for include_type, (ldelim, rdelim), matcher in self.includes_re: - keyword, include, extra = matcher(context, line) - if keyword: - # if we've got a match, clean up the #include line, - # fix up stl headers and store it in the proper category - if include_type == 'c' and language == 'C++': - stl_inc = cpp_c_headers.get(include, None) - if stl_inc: - include = stl_inc - include_type = 'stl' - - return (include_type, - keyword + ' ' + ldelim + include + rdelim + extra) - - return (None, line) - - processing_includes = False - for line in lines: - include_type, line = match_line(line) - if include_type: - try: - self.includes[include_type].append(line) - except KeyError: - self.includes[include_type] = [ line ] - - processing_includes = True - elif processing_includes and not line.strip(): - # Skip empty lines while processing includes - pass - elif processing_includes: - # We are now exiting an include block - processing_includes = False - - # Output pending includes, a new line between, and the - # current l. 
- for include in self.dump_includes(): - yield include - yield '' - yield line - else: - # We are not in an include block, so just emit the line - yield line - - # We've reached EOF, so dump any pending includes - if processing_includes: - for include in self.dump_includes(): - yield include - -# default language types to try to apply our sorting rules to -default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig')) - -def options(): - import optparse - options = optparse.OptionParser() - add_option = options.add_option - add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string', - default=','.join(default_dir_ignore), - help="ignore directories") - add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string', - default=','.join(default_file_ignore), - help="ignore files") - add_option('-l', '--languages', metavar="LANG[,LANG]", type='string', - default=','.join(default_languages), - help="languages") - add_option('-n', '--dry-run', action='store_true', - help="don't overwrite files") - - return options - -def parse_args(parser): - opts,args = parser.parse_args() - - opts.dir_ignore = frozenset(opts.dir_ignore.split(',')) - opts.file_ignore = frozenset(opts.file_ignore.split(',')) - opts.languages = frozenset(opts.languages.split(',')) - - return opts,args - -if __name__ == '__main__': - parser = options() - opts, args = parse_args(parser) - - for base in args: - for filename,language in find_files(base, languages=opts.languages, - file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore): - if opts.dry_run: - print "%s: %s" % (filename, language) - else: - update_file(filename, filename, language, SortIncludes()) diff --git a/util/style/__init__.py b/util/style/__init__.py new file mode 100644 index 000000000..e7c83da10 --- /dev/null +++ b/util/style/__init__.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# +# Copyright (c) 2016 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software 
and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Language type for each file extension.
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    }

# Languages based on a prefix of the file's base name.
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# Languages based on the #! line (first line) of the file.
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# The set of all languages that we can detect.  dict.values() is used
# instead of itervalues() so the module imports on Python 2 and 3.
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start, lang in lang_prefixes)
all_languages |= frozenset(lang for start, lang in hash_bang)

def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file.

    Detection is attempted, in order, from the file extension, from a
    known prefix of the base name, and finally from the #! line.

    filename  -- path of the file to classify
    firstline -- first line of the file, if the caller already has it
    openok    -- if firstline is None and this is true, open the file
                 and read its first line when the name alone is not
                 enough

    Returns the language name, or None if it could not be detected.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect the language based on the file extension
    if extension in lang_types:
        return lang_types[extension]

    # now try to detect the language based on the file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  The context manager makes
    # sure the handle is closed even if reading fails.
    if firstline is None and openok:
        with open(filename, 'r') as handle:
            firstline = handle.readline()

    # try to detect the language based on #! in the first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if firstline.find(string) > 0:
                return lang

    # sorry, we couldn't detect the language
    return None

# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))

def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Find all files below a directory that are written in one of a
    set of languages.

    base        -- directory to walk (must be a non-empty string)
    languages   -- container of language names to report
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    Yields (path, language) tuples.'''
    if base[-1] != '/':
        base += '/'

    # walk over base
    for root, dirs, files in os.walk(base):
        root = root.replace(base, '', 1)

        # Strip ignored directories in place; os.walk only honours
        # pruning when the list object it handed out is modified.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(base, root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want,
            # return its name and the language
            if language in languages:
                yield fullpath, language

def update_file(dst, src, language, mutator):
    '''Rewrite a file through a mutator generator.

    dst      -- destination: a file name or a writable file object
    src      -- source: a file name or a readable file object (its
                .name attribute is passed to the mutator)
    language -- language name, forwarded to the mutator
    mutator  -- callable(lines, filename, language) returning an
                iterable of output lines without line endings

    If dst and src compare equal the file is updated in place, and it
    is left untouched when the mutator does not change anything.
    Files that this function opens itself are always closed again;
    file objects supplied by the caller are left open.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened here (as opposed to passed in) that must be closed
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class _neg_inf(object):
    '''This object always compares less than any other object'''
    def __repr__(self): return '<neg_inf>'
    def __lt__(self, other): return type(self) != type(other)
    def __le__(self, other): return True
    def __gt__(self, other): return False
    def __ge__(self, other): return type(self) == type(other)
    def __eq__(self, other): return type(self) == type(other)
    def __ne__(self, other): return type(self) != type(other)
    # All instances compare equal, so they must share a hash.  Defined
    # explicitly because Python 3 drops __hash__ when __eq__ is defined.
    def __hash__(self): return hash(type(self))
neg_inf = _neg_inf()

class _pos_inf(object):
    '''This object always compares greater than any other object'''
    def __repr__(self): return '<pos_inf>'
    def __lt__(self, other): return False
    def __le__(self, other): return type(self) == type(other)
    def __gt__(self, other): return type(self) != type(other)
    def __ge__(self, other): return True
    def __eq__(self, other): return type(self) == type(other)
    def __ne__(self, other): return type(self) != type(other)
    # See _neg_inf.__hash__.
    def __hash__(self): return hash(type(self))
pos_inf = _pos_inf()

class Region(tuple):
    '''A region (range) of [start, end).
    This includes utility functions to compare overlap of regions.'''
    def __new__(cls, *args):
        '''Create a region from either (start, end) or from a single
        two-element iterable.  An existing Region is returned as is.

        Raises AttributeError if this does not yield exactly two
        values.'''
        if len(args) == 1:
            arg = args[0]
            if isinstance(arg, Region):
                return arg
            args = tuple(arg)

        if len(args) != 2:
            raise AttributeError(
                "Only one or two arguments allowed, %d provided" %
                (len(args), ))

        return tuple.__new__(cls, args)

    def __repr__(self):
        return 'Region(%s, %s)' % (self[0], self[1])

    # Keep regions hashable like plain tuples; Python 3 would
    # otherwise remove __hash__ because __eq__ is overridden below.
    __hash__ = tuple.__hash__

    @property
    def start(self):
        return self[0]

    @property
    def end(self):
        return self[1]

    def __contains__(self, other):
        '''other is
        region: True if other is fully contained within self.
        pos: True if other is within the region'''
        if isinstance(other, tuple):
            return self[0] <= other[0] and self[1] >= other[1]
        return self[0] <= other and other < self[1]

    def __eq__(self, other):
        '''other is
        region: True if self and other are identical.
        pos: True if other is within the region'''
        if isinstance(other, tuple):
            return self[0] == other[0] and self[1] == other[1]
        return self[0] <= other and other < self[1]

    # @param self is a region.
    # @param other is a region.
    # @return if self and other are not identical.
    def __ne__(self, other):
        '''other is
        region: True if they are not identical
        pos: True if other is not in the region'''
        if isinstance(other, tuple):
            return self[0] != other[0] or self[1] != other[1]
        return other < self[0] or self[1] <= other

    # @param self is a region.
    # @param other is a region.
    # @return if self is less than other and does not overlap self.
    def __lt__(self, other):
        "self completely left of other (cannot overlap)"
        if isinstance(other, tuple):
            return self[1] <= other[0]
        return self[1] <= other

    # @param self is a region.
    # @param other is a region.
    # @return if self is less than other.  self may overlap other,
    # but not extend beyond the end of other.
    def __le__(self, other):
        "self extends to the left of other (can overlap)"
        if isinstance(other, tuple):
            return self[0] <= other[0]
        return self[0] <= other

    # @param self is a region.
    # @param other is a region.
    # @return if self is greater than other and does not overlap other.
    def __gt__(self, other):
        "self is completely right of other (cannot overlap)"
        if isinstance(other, tuple):
            return self[0] >= other[1]
        return self[0] > other

    # @param self is a region.
    # @param other is a region.
    # @return if self is greater than other.  self may overlap other,
    # but not extend beyond the beginning of other.
    def __ge__(self, other):
        "self extends beyond other to the right (can overlap)"
        if isinstance(other, tuple):
            return self[1] >= other[1]
        return self[1] > other

class Regions(object):
    '''A set of regions (ranges).  Basically a region with holes.
    Includes utility functions to merge regions and figure out if
    something is in one of the regions.'''
    def __init__(self, *args):
        self.regions = []
        self.extend(*args)

    def copy(self):
        '''Return a new Regions holding the same Region objects.'''
        copy = Regions()
        copy.regions.extend(self.regions)
        return copy

    def append(self, *args):
        '''Add one region; args are forwarded to Region().'''
        self.regions.append(Region(*args))

    def extend(self, *args):
        '''Add several regions; each argument is converted with
        Region(arg).'''
        self.regions.extend(Region(a) for a in args)

    def __contains__(self, position):
        '''True if position (a point or a region) is contained in at
        least one of the stored regions.'''
        for region in self.regions:
            if position in region:
                return True

        return False

    def __len__(self):
        return len(self.regions)

    def __iand__(self, other):
        '''Replace the stored regions with their intersection with the
        regions in other.  Both lists are walked in lock step, so the
        result is only meaningful when both are in ascending order.'''
        A = self.regions
        B = other.regions
        R = []

        i = 0
        j = 0
        while i < len(self) and j < len(other):
            a = A[i]
            b = B[j]
            if a[1] <= b[0]:
                # A is completely before B.  Skip A
                i += 1
            elif a[0] <= b[0]:
                if a[1] <= b[1]:
                    # A and B overlap with B not left of A and A not right of B
                    R.append(Region(b[0], a[1]))

                    # Advance A because nothing is left
                    i += 1

                    if a[1] == b[1]:
                        # Advance B too
                        j += 1
                else:
                    # A and B overlap with B completely within the bounds of A
                    R.append(Region(b[0], b[1]))

                    # Advance only B because some of A may still be useful
                    j += 1
            elif b[1] <= a[0]:
                # B is completely before A. Skip B.
                j += 1
            else:
                assert b[0] < a[0]
                if b[1] <= a[1]:
                    # A and B overlap with A not left of B and B not right of A
                    R.append(Region(a[0], b[1]))

                    # Advance B because nothing is left
                    j += 1

                    if a[1] == b[1]:
                        # Advance A too
                        i += 1
                else:
                    # A and B overlap with A completely within the bounds of B
                    R.append(Region(a[0], a[1]))

                    # Advance only A because some of B may still be useful
                    i += 1

        self.regions = R
        return self

    def __and__(self, other):
        '''Return the intersection as a new Regions object.'''
        result = self.copy()
        result &= other
        return result

    def __repr__(self):
        return 'Regions(%s)' % ([(r[0], r[1]) for r in self.regions], )

all_regions = Regions(Region(neg_inf, pos_inf))

if __name__ == '__main__':
    # Every print below takes a single pre-formatted argument so the
    # output is the same under the Python 2 print statement and the
    # Python 3 print function.
    x = Regions(*((i, i + 1) for i in range(0, 30, 2)))
    y = Regions(*((i, i + 4) for i in range(0, 30, 5)))
    z = Region(6, 7)
    n = Region(9, 10)

    def test(left, right):
        print("%s == %s: %s" % (left, right, left == right))
        print("%s != %s: %s" % (left, right, left != right))
        print("%s <  %s: %s" % (left, right, left <  right))
        print("%s <= %s: %s" % (left, right, left <= right))
        print("%s >  %s: %s" % (left, right, left >  right))
        print("%s >= %s: %s" % (left, right, left >= right))
        print('')

    test(neg_inf, neg_inf)
    test(neg_inf, pos_inf)
    test(pos_inf, neg_inf)
    test(pos_inf, pos_inf)

    test(neg_inf, 0)
    test(neg_inf, -11111)
    test(neg_inf, 11111)

    test(0, neg_inf)
    test(-11111, neg_inf)
    test(11111, neg_inf)

    test(pos_inf, 0)
    test(pos_inf, -11111)
    test(pos_inf, 11111)

    test(0, pos_inf)
    test(-11111, pos_inf)
    test(11111, pos_inf)

    print(x)
    print(y)
    print(x & y)
    print(z)

    print(4 in x)
    print(4 in z)
    print(5 not in x)
    print(6 not in z)
    print(z in y)
    print("%s %s" % (n in y, n not in y))
rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2011 The Hewlett-Packard Development Company +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
# C headers and the C++ wrapper header that should be used instead of
# them in C++ code.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Mark directories with a leading space so directories
    are sorted before files.

    line must be an include/import statement; anything else trips the
    assertion.  Returns the sort key for the included path.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Match an include statement and return a (keyword, file, extra)
    tuple, or a tuple of None values if there isn't a match.

    keyword and the two characters of delim are inserted into the
    regular expression as they are."""

    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name. Any keyword arguments
    are forwarded to _include_matcher, which is used to match the
    actual include line.

    fname is a regular expression applied (with re.match) to the
    included file name."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)
        else:
            return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    Only files whose name (context["filename"]) starts with "src/"
    are considered."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()
        (keyword, fname, extra) = base_matcher(context, line)
        try:
            if fname == "%s.%s" % (base, header_map[ext]):
                return (keyword, fname, extra)
        except KeyError:
            # not a source file extension we know the header for
            pass

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    '''Line mutator that groups and sorts blocks of include
    statements.  Instances are called with (lines, filename,
    language) and generate the rewritten lines.'''

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, delimiters to emit, matcher).  The
    # first matcher that accepts a line decides its block type.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written out.  Block types in the
    # same tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Start with an empty include store so that dump_includes()
        # is usable before the object has been called.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.  Duplicate lines are dropped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, with an
        empty line between non-empty blocks, and clear the store."""
        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with every run of include
        statements replaced by its sorted, grouped equivalent.

        lines    -- input lines without line endings
        filename -- name of the file, used to find its main header
        language -- language name; C headers are mapped to their C++
                    wrappers only when this is 'C++'"""
        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)

                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    '''Build and return the command line option parser.'''
    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(default_languages),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    '''Parse the command line and turn the comma separated list
    options into frozensets.  Returns (opts, args).'''
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # single pre-formatted argument: same output with the
                # Python 2 print statement and the Python 3 function
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
/usr/bin/env python +# Copyright (c) 2014, 2016 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2007,2011 The Hewlett-Packard Development Company +# Copyright (c) 2016 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
# Number of columns between tab stops when expanding tabs.
tabsize = 8
# Leading run of blanks/tabs, i.e. a line's indentation.
lead = re.compile(r'^([ \t]+)')
# Trailing run of blanks/tabs.
trail = re.compile(r'([ \t]+)$')
# A control keyword, optional blanks, then '('. Group 1 is the keyword,
# group 2 the whitespace between the keyword and the parenthesis.
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')


class UserInterface(object):
    """Abstract front end used to talk to the user.

    Subclasses provide _prompt() (ask one question) and write() (emit
    text). prompt() layers answer validation on top of _prompt().
    """

    __metaclass__ = ABCMeta

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Ask the user until one of the allowed answers is given.

        prompt  -- text shown to the user
        results -- the allowed answers: either a string of
                   single-character choices (e.g. 'aif') or any other
                   iterable of answers
        default -- passed through to _prompt() as the fallback answer

        Returns the accepted answer.
        """
        # Compare against the individual choices. A plain "in" test on
        # a string is a substring test, which would accept '' or 'ai'
        # for 'aif' and hand the caller an answer it does not handle.
        choices = tuple(results)
        while True:
            result = self._prompt(prompt, results, default)
            if result in choices:
                return result

    @abstractmethod
    def _prompt(self, prompt, results, default):
        """Ask once and return the raw answer (or the default)."""
        pass

    @abstractmethod
    def write(self, string):
        """Emit 'string' to the user."""
        pass


class StdioUI(UserInterface):
    """User interface backed by the process's standard streams."""

    def _prompt(self, prompt, results, default):
        # An empty answer (just pressing return) selects the default.
        return raw_input(prompt) or default

    def write(self, string):
        sys.stdout.write(string)


class MercurialUI(UserInterface):
    """User interface that forwards to a Mercurial ui object."""

    def __init__(self, ui, *args, **kwargs):
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.hg_ui = ui

    def _prompt(self, prompt, results, default):
        return self.hg_ui.prompt(prompt, default=default)

    def write(self, string):
        self.hg_ui.write(string)


def _re_ignore(expr):
    """Build an ignore rule from a regular expression.

    Returns a function that takes a file name and returns a match
    object (true) if the name matches 'expr' at its start, else None.
    """

    rex = re.compile(expr)

    def match_re(fname):
        return rex.match(fname)

    return match_re


# Functions that are called to determine if a file should be excluded
# from the style matching rules or not. The functions are called with
# the file name relative to the repository root (without a leading
# slash) as their argument. A file is excluded if any function in the
# list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    _re_ignore("^ext/"),
]


def check_ignores(fname):
    """Return True if 'fname' matches any rule in style_ignores."""

    return any(rule(fname) for rule in style_ignores)


def normalized_len(line):
    """Return the display width of 'line' with tabs expanded.

    Each tab advances to the next multiple of 'tabsize'; every other
    character counts as one column.
    """

    count = 0
    for c in line:
        if c == '\t':
            count += tabsize - count % tabsize
        else:
            count += 1

    return count


def modified_regions(old, new, context=0):
    """Return the regions of 'new' that differ from 'old'.

    old, new -- sequences of lines to compare
    context  -- number of unchanged lines kept around each change when
                difflib groups the changes

    Each group of changes contributes one Region running from the
    group's first index in 'new' to one past the group's end index.
    """
    regions = Regions()
    m = difflib.SequenceMatcher(a=old, b=new, autojunk=False)
    for group in m.get_grouped_opcodes(context):
        first = group[0]
        last = group[-1]

        # Opcode tuples are (tag, i1, i2, j1, j2); [3] and [4] are the
        # bounds in 'new'.
        # NOTE(review): j2 is already an exclusive bound, so the "+ 1"
        # widens the region by one line -- presumably intentional;
        # confirm against Region's semantics.
        regions.extend(Region(first[3], last[4] + 1))

    return regions
+# +# Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2007,2011 The Hewlett-Packard Development Company +# Copyright (c) 2016 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
tabsize = 8
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')
# A control keyword, optional blanks, then '('. Group 1 is the keyword,
# group 2 the whitespace between the keyword and the parenthesis.
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')


class Validator(object):
    """Base class for style validators

    Validators analyze source files for common style violations and
    produce source code style violation statistics. Unlike style
    verifiers (see verifiers.py), they do not try to fix any style
    violations.

    Subclasses may set 'supported_langs' to the set of languages they
    apply to; an empty set means "every language".

    Deprecation warning: These classes are currently only used by the
    "hg m5format" command and not by any style hooks. New style
    checkers should inherit from Verifier instead of Validator.

    """

    __metaclass__ = ABCMeta

    # Languages this validator applies to; empty means all languages.
    supported_langs = set()

    def __init__(self, file_name, verbose=False, language=None):
        self.file_name = file_name
        self.verbose = verbose
        # Number of violations reported through fail_line() so far.
        self.bad = 0
        self.language = language

    def fail_line(self, line_no, line, message):
        """Report a violation on zero-based line 'line_no' and count it."""
        print('%s:%d> %s' % (self.file_name, line_no + 1, message))
        if self.verbose:
            print(line)
        self.bad += 1

    def __nonzero__(self):
        # A validator is "true" while no violation has been recorded.
        return self.bad == 0

    @classmethod
    def supported_lang(cls, language):
        """Return True if this validator applies to 'language'."""
        return not cls.supported_langs or language in cls.supported_langs

    @abstractmethod
    def validate_line(self, line_no, line):
        """Check one line; return True if it is fine, False otherwise."""
        pass

    @abstractmethod
    def dump(self):
        """Print a summary of the violations found."""
        pass


class SimpleValidator(Validator):
    """Validator built from a per-line predicate and two fixed messages.

    Subclasses implement simple_validate_line(); the failure message and
    the summary format (which may use %(bad)d) are passed to __init__.
    """

    def __init__(self, fail_message, dump_message, file_name, **kwargs):
        super(SimpleValidator, self).__init__(file_name, **kwargs)

        self.fail_message = fail_message
        self.dump_message = dump_message

    def validate_line(self, line_no, line):
        if self.simple_validate_line(line):
            return True

        self.fail_line(line_no, line, self.fail_message)
        return False

    @abstractmethod
    def simple_validate_line(self, line):
        """Return a true value if 'line' is acceptable."""
        pass

    def dump(self):
        print(self.dump_message % {
            "bad": self.bad
        })


class LineLength(Validator):
    """Flag lines wider than 79 columns (tabs expanded)."""

    def __init__(self, *args, **kwargs):
        super(LineLength, self).__init__(*args, **kwargs)

        # Number of offending lines that are exactly 80 columns wide.
        self.toolong80 = 0

    def validate_line(self, line_no, line):
        llen = style.normalized_len(line)
        if llen == 80:
            self.toolong80 += 1

        if llen > 79:
            self.fail_line(line_no, line, 'line too long (%d chars)' % llen)
            return False

        return True

    def dump(self):
        print("%d violations of lines over 79 chars. "
              "%d of which are 80 chars exactly." %
              (self.bad, self.toolong80))


class ControlSpacing(Validator):
    """Flag if/while/for not followed by exactly one space before '('."""

    supported_langs = set(('C', 'C++'))

    def validate_line(self, line_no, line):
        match = any_control.search(line)
        if match and match.group(2) != " ":
            # fail_line() does the counting (self.bad); there is no
            # separate statistics object to update.
            self.fail_line(line_no, line,
                           'improper spacing after %s' % match.group(1))
            return False

        return True

    def dump(self):
        print("%d bad parens after if/while/for." % (self.bad, ))


class CarriageReturn(SimpleValidator):
    """Flag lines containing a carriage return."""

    def __init__(self, *args, **kwargs):
        super(CarriageReturn, self).__init__(
            "carriage return found",
            "%(bad)d carriage returns found.",
            *args, **kwargs)

    def simple_validate_line(self, line):
        return '\r' not in line


class TabIndent(SimpleValidator):
    """Flag lines whose leading whitespace contains a tab."""

    lead = re.compile(r'^([ \t]+)')

    def __init__(self, *args, **kwargs):
        super(TabIndent, self).__init__(
            "using tabs to indent",
            "%(bad)d cases of tabs to indent.",
            *args, **kwargs)

    def simple_validate_line(self, line):
        match = TabIndent.lead.search(line)
        return not (match and '\t' in match.group(1))


class TrailingWhitespace(SimpleValidator):
    """Flag lines that end in blanks or tabs."""

    trail = re.compile(r'([ \t]+)$')

    def __init__(self, *args, **kwargs):
        super(TrailingWhitespace, self).__init__(
            "trailing whitespace",
            "%(bad)d cases of whitespace at the end of a line.",
            *args, **kwargs)

    def simple_validate_line(self, line):
        return not TrailingWhitespace.trail.search(line)


def is_validator(cls):
    """Determine if a class is a Validator that can be instantiated"""

    return inspect.isclass(cls) and issubclass(cls, Validator) and \
        not inspect.isabstract(cls)


# list of all validator classes
all_validators = [v for n, v in
                  inspect.getmembers(sys.modules[__name__], is_validator)]
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def _modified_regions(old, new):
    """Return the regions of 'old' that are not matched in 'new'.

    old, new -- sequences of lines to compare

    Every non-"equal" opcode reported by difflib contributes the
    Region(i1, i2) it covers in 'old'.
    """
    m = SequenceMatcher(a=old, b=new, autojunk=False)

    regions = Regions()
    for tag, i1, i2, j1, j2 in m.get_opcodes():
        if tag != "equal":
            regions.extend(Region(i1, i2))
    return regions


class Verifier(object):
    """Base class for style verifiers

    Verifiers check for style violations and optionally fix such
    violations. Implementations should either inherit from this class
    (Verifier) if they need to work on entire files or LineVerifier if
    they operate on a line-by-line basis.

    Subclasses must define these class attributes:
      languages = set of strings identifying applicable languages
      test_name = long descriptive name of test, will be used in
                  messages such as "error in <test_name>" or
                  "invalid <test_name>"
      opt_name = short name used to generate command-line options to
                 control the test (--fix-<opt_name>,
                 --ignore-<opt_name>, etc.)

    """

    __metaclass__ = ABCMeta

    def __init__(self, ui, opts, base=None):
        """Set up the verifier.

        ui   -- object providing prompt(), write() and 'verbose'
        opts -- mapping of option names to values; the keys read are
                fix_/ignore_/skip_ followed by opt_name or 'all'
        base -- directory file names are made relative to by
                normalize_filename(), or None to keep them absolute
        """
        self.ui = ui
        self.base = base

        # opt_name must be defined as a class attribute of derived classes.
        # Check test-specific opts first as these have precedence.
        self.opt_fix = opts.get('fix_' + self.opt_name, False)
        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
        self.opt_skip = opts.get('skip_' + self.opt_name, False)
        # If no test-specific opts were set, then set based on "-all" opts.
        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
            self.opt_fix = opts.get('fix_all', False)
            self.opt_ignore = opts.get('ignore_all', False)
            self.opt_skip = opts.get('skip_all', False)

    def normalize_filename(self, name):
        """Return 'name' relative to self.base, or absolute if no base."""
        abs_name = os.path.abspath(name)
        if self.base is None:
            return abs_name

        abs_base = os.path.abspath(self.base)
        return os.path.relpath(abs_name, start=abs_base)

    def open(self, filename, mode):
        """Open 'filename'; print a message and return None on failure."""
        try:
            # Inside a method the bare name 'open' is the builtin, not
            # this method. Opening a file fails with IOError on
            # Python 2, so catching OSError alone never triggers.
            f = open(filename, mode)
        except (IOError, OSError) as msg:
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True if this verifier should not look at 'filename'."""
        # We never want to handle symlinks, so always skip them: If the
        # location pointed to is a directory, skip it. If the location is
        # a file inside the gem5 directory, it will be checked as a file,
        # so symlink can be skipped. If the location is a file outside
        # gem5, we don't want to check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def apply(self, filename, regions=all_regions):
        """Possibly apply to specified regions of file 'filename'.

        Verifier is skipped if --skip-<opt_name> option was provided or
        if file is not of an applicable type. Otherwise file is checked
        and error messages printed. Errors are fixed or ignored if the
        corresponding --fix-<opt_name> or --ignore-<opt_name> options
        were provided. If neither, the user is prompted for an action.

        Returns True to abort, False otherwise.
        """
        if not (self.opt_skip or self.skip(filename)):
            errors = self.check(filename, regions)
            if errors and not self.opt_ignore:
                if self.opt_fix:
                    self.fix(filename, regions)
                else:
                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
                                            'aif', 'a')
                    if result == 'f':
                        self.fix(filename, regions)
                    elif result == 'a':
                        return True  # abort

        return False

    @abstractmethod
    def check(self, filename, regions=all_regions):
        """Check specified regions of file 'filename'.

        Line-by-line checks can simply provide a check_line() method
        that returns True if the line is OK and False if it has an
        error. Verifiers that need a multi-line view (like
        SortedIncludes) must override this entire function.

        Returns a count of errors (0 if none), though actual non-zero
        count value is not currently used anywhere.
        """
        pass

    @abstractmethod
    def fix(self, filename, regions=all_regions):
        """Fix specified regions of file 'filename'.

        Line-by-line fixes can simply provide a fix_line() method that
        returns the fixed line. Verifiers that need a multi-line view
        (like SortedIncludes) must override this entire function.
        """
        pass


class LineVerifier(Verifier):
    """Verifier that checks and fixes a file one line at a time.

    Subclasses implement check_line() and fix_line(); both receive the
    line without its trailing newline.
    """

    def check(self, filename, regions=all_regions):
        f = self.open(filename, 'r')
        if f is None:
            # open() already reported the problem; nothing to check.
            return 0

        errors = 0
        with f:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                line = line.rstrip('\n')
                if not self.check_line(line):
                    self.ui.write("invalid %s in %s:%d\n" %
                                  (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # The newline is already stripped, so show the
                        # whole line.
                        self.ui.write(">>%s<<\n" % line)
                    errors += 1
        return errors

    def fix(self, filename, regions=all_regions):
        f = self.open(filename, 'r+')
        if f is None:
            return

        with f:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                line = line.rstrip('\n')
                if i in regions:
                    # Some fix_line() implementations append their own
                    # newline; drop it so each line is terminated
                    # exactly once below.
                    line = self.fix_line(line).rstrip('\n')

                f.write(line)
                f.write("\n")

    @abstractmethod
    def check_line(self, line):
        """Return True if 'line' is OK, False if it has an error."""
        pass

    @abstractmethod
    def fix_line(self, line):
        """Return the corrected version of 'line'."""
        pass


class Whitespace(LineVerifier):
    """Check whitespace.

    Specifically:
    - No tabs used for indent
    - No trailing whitespace
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'
    opt_name = 'white'

    _lead = re.compile(r'^([ \t]+)')
    _trail = re.compile(r'([ \t]+)$')

    def check_line(self, line):
        match = Whitespace._lead.search(line)
        if match and '\t' in match.group(1):
            return False

        if Whitespace._trail.search(line):
            return False

        return True

    def fix_line(self, line):
        """Expand tabs in the indentation and strip trailing blanks.

        Returns the fixed line terminated by a newline.
        """
        if Whitespace._lead.search(line):
            newline = ''
            for i, c in enumerate(line):
                if c == ' ':
                    newline += ' '
                elif c == '\t':
                    # Pad to the next tab stop. The tab width lives in
                    # the style module; this module defines none.
                    newline += ' ' * (style.tabsize -
                                      len(newline) % style.tabsize)
                else:
                    # First non-blank character: keep the rest as is.
                    newline += line[i:]
                    break

            line = newline

        return line.rstrip() + '\n'


class SortedIncludes(Verifier):
    """Check for proper sorting of include statements"""

    languages = sort_includes.default_languages
    test_name = 'include file order'
    opt_name = 'include'

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting the includes would change 'regions'."""
        f = self.open(filename, 'r')
        if f is None:
            return 0
        norm_fname = self.normalize_filename(filename)

        with f:
            old = [l.rstrip('\n') for l in f]

        if not old:
            return 0

        language = lang_type(filename, old[0])
        new = list(self.sort_includes(old, norm_fname, language))

        modified = _modified_regions(old, new) & regions

        if modified:
            self.ui.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.ui.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite 'filename' with its includes sorted.

        The whole file is rewritten; 'regions' is not consulted.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return
        # Sort against the same normalized name check() uses, so that
        # both agree on what the sorted file looks like.
        norm_fname = self.normalize_filename(filename)

        with f:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                return

            language = lang_type(filename, lines[0])
            sort_lines = list(self.sort_includes(lines, norm_fname,
                                                 language))

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')


class ControlSpace(LineVerifier):
    """Check for exactly one space after if/while/for"""

    languages = set(('C', 'C++'))
    test_name = 'spacing after if/while/for'
    opt_name = 'control'

    _any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')

    def check_line(self, line):
        match = ControlSpace._any_control.search(line)
        return not (match and match.group(2) != " ")

    def fix_line(self, line):
        # The pattern is a class attribute; a bare _any_control is not
        # defined at module level.
        return ControlSpace._any_control.sub(r'\1 (', line)


class LineLength(LineVerifier):
    """Check that lines are at most 79 columns wide (tabs expanded)."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'line length'
    opt_name = 'length'

    def check_line(self, line):
        # Lines arrive without their newline, so a 79-column line is
        # still within the limit (same limit as the LineLength
        # validator, which flags lines over 79 chars).
        return style.normalized_len(line) <= 79

    def fix(self, filename, regions=all_regions):
        self.ui.write("Warning: cannot automatically fix overly long lines.\n")

    def fix_line(self, line):
        # Never called: fix() is overridden and does not fix lines.
        pass


class BoolCompare(LineVerifier):
    """Check for explicit comparisons against boolean constants."""

    languages = set(('C', 'C++', 'python'))
    test_name = 'boolean comparison'
    opt_name = 'boolcomp'

    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')

    def check_line(self, line):
        return self.regex.search(line) is None

    def fix_line(self, line):
        match = self.regex.search(line)
        if match:
            if match.group(1) in ('true', 'True'):
                # "x == true" is just "x": drop the comparison.
                line = self.regex.sub('', line)
            else:
                self.ui.write("Warning: cannot automatically fix "
                              "comparisons with false/False.\n")
        return line


def is_verifier(cls):
    """Determine if a class is a Verifier that can be instantiated"""

    return inspect.isclass(cls) and issubclass(cls, Verifier) and \
        not inspect.isabstract(cls)


# list of all verifier classes
all_verifiers = [v for n, v in
                 inspect.getmembers(sys.modules[__name__], is_verifier)]