From: Richard Sandiford Date: Mon, 4 Feb 2013 20:47:33 +0000 (+0000) Subject: Rename update-copyright.pl to update-copyright.py. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b3dc71024e75a505a7f2d6c78c7e4a2e1e329e0a;p=gcc.git Rename update-copyright.pl to update-copyright.py. From-SVN: r195736 --- diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 3d5b3fca7c5..eaccdbdeee9 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,6 +1,6 @@ 2013-02-04 Richard Sandiford - * update-copyright.pl: New file. + * update-copyright.py: New file. 2013-01-15 David Blaikie diff --git a/contrib/update-copyright.pl b/contrib/update-copyright.pl deleted file mode 100644 index db468724703..00000000000 --- a/contrib/update-copyright.pl +++ /dev/null @@ -1,766 +0,0 @@ -#!/usr/bin/python -# -# Copyright (C) 2013 Free Software Foundation, Inc. -# -# This script is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. - -# This script adjusts the copyright notices at the top of source files -# so that they have the form: -# -# Copyright XXXX-YYYY Free Software Foundation, Inc. -# -# It doesn't change code that is known to be maintained elsewhere or -# that carries a non-FSF copyright. -# -# The script also doesn't change testsuite files, except those in -# libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite, -# while most tests in other directories are just things that failed at some -# point in the past. -# -# Pass --this-year to the script if you want it to add the current year -# to all applicable notices. Pass --quilt if you are using quilt and -# want files to be added to the quilt before being changed. -# -# By default the script will update all directories for which the -# output has been vetted. You can instead pass the names of individual -# directories, including those that haven't been approved. So: -# -# update-copyright.pl --this-year -# -# is the command that would be used at the beginning of a year to update -# all copyright notices (and possibly at other times to check whether -# new files have been added with old years). On the other hand: -# -# update-copyright.pl --this-year libjava -# -# would run the script on just libjava/. -# -# Note that things like --version output strings must be updated before -# this script is run. There's already a separate procedure for that. - -import os -import re -import sys -import time -import subprocess - -class Errors: - def __init__ (self): - self.num_errors = 0 - - def report (self, filename, string): - if filename: - string = filename + ': ' + string - sys.stderr.write (string + '\n') - self.num_errors += 1 - - def ok (self): - return self.num_errors == 0 - -class GenericFilter: - def __init__ (self): - self.skip_files = set() - self.skip_dirs = set() - self.skip_extensions = set() - self.fossilised_files = set() - self.own_files = set() - - self.skip_files |= set ([ - # Skip licence files. - 'COPYING', - 'COPYING.LIB', - 'COPYING3', - 'COPYING3.LIB', - 'LICENSE', - 'fdl.texi', - 'gpl_v3.texi', - 'fdl-1.3.xml', - 'gpl-3.0.xml', - - # Skip auto- and libtool-related files - 'aclocal.m4', - 'compile', - 'config.guess', - 'config.sub', - 'depcomp', - 'install-sh', - 'libtool.m4', - 'ltmain.sh', - 'ltoptions.m4', - 'ltsugar.m4', - 'ltversion.m4', - 'lt~obsolete.m4', - 'missing', - 'mkdep', - 'mkinstalldirs', - 'move-if-change', - 'shlibpath.m4', - 'symlink-tree', - 'ylwrap', - - # Skip FSF mission statement, etc. - 'gnu.texi', - 'funding.texi', - 'appendix_free.xml', - - # Skip imported texinfo files. - 'texinfo.tex', - ]) - - - def get_line_filter (self, dir, filename): - if filename.startswith ('ChangeLog'): - # Ignore references to copyright in changelog entries. - return re.compile ('\t') - - return None - - def skip_file (self, dir, filename): - if filename in self.skip_files: - return True - - (base, extension) = os.path.splitext (os.path.join (dir, filename)) - if extension in self.skip_extensions: - return True - - if extension == '.in': - # Skip .in files produced by automake. - if os.path.exists (base + '.am'): - return True - - # Skip files produced by autogen - if (os.path.exists (base + '.def') - and os.path.exists (base + '.tpl')): - return True - - # Skip configure files produced by autoconf - if filename == 'configure': - if os.path.exists (base + '.ac'): - return True - if os.path.exists (base + '.in'): - return True - - return False - - def skip_dir (self, dir, subdir): - return subdir in self.skip_dirs - - def is_fossilised_file (self, dir, filename): - if filename in self.fossilised_files: - return True - # Only touch current current ChangeLogs. - if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0: - return True - return False - - def by_package_author (self, dir, filename): - return filename in self.own_files - -class Copyright: - def __init__ (self, errors): - self.errors = errors - - # Characters in a range of years. Include '.' for typos. - ranges = '[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]' - - # Non-whitespace characters in a copyright holder's name. - name = '[\w.,-]' - - # Matches one year. - self.year_re = re.compile ('[0-9]+') - - # Matches part of a year or copyright holder. - self.continuation_re = re.compile (ranges + '|' + name) - - # Matches a full copyright notice: - self.copyright_re = re.compile ( - # 1: 'Copyright (C)', etc. - '([Cc]opyright' - '|[Cc]opyright\s+\([Cc]\)' - '|[Cc]opyright\s+%s' - '|[Cc]opyright\s+©' - '|[Cc]opyright\s+@copyright{}' - '|@set\s+copyright[\w-]+)' - - # 2: the years. Include the whitespace in the year, so that - # we can remove any excess. - '(\s*(?:' + ranges + ',?' - '|@value\{[^{}]*\})\s*)' - - # 3: 'by ', if used - '(by\s+)?' - - # 4: the copyright holder. Don't allow multiple consecutive - # spaces, so that right-margin gloss doesn't get caught - # (e.g. gnat_ugn.texi). - '(' + name + '(?:\s?' + name + ')*)?') - - # A regexp for notices that might have slipped by. Just matching - # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of - # HTML header markers, so check for 'copyright' and two digits. - self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]', - re.IGNORECASE) - self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ') - self.holders = { '@copying': '@copying' } - self.holder_prefixes = set() - - # True to 'quilt add' files before changing them. - self.use_quilt = False - - # If set, force all notices to include this year. - self.max_year = None - - # Goes after the year(s). Could be ', '. - self.separator = ' ' - - def add_package_author (self, holder, canon_form = None): - if not canon_form: - canon_form = holder - self.holders[holder] = canon_form - index = holder.find (' ') - while index >= 0: - self.holder_prefixes.add (holder[:index]) - index = holder.find (' ', index + 1) - - def add_external_author (self, holder): - self.holders[holder] = None - - class BadYear(): - def __init__ (self, year): - self.year = year - - def __str__ (self): - return 'unrecognised year: ' + self.year - - def parse_year (self, string): - year = int (string) - if len (string) == 2: - if year > 70: - return year + 1900 - elif len (string) == 4: - return year - raise self.BadYear (string) - - def year_range (self, years): - year_list = [self.parse_year (year) - for year in self.year_re.findall (years)] - assert len (year_list) > 0 - return (min (year_list), max (year_list)) - - def set_use_quilt (self, use_quilt): - self.use_quilt = use_quilt - - def include_year (self, year): - assert not self.max_year - self.max_year = year - - def canonicalise_years (self, dir, filename, filter, years): - # Leave texinfo variables alone. - if years.startswith ('@value'): - return years - - (min_year, max_year) = self.year_range (years) - - # Update the upper bound, if enabled. - if self.max_year and not filter.is_fossilised_file (dir, filename): - max_year = max (max_year, self.max_year) - - # Use a range. - if min_year == max_year: - return '%d' % min_year - else: - return '%d-%d' % (min_year, max_year) - - def strip_continuation (self, line): - line = line.lstrip() - match = self.comment_re.match (line) - if match: - line = line[match.end():].lstrip() - return line - - def is_complete (self, match): - holder = match.group (4) - return (holder - and (holder not in self.holder_prefixes - or holder in self.holders)) - - def update_copyright (self, dir, filename, filter, file, line, match): - orig_line = line - next_line = None - pathname = os.path.join (dir, filename) - - intro = match.group (1) - if intro.startswith ('@set'): - # Texinfo year variables should always be on one line - after_years = line[match.end (2):].strip() - if after_years != '': - self.errors.report (pathname, - 'trailing characters in @set: ' - + after_years) - return (False, orig_line, next_line) - else: - # If it looks like the copyright is incomplete, add the next line. - while not self.is_complete (match): - try: - next_line = file.next() - except StopIteration: - break - - # If the next line doesn't look like a proper continuation, - # assume that what we've got is complete. - continuation = self.strip_continuation (next_line) - if not self.continuation_re.match (continuation): - break - - # Merge the lines for matching purposes. - orig_line += next_line - line = line.rstrip() + ' ' + continuation - next_line = None - - # Rematch with the longer line, at the original position. - match = self.copyright_re.match (line, match.start()) - assert match - - holder = match.group (4) - - # Use the filter to test cases where markup is getting in the way. - if filter.by_package_author (dir, filename): - assert holder not in self.holders - - elif not holder: - self.errors.report (pathname, 'missing copyright holder') - return (False, orig_line, next_line) - - elif holder not in self.holders: - self.errors.report (pathname, - 'unrecognised copyright holder: ' + holder) - return (False, orig_line, next_line) - - else: - # See whether the copyright is associated with the package - # author. - canon_form = self.holders[holder] - if not canon_form: - return (False, orig_line, next_line) - - # Make sure the author is given in a consistent way. - line = (line[:match.start (4)] - + canon_form - + line[match.end (4):]) - - # Remove any 'by' - line = line[:match.start (3)] + line[match.end (3):] - - # Update the copyright years. - years = match.group (2).strip() - try: - canon_form = self.canonicalise_years (dir, filename, filter, years) - except self.BadYear as e: - self.errors.report (pathname, str (e)) - return (False, orig_line, next_line) - - line = (line[:match.start (2)] - + ' ' + canon_form + self.separator - + line[match.end (2):]) - - # Use the standard (C) form. - if intro.endswith ('right'): - intro += ' (C)' - elif intro.endswith ('(c)'): - intro = intro[:-3] + '(C)' - line = line[:match.start (1)] + intro + line[match.end (1):] - - # Strip trailing whitespace - line = line.rstrip() + '\n' - - return (line != orig_line, line, next_line) - - def process_file (self, dir, filename, filter): - pathname = os.path.join (dir, filename) - if filename.endswith ('.tmp'): - # Looks like something we tried to create before. - try: - os.remove (pathname) - except OSError: - pass - return - - lines = [] - changed = False - line_filter = filter.get_line_filter (dir, filename) - with open (pathname, 'r') as file: - prev = None - for line in file: - while line: - next_line = None - # Leave filtered-out lines alone. - if not (line_filter and line_filter.match (line)): - match = self.copyright_re.search (line) - if match: - res = self.update_copyright (dir, filename, filter, - file, line, match) - (this_changed, line, next_line) = res - changed = changed or this_changed - - # Check for copyright lines that might have slipped by. - elif self.other_copyright_re.search (line): - self.errors.report (pathname, - 'unrecognised copyright: %s' - % line.strip()) - lines.append (line) - line = next_line - - # If something changed, write the new file out. - if changed and self.errors.ok(): - tmp_pathname = pathname + '.tmp' - with open (tmp_pathname, 'w') as file: - for line in lines: - file.write (line) - if self.use_quilt: - subprocess.call (['quilt', 'add', pathname]) - os.rename (tmp_pathname, pathname) - - def process_tree (self, tree, filter): - for (dir, subdirs, filenames) in os.walk (tree): - # Don't recurse through directories that should be skipped. - for i in xrange (len (subdirs) - 1, -1, -1): - if filter.skip_dir (dir, subdirs[i]): - del subdirs[i] - - # Handle the files in this directory. - for filename in filenames: - if filter.skip_file (dir, filename): - sys.stdout.write ('Skipping %s\n' - % os.path.join (dir, filename)) - else: - self.process_file (dir, filename, filter) - -class CmdLine: - def __init__ (self, copyright = Copyright): - self.errors = Errors() - self.copyright = copyright (self.errors) - self.dirs = [] - self.default_dirs = [] - self.chosen_dirs = [] - self.option_handlers = dict() - self.option_help = [] - - self.add_option ('--help', 'Print this help', self.o_help) - self.add_option ('--quilt', '"quilt add" files before changing them', - self.o_quilt) - self.add_option ('--this-year', 'Add the current year to every notice', - self.o_this_year) - - def add_option (self, name, help, handler): - self.option_help.append ((name, help)) - self.option_handlers[name] = handler - - def add_dir (self, dir, filter = GenericFilter()): - self.dirs.append ((dir, filter)) - - def o_help (self, option = None): - sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n' - 'Options:\n' % sys.argv[0]) - format = '%-15s %s\n' - for (what, help) in self.option_help: - sys.stdout.write (format % (what, help)) - sys.stdout.write ('\nDirectories:\n') - - format = '%-25s' - i = 0 - for (dir, filter) in self.dirs: - i += 1 - if i % 3 == 0 or i == len (self.dirs): - sys.stdout.write (dir + '\n') - else: - sys.stdout.write (format % dir) - sys.exit (0) - - def o_quilt (self, option): - self.copyright.set_use_quilt (True) - - def o_this_year (self, option): - self.copyright.include_year (time.localtime().tm_year) - - def main (self): - for arg in sys.argv[1:]: - if arg[:1] != '-': - self.chosen_dirs.append (arg) - elif arg in self.option_handlers: - self.option_handlers[arg] (arg) - else: - self.errors.report (None, 'unrecognised option: ' + arg) - if self.errors.ok(): - if len (self.chosen_dirs) == 0: - self.chosen_dirs = self.default_dirs - if len (self.chosen_dirs) == 0: - self.o_help() - else: - for chosen_dir in self.chosen_dirs: - canon_dir = os.path.join (chosen_dir, '') - count = 0 - for (dir, filter) in self.dirs: - if (dir + os.sep).startswith (canon_dir): - count += 1 - self.copyright.process_tree (dir, filter) - if count == 0: - self.errors.report (None, 'unrecognised directory: ' - + chosen_dir) - sys.exit (0 if self.errors.ok() else 1) - -#---------------------------------------------------------------------------- - -class TopLevelFilter (GenericFilter): - def skip_dir (self, dir, subdir): - return True - -class ConfigFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - def skip_file (self, dir, filename): - if filename.endswith ('.m4'): - pathname = os.path.join (dir, filename) - with open (pathname) as file: - # Skip files imported from gettext. - if file.readline().find ('gettext-') >= 0: - return True - return GenericFilter.skip_file (self, dir, filename) - -class GCCFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_files |= set ([ - # Not part of GCC - 'math-68881.h', - ]) - - self.skip_dirs |= set ([ - # Better not create a merge nightmare for the GNAT folks. - 'ada', - - # Handled separately. - 'testsuite', - ]) - - self.skip_extensions |= set ([ - # Maintained by the translation project. - '.po', - - # Automatically-generated. - '.pot', - ]) - - self.fossilised_files |= set ([ - # Old news won't be updated. - 'ONEWS', - ]) - -class TestsuiteFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_extensions |= set ([ - # Don't change the tests, which could be woend by anyone. - '.c', - '.C', - '.cc', - '.h', - '.hs', - '.f', - '.f90', - '.go', - '.inc', - '.java', - ]) - - def skip_file (self, dir, filename): - # g++.niklas/README contains historical copyright information - # and isn't updated. - if filename == 'README' and os.path.basename (dir) == 'g++.niklas': - return True - return GenericFilter.skip_file (self, dir, filename) - -class LibCppFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_extensions |= set ([ - # Maintained by the translation project. - '.po', - - # Automatically-generated. - '.pot', - ]) - -class LibGCCFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_dirs |= set ([ - # Imported from GLIBC. - 'soft-fp', - ]) - -class LibJavaFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_dirs |= set ([ - # Handled separately. - 'testsuite', - - # Not really part of the library - 'contrib', - - # Imported from upstream - 'classpath', - 'libltdl', - ]) - - def get_line_filter (self, dir, filename): - if filename == 'NameDecoder.h': - return re.compile ('.*NAME_COPYRIGHT') - if filename == 'ICC_Profile.h': - return re.compile ('.*icSigCopyrightTag') - return GenericFilter.get_line_filter (self, dir, filename) - -class LibMudflapFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_dirs |= set ([ - # Handled separately. - 'testsuite', - ]) - -class LibStdCxxFilter (GenericFilter): - def __init__ (self): - GenericFilter.__init__ (self) - - self.skip_files |= set ([ - # Contains no copyright of its own, but quotes the GPL. - 'intro.xml', - ]) - - self.skip_dirs |= set ([ - # Contains automatically-generated sources. - 'html', - - # The testsuite data files shouldn't be changed. - 'data', - - # Contains imported images - 'images', - ]) - - self.own_files |= set ([ - # Contains markup around the copyright owner. - 'spine.xml', - ]) - - def get_line_filter (self, dir, filename): - if filename == 'boost_concept_check.h': - return re.compile ('// \(C\) Copyright Jeremy Siek') - return GenericFilter.get_line_filter (self, dir, filename) - -class GCCCopyright (Copyright): - def __init__ (self, errors): - Copyright.__init__ (self, errors) - - canon_fsf = 'Free Software Foundation, Inc.' - self.add_package_author ('Free Software Foundation', canon_fsf) - self.add_package_author ('Free Software Foundation.', canon_fsf) - self.add_package_author ('Free Software Foundation Inc.', canon_fsf) - self.add_package_author ('Free Software Foundation, Inc', canon_fsf) - self.add_package_author ('Free Software Foundation, Inc.', canon_fsf) - self.add_package_author ('The Free Software Foundation', canon_fsf) - self.add_package_author ('The Free Software Foundation, Inc.', canon_fsf) - self.add_package_author ('Software Foundation, Inc.', canon_fsf) - - self.add_external_author ('ARM') - self.add_external_author ('AdaCore') - self.add_external_author ('Ami Tavory and Vladimir Dreizin, IBM-HRL.') - self.add_external_author ('Cavium Networks.') - self.add_external_author ('Faraday Technology Corp.') - self.add_external_author ('Florida State University') - self.add_external_author ('Greg Colvin and Beman Dawes.') - self.add_external_author ('Hewlett-Packard Company') - self.add_external_author ('Information Technology Industry Council.') - self.add_external_author ('James Theiler, Brian Gough') - self.add_external_author ('Makoto Matsumoto and Takuji Nishimura,') - self.add_external_author ('National Research Council of Canada.') - self.add_external_author ('Peter Dimov and Multi Media Ltd.') - self.add_external_author ('Peter Dimov') - self.add_external_author ('Pipeline Associates, Inc.') - self.add_external_author ('Regents of the University of California.') - self.add_external_author ('Silicon Graphics Computer Systems, Inc.') - self.add_external_author ('Silicon Graphics') - self.add_external_author ('Stephen L. Moshier') - self.add_external_author ('Sun Microsystems, Inc. All rights reserved.') - self.add_external_author ('The Go Authors. All rights reserved.') - self.add_external_author ('The Go Authors. All rights reserved.') - self.add_external_author ('The Go Authors.') - self.add_external_author ('The Regents of the University of California.') - self.add_external_author ('Unicode, Inc.') - self.add_external_author ('University of Toronto.') - -class GCCCmdLine (CmdLine): - def __init__ (self): - CmdLine.__init__ (self, GCCCopyright) - - self.add_dir ('.', TopLevelFilter()) - # boehm-gc is imported from upstream. - self.add_dir ('config', ConfigFilter()) - # contrib isn't really part of GCC. - self.add_dir ('fixincludes') - self.add_dir ('gcc', GCCFilter()) - self.add_dir (os.path.join ('gcc', 'testsuite'), TestsuiteFilter()) - self.add_dir ('gnattools') - self.add_dir ('include') - self.add_dir ('libada') - self.add_dir ('libatomic') - self.add_dir ('libbacktrace') - self.add_dir ('libcpp', LibCppFilter()) - self.add_dir ('libdecnumber') - # libffi is imported from upstream. - self.add_dir ('libgcc', LibGCCFilter()) - self.add_dir ('libgfortran') - self.add_dir ('libgomp') - self.add_dir ('libiberty') - self.add_dir ('libitm') - self.add_dir ('libjava', LibJavaFilter()) - self.add_dir (os.path.join ('libjava', 'testsuite'), TestsuiteFilter()) - self.add_dir ('libmudflap', LibMudflapFilter()) - self.add_dir (os.path.join ('libmudflap', 'testsuite'), - TestsuiteFilter()) - self.add_dir ('libobjc') - self.add_dir ('libquadmath') - # libsanitiser is imported from upstream. - self.add_dir ('libssp') - self.add_dir ('libstdc++-v3', LibStdCxxFilter()) - self.add_dir ('lto-plugin') - # zlib is imported from upstream. - - self.default_dirs = [ - 'gcc', - 'libada', - 'libatomic', - 'libbacktrace', - 'libcpp', - 'libgcc', - 'libgfortran', - 'libgomp', - 'libitm', - 'libmudflap', - 'libobjc', - 'libquadmath', - 'libssp', - 'libstdc++-v3', - ] - -GCCCmdLine().main() diff --git a/contrib/update-copyright.py b/contrib/update-copyright.py new file mode 100644 index 00000000000..db468724703 --- /dev/null +++ b/contrib/update-copyright.py @@ -0,0 +1,766 @@ +#!/usr/bin/python +# +# Copyright (C) 2013 Free Software Foundation, Inc. +# +# This script is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# This script adjusts the copyright notices at the top of source files +# so that they have the form: +# +# Copyright XXXX-YYYY Free Software Foundation, Inc. +# +# It doesn't change code that is known to be maintained elsewhere or +# that carries a non-FSF copyright. +# +# The script also doesn't change testsuite files, except those in +# libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite, +# while most tests in other directories are just things that failed at some +# point in the past. +# +# Pass --this-year to the script if you want it to add the current year +# to all applicable notices. Pass --quilt if you are using quilt and +# want files to be added to the quilt before being changed. +# +# By default the script will update all directories for which the +# output has been vetted. You can instead pass the names of individual +# directories, including those that haven't been approved. So: +# +# update-copyright.pl --this-year +# +# is the command that would be used at the beginning of a year to update +# all copyright notices (and possibly at other times to check whether +# new files have been added with old years). On the other hand: +# +# update-copyright.pl --this-year libjava +# +# would run the script on just libjava/. +# +# Note that things like --version output strings must be updated before +# this script is run. There's already a separate procedure for that. + +import os +import re +import sys +import time +import subprocess + +class Errors: + def __init__ (self): + self.num_errors = 0 + + def report (self, filename, string): + if filename: + string = filename + ': ' + string + sys.stderr.write (string + '\n') + self.num_errors += 1 + + def ok (self): + return self.num_errors == 0 + +class GenericFilter: + def __init__ (self): + self.skip_files = set() + self.skip_dirs = set() + self.skip_extensions = set() + self.fossilised_files = set() + self.own_files = set() + + self.skip_files |= set ([ + # Skip licence files. + 'COPYING', + 'COPYING.LIB', + 'COPYING3', + 'COPYING3.LIB', + 'LICENSE', + 'fdl.texi', + 'gpl_v3.texi', + 'fdl-1.3.xml', + 'gpl-3.0.xml', + + # Skip auto- and libtool-related files + 'aclocal.m4', + 'compile', + 'config.guess', + 'config.sub', + 'depcomp', + 'install-sh', + 'libtool.m4', + 'ltmain.sh', + 'ltoptions.m4', + 'ltsugar.m4', + 'ltversion.m4', + 'lt~obsolete.m4', + 'missing', + 'mkdep', + 'mkinstalldirs', + 'move-if-change', + 'shlibpath.m4', + 'symlink-tree', + 'ylwrap', + + # Skip FSF mission statement, etc. + 'gnu.texi', + 'funding.texi', + 'appendix_free.xml', + + # Skip imported texinfo files. + 'texinfo.tex', + ]) + + + def get_line_filter (self, dir, filename): + if filename.startswith ('ChangeLog'): + # Ignore references to copyright in changelog entries. + return re.compile ('\t') + + return None + + def skip_file (self, dir, filename): + if filename in self.skip_files: + return True + + (base, extension) = os.path.splitext (os.path.join (dir, filename)) + if extension in self.skip_extensions: + return True + + if extension == '.in': + # Skip .in files produced by automake. + if os.path.exists (base + '.am'): + return True + + # Skip files produced by autogen + if (os.path.exists (base + '.def') + and os.path.exists (base + '.tpl')): + return True + + # Skip configure files produced by autoconf + if filename == 'configure': + if os.path.exists (base + '.ac'): + return True + if os.path.exists (base + '.in'): + return True + + return False + + def skip_dir (self, dir, subdir): + return subdir in self.skip_dirs + + def is_fossilised_file (self, dir, filename): + if filename in self.fossilised_files: + return True + # Only touch current current ChangeLogs. + if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0: + return True + return False + + def by_package_author (self, dir, filename): + return filename in self.own_files + +class Copyright: + def __init__ (self, errors): + self.errors = errors + + # Characters in a range of years. Include '.' for typos. + ranges = '[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]' + + # Non-whitespace characters in a copyright holder's name. + name = '[\w.,-]' + + # Matches one year. + self.year_re = re.compile ('[0-9]+') + + # Matches part of a year or copyright holder. + self.continuation_re = re.compile (ranges + '|' + name) + + # Matches a full copyright notice: + self.copyright_re = re.compile ( + # 1: 'Copyright (C)', etc. + '([Cc]opyright' + '|[Cc]opyright\s+\([Cc]\)' + '|[Cc]opyright\s+%s' + '|[Cc]opyright\s+©' + '|[Cc]opyright\s+@copyright{}' + '|@set\s+copyright[\w-]+)' + + # 2: the years. Include the whitespace in the year, so that + # we can remove any excess. + '(\s*(?:' + ranges + ',?' + '|@value\{[^{}]*\})\s*)' + + # 3: 'by ', if used + '(by\s+)?' + + # 4: the copyright holder. Don't allow multiple consecutive + # spaces, so that right-margin gloss doesn't get caught + # (e.g. gnat_ugn.texi). + '(' + name + '(?:\s?' + name + ')*)?') + + # A regexp for notices that might have slipped by. Just matching + # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of + # HTML header markers, so check for 'copyright' and two digits. + self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]', + re.IGNORECASE) + self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ') + self.holders = { '@copying': '@copying' } + self.holder_prefixes = set() + + # True to 'quilt add' files before changing them. + self.use_quilt = False + + # If set, force all notices to include this year. + self.max_year = None + + # Goes after the year(s). Could be ', '. + self.separator = ' ' + + def add_package_author (self, holder, canon_form = None): + if not canon_form: + canon_form = holder + self.holders[holder] = canon_form + index = holder.find (' ') + while index >= 0: + self.holder_prefixes.add (holder[:index]) + index = holder.find (' ', index + 1) + + def add_external_author (self, holder): + self.holders[holder] = None + + class BadYear(): + def __init__ (self, year): + self.year = year + + def __str__ (self): + return 'unrecognised year: ' + self.year + + def parse_year (self, string): + year = int (string) + if len (string) == 2: + if year > 70: + return year + 1900 + elif len (string) == 4: + return year + raise self.BadYear (string) + + def year_range (self, years): + year_list = [self.parse_year (year) + for year in self.year_re.findall (years)] + assert len (year_list) > 0 + return (min (year_list), max (year_list)) + + def set_use_quilt (self, use_quilt): + self.use_quilt = use_quilt + + def include_year (self, year): + assert not self.max_year + self.max_year = year + + def canonicalise_years (self, dir, filename, filter, years): + # Leave texinfo variables alone. + if years.startswith ('@value'): + return years + + (min_year, max_year) = self.year_range (years) + + # Update the upper bound, if enabled. + if self.max_year and not filter.is_fossilised_file (dir, filename): + max_year = max (max_year, self.max_year) + + # Use a range. + if min_year == max_year: + return '%d' % min_year + else: + return '%d-%d' % (min_year, max_year) + + def strip_continuation (self, line): + line = line.lstrip() + match = self.comment_re.match (line) + if match: + line = line[match.end():].lstrip() + return line + + def is_complete (self, match): + holder = match.group (4) + return (holder + and (holder not in self.holder_prefixes + or holder in self.holders)) + + def update_copyright (self, dir, filename, filter, file, line, match): + orig_line = line + next_line = None + pathname = os.path.join (dir, filename) + + intro = match.group (1) + if intro.startswith ('@set'): + # Texinfo year variables should always be on one line + after_years = line[match.end (2):].strip() + if after_years != '': + self.errors.report (pathname, + 'trailing characters in @set: ' + + after_years) + return (False, orig_line, next_line) + else: + # If it looks like the copyright is incomplete, add the next line. + while not self.is_complete (match): + try: + next_line = file.next() + except StopIteration: + break + + # If the next line doesn't look like a proper continuation, + # assume that what we've got is complete. + continuation = self.strip_continuation (next_line) + if not self.continuation_re.match (continuation): + break + + # Merge the lines for matching purposes. + orig_line += next_line + line = line.rstrip() + ' ' + continuation + next_line = None + + # Rematch with the longer line, at the original position. + match = self.copyright_re.match (line, match.start()) + assert match + + holder = match.group (4) + + # Use the filter to test cases where markup is getting in the way. + if filter.by_package_author (dir, filename): + assert holder not in self.holders + + elif not holder: + self.errors.report (pathname, 'missing copyright holder') + return (False, orig_line, next_line) + + elif holder not in self.holders: + self.errors.report (pathname, + 'unrecognised copyright holder: ' + holder) + return (False, orig_line, next_line) + + else: + # See whether the copyright is associated with the package + # author. + canon_form = self.holders[holder] + if not canon_form: + return (False, orig_line, next_line) + + # Make sure the author is given in a consistent way. + line = (line[:match.start (4)] + + canon_form + + line[match.end (4):]) + + # Remove any 'by' + line = line[:match.start (3)] + line[match.end (3):] + + # Update the copyright years. + years = match.group (2).strip() + try: + canon_form = self.canonicalise_years (dir, filename, filter, years) + except self.BadYear as e: + self.errors.report (pathname, str (e)) + return (False, orig_line, next_line) + + line = (line[:match.start (2)] + + ' ' + canon_form + self.separator + + line[match.end (2):]) + + # Use the standard (C) form. + if intro.endswith ('right'): + intro += ' (C)' + elif intro.endswith ('(c)'): + intro = intro[:-3] + '(C)' + line = line[:match.start (1)] + intro + line[match.end (1):] + + # Strip trailing whitespace + line = line.rstrip() + '\n' + + return (line != orig_line, line, next_line) + + def process_file (self, dir, filename, filter): + pathname = os.path.join (dir, filename) + if filename.endswith ('.tmp'): + # Looks like something we tried to create before. + try: + os.remove (pathname) + except OSError: + pass + return + + lines = [] + changed = False + line_filter = filter.get_line_filter (dir, filename) + with open (pathname, 'r') as file: + prev = None + for line in file: + while line: + next_line = None + # Leave filtered-out lines alone. + if not (line_filter and line_filter.match (line)): + match = self.copyright_re.search (line) + if match: + res = self.update_copyright (dir, filename, filter, + file, line, match) + (this_changed, line, next_line) = res + changed = changed or this_changed + + # Check for copyright lines that might have slipped by. + elif self.other_copyright_re.search (line): + self.errors.report (pathname, + 'unrecognised copyright: %s' + % line.strip()) + lines.append (line) + line = next_line + + # If something changed, write the new file out. + if changed and self.errors.ok(): + tmp_pathname = pathname + '.tmp' + with open (tmp_pathname, 'w') as file: + for line in lines: + file.write (line) + if self.use_quilt: + subprocess.call (['quilt', 'add', pathname]) + os.rename (tmp_pathname, pathname) + + def process_tree (self, tree, filter): + for (dir, subdirs, filenames) in os.walk (tree): + # Don't recurse through directories that should be skipped. + for i in xrange (len (subdirs) - 1, -1, -1): + if filter.skip_dir (dir, subdirs[i]): + del subdirs[i] + + # Handle the files in this directory. + for filename in filenames: + if filter.skip_file (dir, filename): + sys.stdout.write ('Skipping %s\n' + % os.path.join (dir, filename)) + else: + self.process_file (dir, filename, filter) + +class CmdLine: + def __init__ (self, copyright = Copyright): + self.errors = Errors() + self.copyright = copyright (self.errors) + self.dirs = [] + self.default_dirs = [] + self.chosen_dirs = [] + self.option_handlers = dict() + self.option_help = [] + + self.add_option ('--help', 'Print this help', self.o_help) + self.add_option ('--quilt', '"quilt add" files before changing them', + self.o_quilt) + self.add_option ('--this-year', 'Add the current year to every notice', + self.o_this_year) + + def add_option (self, name, help, handler): + self.option_help.append ((name, help)) + self.option_handlers[name] = handler + + def add_dir (self, dir, filter = GenericFilter()): + self.dirs.append ((dir, filter)) + + def o_help (self, option = None): + sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n' + 'Options:\n' % sys.argv[0]) + format = '%-15s %s\n' + for (what, help) in self.option_help: + sys.stdout.write (format % (what, help)) + sys.stdout.write ('\nDirectories:\n') + + format = '%-25s' + i = 0 + for (dir, filter) in self.dirs: + i += 1 + if i % 3 == 0 or i == len (self.dirs): + sys.stdout.write (dir + '\n') + else: + sys.stdout.write (format % dir) + sys.exit (0) + + def o_quilt (self, option): + self.copyright.set_use_quilt (True) + + def o_this_year (self, option): + self.copyright.include_year (time.localtime().tm_year) + + def main (self): + for arg in sys.argv[1:]: + if arg[:1] != '-': + self.chosen_dirs.append (arg) + elif arg in self.option_handlers: + self.option_handlers[arg] (arg) + else: + self.errors.report (None, 'unrecognised option: ' + arg) + if self.errors.ok(): + if len (self.chosen_dirs) == 0: + self.chosen_dirs = self.default_dirs + if len (self.chosen_dirs) == 0: + self.o_help() + else: + for chosen_dir in self.chosen_dirs: + canon_dir = os.path.join (chosen_dir, '') + count = 0 + for (dir, filter) in self.dirs: + if (dir + os.sep).startswith (canon_dir): + count += 1 + self.copyright.process_tree (dir, filter) + if count == 0: + self.errors.report (None, 'unrecognised directory: ' + + chosen_dir) + sys.exit (0 if self.errors.ok() else 1) + +#---------------------------------------------------------------------------- + +class TopLevelFilter (GenericFilter): + def skip_dir (self, dir, subdir): + return True + +class ConfigFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + def skip_file (self, dir, filename): + if filename.endswith ('.m4'): + pathname = os.path.join (dir, filename) + with open (pathname) as file: + # Skip files imported from gettext. + if file.readline().find ('gettext-') >= 0: + return True + return GenericFilter.skip_file (self, dir, filename) + +class GCCFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_files |= set ([ + # Not part of GCC + 'math-68881.h', + ]) + + self.skip_dirs |= set ([ + # Better not create a merge nightmare for the GNAT folks. + 'ada', + + # Handled separately. + 'testsuite', + ]) + + self.skip_extensions |= set ([ + # Maintained by the translation project. + '.po', + + # Automatically-generated. + '.pot', + ]) + + self.fossilised_files |= set ([ + # Old news won't be updated. + 'ONEWS', + ]) + +class TestsuiteFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_extensions |= set ([ + # Don't change the tests, which could be woend by anyone. + '.c', + '.C', + '.cc', + '.h', + '.hs', + '.f', + '.f90', + '.go', + '.inc', + '.java', + ]) + + def skip_file (self, dir, filename): + # g++.niklas/README contains historical copyright information + # and isn't updated. + if filename == 'README' and os.path.basename (dir) == 'g++.niklas': + return True + return GenericFilter.skip_file (self, dir, filename) + +class LibCppFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_extensions |= set ([ + # Maintained by the translation project. + '.po', + + # Automatically-generated. + '.pot', + ]) + +class LibGCCFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_dirs |= set ([ + # Imported from GLIBC. + 'soft-fp', + ]) + +class LibJavaFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_dirs |= set ([ + # Handled separately. + 'testsuite', + + # Not really part of the library + 'contrib', + + # Imported from upstream + 'classpath', + 'libltdl', + ]) + + def get_line_filter (self, dir, filename): + if filename == 'NameDecoder.h': + return re.compile ('.*NAME_COPYRIGHT') + if filename == 'ICC_Profile.h': + return re.compile ('.*icSigCopyrightTag') + return GenericFilter.get_line_filter (self, dir, filename) + +class LibMudflapFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_dirs |= set ([ + # Handled separately. + 'testsuite', + ]) + +class LibStdCxxFilter (GenericFilter): + def __init__ (self): + GenericFilter.__init__ (self) + + self.skip_files |= set ([ + # Contains no copyright of its own, but quotes the GPL. + 'intro.xml', + ]) + + self.skip_dirs |= set ([ + # Contains automatically-generated sources. + 'html', + + # The testsuite data files shouldn't be changed. + 'data', + + # Contains imported images + 'images', + ]) + + self.own_files |= set ([ + # Contains markup around the copyright owner. + 'spine.xml', + ]) + + def get_line_filter (self, dir, filename): + if filename == 'boost_concept_check.h': + return re.compile ('// \(C\) Copyright Jeremy Siek') + return GenericFilter.get_line_filter (self, dir, filename) + +class GCCCopyright (Copyright): + def __init__ (self, errors): + Copyright.__init__ (self, errors) + + canon_fsf = 'Free Software Foundation, Inc.' + self.add_package_author ('Free Software Foundation', canon_fsf) + self.add_package_author ('Free Software Foundation.', canon_fsf) + self.add_package_author ('Free Software Foundation Inc.', canon_fsf) + self.add_package_author ('Free Software Foundation, Inc', canon_fsf) + self.add_package_author ('Free Software Foundation, Inc.', canon_fsf) + self.add_package_author ('The Free Software Foundation', canon_fsf) + self.add_package_author ('The Free Software Foundation, Inc.', canon_fsf) + self.add_package_author ('Software Foundation, Inc.', canon_fsf) + + self.add_external_author ('ARM') + self.add_external_author ('AdaCore') + self.add_external_author ('Ami Tavory and Vladimir Dreizin, IBM-HRL.') + self.add_external_author ('Cavium Networks.') + self.add_external_author ('Faraday Technology Corp.') + self.add_external_author ('Florida State University') + self.add_external_author ('Greg Colvin and Beman Dawes.') + self.add_external_author ('Hewlett-Packard Company') + self.add_external_author ('Information Technology Industry Council.') + self.add_external_author ('James Theiler, Brian Gough') + self.add_external_author ('Makoto Matsumoto and Takuji Nishimura,') + self.add_external_author ('National Research Council of Canada.') + self.add_external_author ('Peter Dimov and Multi Media Ltd.') + self.add_external_author ('Peter Dimov') + self.add_external_author ('Pipeline Associates, Inc.') + self.add_external_author ('Regents of the University of California.') + self.add_external_author ('Silicon Graphics Computer Systems, Inc.') + self.add_external_author ('Silicon Graphics') + self.add_external_author ('Stephen L. Moshier') + self.add_external_author ('Sun Microsystems, Inc. All rights reserved.') + self.add_external_author ('The Go Authors. All rights reserved.') + self.add_external_author ('The Go Authors. All rights reserved.') + self.add_external_author ('The Go Authors.') + self.add_external_author ('The Regents of the University of California.') + self.add_external_author ('Unicode, Inc.') + self.add_external_author ('University of Toronto.') + +class GCCCmdLine (CmdLine): + def __init__ (self): + CmdLine.__init__ (self, GCCCopyright) + + self.add_dir ('.', TopLevelFilter()) + # boehm-gc is imported from upstream. + self.add_dir ('config', ConfigFilter()) + # contrib isn't really part of GCC. + self.add_dir ('fixincludes') + self.add_dir ('gcc', GCCFilter()) + self.add_dir (os.path.join ('gcc', 'testsuite'), TestsuiteFilter()) + self.add_dir ('gnattools') + self.add_dir ('include') + self.add_dir ('libada') + self.add_dir ('libatomic') + self.add_dir ('libbacktrace') + self.add_dir ('libcpp', LibCppFilter()) + self.add_dir ('libdecnumber') + # libffi is imported from upstream. + self.add_dir ('libgcc', LibGCCFilter()) + self.add_dir ('libgfortran') + self.add_dir ('libgomp') + self.add_dir ('libiberty') + self.add_dir ('libitm') + self.add_dir ('libjava', LibJavaFilter()) + self.add_dir (os.path.join ('libjava', 'testsuite'), TestsuiteFilter()) + self.add_dir ('libmudflap', LibMudflapFilter()) + self.add_dir (os.path.join ('libmudflap', 'testsuite'), + TestsuiteFilter()) + self.add_dir ('libobjc') + self.add_dir ('libquadmath') + # libsanitiser is imported from upstream. + self.add_dir ('libssp') + self.add_dir ('libstdc++-v3', LibStdCxxFilter()) + self.add_dir ('lto-plugin') + # zlib is imported from upstream. + + self.default_dirs = [ + 'gcc', + 'libada', + 'libatomic', + 'libbacktrace', + 'libcpp', + 'libgcc', + 'libgfortran', + 'libgomp', + 'libitm', + 'libmudflap', + 'libobjc', + 'libquadmath', + 'libssp', + 'libstdc++-v3', + ] + +GCCCmdLine().main()