* update-copyright.py (LibJavaFilter): Remove.
[gcc.git] / contrib / update-copyright.py
1 #!/usr/bin/python
2 #
3 # Copyright (C) 2013-2017 Free Software Foundation, Inc.
4 #
5 # This script is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3, or (at your option)
8 # any later version.
9
10 # This script adjusts the copyright notices at the top of source files
11 # so that they have the form:
12 #
13 # Copyright XXXX-YYYY Free Software Foundation, Inc.
14 #
15 # It doesn't change code that is known to be maintained elsewhere or
16 # that carries a non-FSF copyright.
17 #
18 # The script also doesn't change testsuite files, except those in
19 # libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
20 # while most tests in other directories are just things that failed at some
21 # point in the past.
22 #
23 # Pass --this-year to the script if you want it to add the current year
24 # to all applicable notices. Pass --quilt if you are using quilt and
25 # want files to be added to the quilt before being changed.
26 #
27 # By default the script will update all directories for which the
28 # output has been vetted. You can instead pass the names of individual
29 # directories, including those that haven't been approved. So:
30 #
31 # update-copyright.py --this-year
32 #
33 # is the command that would be used at the beginning of a year to update
34 # all copyright notices (and possibly at other times to check whether
35 # new files have been added with old years). On the other hand:
36 #
37 # update-copyright.py --this-year libitm
38 #
39 # would run the script on just libitm/.
40 #
41 # Note that things like --version output strings must be updated before
42 # this script is run. There's already a separate procedure for that.
43
44 import os
45 import re
46 import sys
47 import time
48 import subprocess
49
class Errors:
    """Counts the problems reported during a run and echoes each one
    to stderr."""

    def __init__ (self):
        # Number of calls made to report() so far.
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr on a line of its own and count it as an
        error.  When FILENAME is non-empty the message is prefixed with
        'FILENAME: '."""
        message = string
        if filename:
            message = ''.join ([filename, ': ', string])
        sys.stderr.write (message + '\n')
        self.num_errors = self.num_errors + 1

    def ok (self):
        """Return True while no error has been reported."""
        return not self.num_errors
62
class GenericFilter:
    """Default policy describing which files and directories are left
    alone.  Subclasses extend the sets created here or override the
    individual predicates."""

    def __init__ (self):
        # Basenames for which skip_file returns True.
        self.skip_files = set()
        # Subdirectory names for which skip_dir returns True.
        self.skip_dirs = set()
        # Extensions (with the leading dot) for which skip_file returns True.
        self.skip_extensions = set()
        # Basenames for which is_fossilised_file returns True.
        self.fossilised_files = set()
        # Basenames for which by_package_author returns True.
        self.own_files = set()

        self.skip_files.update ([
                # Licence files.
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',

                # Auto- and libtool-related files.
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',

                # FSF mission statement, etc.
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',

                # Imported texinfo files.
                'texinfo.tex',
                ])

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp matching the lines of FILENAME that
        must be left untouched, or None if no line is exempt."""
        if not filename.startswith ('ChangeLog'):
            return None
        # Changelog entries start with a tab; references to copyright
        # inside them are ignored.
        return re.compile ('\t')

    def skip_file (self, dir, filename):
        """Return True if FILENAME in directory DIR should not be
        processed at all."""
        if filename in self.skip_files:
            return True

        (stem, suffix) = os.path.splitext (os.path.join (dir, filename))
        if suffix in self.skip_extensions:
            return True

        exists = os.path.exists
        if suffix == '.in':
            # .in files produced by automake have a sibling .am file.
            if exists (stem + '.am'):
                return True

            # Files produced by autogen have both a .def and a .tpl sibling.
            if exists (stem + '.def') and exists (stem + '.tpl'):
                return True

        # configure scripts produced by autoconf sit next to a
        # configure.ac or configure.in.
        if filename == 'configure':
            if exists (stem + '.ac') or exists (stem + '.in'):
                return True

        return False

    def skip_dir (self, dir, subdir):
        """Return True if subdirectory SUBDIR of DIR should not be
        entered."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME is listed in fossilised_files or is a
        ChangeLog other than the current one."""
        if filename in self.fossilised_files:
            return True
        # Only the file called exactly 'ChangeLog' counts as current; any
        # other name containing 'ChangeLog' is treated as fossilised.
        return 'ChangeLog' in filename and filename != 'ChangeLog'

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
161
class Copyright:
    """Finds copyright notices in files and rewrites them in a canonical
    form: a '(C)' marker, a single year or a 'first-last' year range, and
    the canonical spelling of a registered package author.

    Problems are passed to the ERRORS object given to the constructor,
    which must provide report (filename, string) and ok().  Filters
    passed to the methods are expected to provide the GenericFilter
    interface."""

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice.  The pieces are raw strings so
        # that the regexp escapes are not treated as (invalid) string
        # escapes; the resulting pattern text is unchanged.
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            # NOTE(review): this alternative contains a non-ASCII
            # character, which Python 2 only accepts with a source
            # encoding declaration -- confirm the intended text.
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r"|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)

        # Comment leaders stripped from continuation lines.
        self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each recognised holder spelling to its canonical form, or
        # to None for holders whose notices must be left alone.
        self.holders = { '@copying': '@copying' }

        # Proper prefixes (ending before a space) of package author
        # spellings; a holder equal to one of these may continue on the
        # next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author, to be
        rewritten as CANON_FORM (HOLDER itself when CANON_FORM is not
        given)."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        # Record every prefix of HOLDER that ends just before a space.
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are not
        modified."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a string it does not accept.

        Deriving from Exception is required for the class to be raised
        and caught on Python 3; it is harmless on Python 2."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a year.  Four-digit strings
        are returned as they are; two-digit strings greater than 70 are
        taken to be 19xx.  Anything else raises BadYear."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) for the years mentioned in YEARS,
        which must contain at least one run of digits."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Set whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Make YEAR the minimum upper bound of every non-fossilised
        notice.  May only be called while no such year is set."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return the canonical spelling of YEARS: a single year or
        'first-last'.  FILTER is only consulted when include_year has
        been called.  May raise BadYear."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and, if present, without
        one leading comment marker and the whitespace after it."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH (from copyright_re) has a holder
        that is not merely a prefix of a package author spelling."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the iterator LINE came from; further lines are pulled
        from it while the notice looks incomplete.  Returns a tuple
        (changed, text, next_line): CHANGED says whether TEXT differs
        from the original text consumed, TEXT replaces LINE and any
        merged continuation lines, and NEXT_LINE is a line that was read
        ahead but not merged (None otherwise)."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works for Python 2 and Python 3
                    # iterators alike, unlike the Python 2-only
                    # file.next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                # Later groups are edited first so that the offsets of
                # earlier groups stay valid.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  When group 3 did not take part in the
                # match both offsets are -1 and the line is unchanged.
                line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the notices in FILENAME in directory DIR.  The file is
        rewritten, via a '.tmp' sibling, only if a notice changed and no
        error has been reported so far.  Files whose own name ends in
        '.tmp' are deleted instead of being processed."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        with open (pathname, 'r') as file:
            for line in file:
                # update_copyright may hand back a line it read ahead;
                # keep going until that has been handled as well.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                for line in lines:
                    file.write (line)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list is pruned in place because os.walk reads it back
            # to decide where to descend.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)
441 self.process_file (dir, filename, filter)
442
class CmdLine:
    """Command-line driver.  Holds the registered directories and options
    and hands the chosen directories to a Copyright object."""

    def __init__ (self, copyright = Copyright):
        self.errors = Errors()
        # COPYRIGHT is called with the error collector to build the worker.
        self.copyright = copyright (self.errors)
        # (directory, filter) pairs, in registration order.
        self.dirs = []
        # Directories used when none is named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        # Option name -> handler called with the option name.
        self.option_handlers = dict()
        # (option name, help text) pairs, in registration order.
        self.option_help = []

        builtin_options = (
            ('--help', 'Print this help', self.o_help),
            ('--quilt', '"quilt add" files before changing them',
             self.o_quilt),
            ('--this-year', 'Add the current year to every notice',
             self.o_this_year),
            )
        for (opt_name, opt_help, opt_handler) in builtin_options:
            self.add_option (opt_name, opt_help, opt_handler)

    def add_option (self, name, help, handler):
        """Register option NAME with help text HELP; HANDLER is called
        with NAME when the option is seen."""
        self.option_handlers[name] = handler
        self.option_help.append ((name, help))

    # NOTE: the default FILTER is evaluated once, when this method is
    # defined, so every directory added without an explicit filter shares
    # the same GenericFilter instance.
    def add_dir (self, dir, filter = GenericFilter()):
        """Register directory DIR, to be processed with FILTER."""
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message, the options and the registered
        directories (three per line), then exit with status 0."""
        emit = sys.stdout.write
        emit ('Usage: %s [options] dir1 dir2...\n\n'
              'Options:\n' % sys.argv[0])
        for (what, text) in self.option_help:
            emit ('%-15s %s\n' % (what, text))
        emit ('\nDirectories:\n')

        total = len (self.dirs)
        for (position, (dir, filter)) in enumerate (self.dirs, 1):
            # End the row after every third name and after the last one.
            if position % 3 == 0 or position == total:
                emit (dir + '\n')
            else:
                emit ('%-25s' % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handler for --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handler for --this-year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the selected directories and exit with
        status 0 if no error was reported, 1 otherwise."""
        for arg in sys.argv[1:]:
            if not arg.startswith ('-'):
                self.chosen_dirs.append (arg)
            elif arg not in self.option_handlers:
                self.errors.report (None, 'unrecognised option: ' + arg)
            else:
                self.option_handlers[arg] (arg)

        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # A trailing separator on both sides makes the prefix
                    # test match whole path components only.
                    prefix = os.path.join (chosen_dir, '')
                    selected = [(dir, filter)
                                for (dir, filter) in self.dirs
                                if (dir + os.sep).startswith (prefix)]
                    for (dir, filter) in selected:
                        self.copyright.process_tree (dir, filter)
                    if not selected:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)

        if self.errors.ok():
            status = 0
        else:
            status = 1
        sys.exit (status)
515
516 #----------------------------------------------------------------------------
517
class TopLevelFilter (GenericFilter):
    """Filter that never descends: only the files directly inside the
    directory it is applied to are considered."""

    def skip_dir (self, dir, subdir):
        # Every subdirectory is skipped, whatever its name.
        return True
521
class ConfigFilter (GenericFilter):
    """Filter that additionally skips .m4 files whose first line mentions
    'gettext-'."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        """Return True for .m4 files imported from gettext; otherwise
        defer to GenericFilter.skip_file."""
        if filename.endswith ('.m4'):
            with open (os.path.join (dir, filename)) as m4_file:
                first_line = m4_file.readline()
            # Skip files imported from gettext.
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file (self, dir, filename)
534
class GCCFilter (GenericFilter):
    """Filter that extends the generic skip lists with files,
    directories and extensions specific to the compiler sources."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Not part of GCC.
        self.skip_files.add ('math-68881.h')

        self.skip_dirs.update ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        # Old news won't be updated.
        self.fossilised_files.add ('ONEWS')
564
class TestsuiteFilter (GenericFilter):
    """Filter that skips test sources by extension, plus two README
    files."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Don't change the tests, which could be owned by anyone.
        self.skip_extensions.update ([
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        """Return True for the README files that are never updated;
        otherwise defer to GenericFilter.skip_file."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.  Similarly params/README.
        frozen_readme_dirs = ('g++.niklas', 'params')
        if (filename == 'README'
            and os.path.basename (dir) in frozen_readme_dirs):
            return True
        return GenericFilter.skip_file (self, dir, filename)
592
class LibCppFilter (GenericFilter):
    """Filter that additionally skips translation catalogues."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])
604
class LibGCCFilter (GenericFilter):
    """Filter that additionally skips the soft-fp directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Imported from GLIBC.
        self.skip_dirs.add ('soft-fp')
613
class LibStdCxxFilter (GenericFilter):
    """Filter with extra skip lists, one own file, and a line filter for
    boost_concept_check.h."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Contains no copyright of its own, but quotes the GPL.
        self.skip_files.add ('intro.xml')

        self.skip_dirs.update ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        # Contains markup around the copyright owner.
        self.own_files.add ('spine.xml')

    def get_line_filter (self, dir, filename):
        """Return a regexp for the lines of boost_concept_check.h that
        must be left alone; otherwise defer to
        GenericFilter.get_line_filter."""
        if filename != 'boost_concept_check.h':
            return GenericFilter.get_line_filter (self, dir, filename)
        return re.compile (r'// \(C\) Copyright Jeremy Siek')
643
class GCCCopyright (Copyright):
    """Copyright updater preloaded with the holder names it recognises:
    spellings of the FSF, which are canonicalised, and external holders,
    whose notices are left untouched."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        # Every spelling below is rewritten to this form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
            )
        for spelling in fsf_spellings:
            self.add_package_author (spelling, canon_fsf)

        external_holders = (
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'NVIDIA Corporation',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            # NOTE(review): the next entry is registered twice with
            # identical text as seen here; the two may have differed only
            # in whitespace originally -- confirm.
            'The Go Authors. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
            )
        for holder in external_holders:
            self.add_external_author (holder)
685
class GCCCmdLine (CmdLine):
    """Command line preloaded with the directories of the GCC tree, the
    filter each one uses, and the directories processed by default."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        # (directory, filter class) pairs in registration order.  None
        # means add_dir is called without a filter and so uses its
        # default.
        known_dirs = [
            ('.', TopLevelFilter),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter),
            # contrib isn't really part of GCC.
            ('fixincludes', None),
            ('gcc', GCCFilter),
            (os.path.join ('gcc', 'testsuite'), TestsuiteFilter),
            ('gnattools', None),
            ('gotools', None),
            ('include', None),
            # intl is imported from upstream.
            ('libada', None),
            ('libatomic', None),
            ('libbacktrace', None),
            ('libcc1', None),
            # libcilkrts is imported from upstream.
            ('libcpp', LibCppFilter),
            ('libdecnumber', None),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter),
            ('libgfortran', None),
            # libgo is imported from upstream.
            ('libgomp', None),
            ('libiberty', None),
            ('libitm', None),
            ('libobjc', None),
            # liboffloadmic is imported from upstream.
            ('libquadmath', None),
            # libsanitizer is imported from upstream.
            ('libssp', None),
            ('libstdc++-v3', LibStdCxxFilter),
            ('libvtv', None),
            ('lto-plugin', None),
            # maintainer-scripts is not registered (no reason recorded).
            # zlib is imported from upstream.
            ]
        for (dir, filter_class) in known_dirs:
            if filter_class is None:
                self.add_dir (dir)
            else:
                self.add_dir (dir, filter_class())

        # Directories processed when none is named on the command line.
        self.default_dirs = [
            'gcc',
            'include',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcc1',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libiberty',
            'libitm',
            'libobjc',
            'libssp',
            'libstdc++-v3',
            'libvtv',
            'lto-plugin',
            ]
746
# Script entry point: build the GCC-specific command line and run it.
# main() parses sys.argv and always finishes by calling sys.exit().
GCCCmdLine().main()