4d003ccf496a433108f2b345cc8bfb61fbeda0c5
[gcc.git] / contrib / gcc-changelog / git_commit.py
1 #!/usr/bin/env python3
2 #
3 # This file is part of GCC.
4 #
5 # GCC is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 3, or (at your option) any later
8 # version.
9 #
10 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 # for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
18
19 import os
20 import re
21
# Folder names (relative to the repository root) that are accepted as the
# location of a ChangeLog entry; membership is all that matters, so the
# names are simply kept in alphabetical order.
changelog_locations = {
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib',
}
76
# Component names that may precede the slash in a "PR component/NNNN" line;
# any other component is reported as 'invalid PR component'.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web',
}
127
# Changed files below these path prefixes do not have to be mentioned in
# any ChangeLog entry.
ignored_prefixes = [
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
]

# The only expanded "folder/pattern" names accepted for wildcard entries
# (file names written with a trailing '*').
wildcard_prefixes = [
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
]

# Files that, like ChangeLog files themselves, are expected to be updated
# in commits of their own.
misc_files = [
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE',
]
149
# Entry header: ISO date, exactly two spaces, then "name <email>".
author_line_regex = re.compile(
    r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# Further author of the same entry: a tab, optional spaces, "name <email>".
additional_author_regex = re.compile(
    r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "[For ]path/ChangeLog[:]" location line; group 1 is the path part.
# Note that '+-/' inside the character class is a range ('+' up to '/').
changelog_regex = re.compile(
    r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "<tab>PR component/NNNN"; the component part is optional in the pattern.
pr_regex = re.compile(
    r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$')
# "<tab>DR NNNN".
dr_regex = re.compile(
    r'\tDR ([0-9]+)$')
# "<tab>*" followed by any number of spaces and the rest of the line.
star_prefix_regex = re.compile(
    r'\t\*(?P<spaces>\ *)(?P<content>.*)')

# Maximum width of a ChangeLog line once tabs are expanded to TAB_WIDTH.
LINE_LIMIT = 100
TAB_WIDTH = 8

# Prefixes of commit message lines that get special treatment; the
# lower-case ones are compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
CHERRY_PICK_PREFIX = '(cherry picked from commit '
REVERT_PREFIX = 'This reverts commit '

# Trailer lines that are skipped while parsing ChangeLog entries.
REVIEW_PREFIXES = (
    'reviewed-by: ',
    'reviewed-on: ',
    'signed-off-by: ',
    'acked-by: ',
    'tested-by: ',
    'reported-by: ',
    'suggested-by: ',
)

# strftime() format of the dates written into ChangeLog entries.
DATE_FORMAT = '%Y-%m-%d'
168
169
class Error:
    """A problem found in a commit, optionally tied to an offending line."""

    def __init__(self, message, line=None):
        self.message, self.line = message, line

    def __repr__(self):
        # Only a non-empty line is appended (in double quotes) to the message.
        if not self.line:
            return self.message
        return self.message + ':"%s"' % self.line
180
181
class ChangeLogEntry:
    """A single ChangeLog entry collected from a commit message.

    Attributes:
      folder        -- ChangeLog location of the entry (may be None).
      author_lines  -- list of (name, date-or-None) tuples.
      initial_prs   -- the PR lines the entry was created with.
      prs           -- all PR lines of the entry.
      lines         -- the text lines of the entry.
      files         -- file names found by parse_file_names().
      file_patterns -- wildcard names (trailing '*' removed) found by
                       parse_file_names().
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # Private copies: the caller's lists must not be shared.
        # ('list.copy()' is not available before Python 3.3.)
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while we are between a star prefix and the end of the file
        # list, which is marked by an open paren or a colon.
        collecting = False

        for text in self.lines:
            star = star_prefix_regex.match(text)
            if star:
                # A new file list starts right after the star.
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Drop everything that is not a file name: first "(NAME)"
            # entities, then the colon and the description after it.
            for terminator in ('(', ':'):
                cut = text.find(terminator)
                if cut != -1:
                    text = text[:cut]
                    collecting = False

            # What remains is a comma separated list of file names.
            for chunk in text.split(','):
                name = chunk.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """The first non-empty date among the author lines, or None."""
        return next((pair[1] for pair in self.author_lines if pair[1]), None)

    @property
    def authors(self):
        """The author names, without their dates."""
        return [pair[0] for pair in self.author_lines]

    @property
    def is_empty(self):
        """True if no text line and no PR was added since creation."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True if AUTHOR is already one of the entry's authors."""
        return any(pair[0] == author for pair in self.author_lines)
247
248
class GitInfo:
    """Plain record holding the commit data that GitCommit works on."""

    def __init__(self, hexsha, date, author, lines, modified_files):
        # Identity of the commit.
        self.hexsha, self.date, self.author = hexsha, date, author
        # Commit message lines and the files touched by the commit.
        self.lines, self.modified_files = lines, modified_files
256
257
class GitCommit:
    """Parse the ChangeLog part of a commit message and validate it.

    INFO is a GitInfo-like object (hexsha, date, author, lines,
    modified_files).  With STRICT set, commits that mix ChangeLog or
    DATESTAMP/BASE-VER/DEV-PHASE updates with other files are rejected.
    COMMIT_TO_INFO_HOOK, if given, maps a commit hash to the info object of
    that commit (or None when unknown); it is used for revert and
    cherry-pick commits.

    All problems are collected in self.errors; the parsed entries are in
    self.changelog_entries.
    """

    def __init__(self, info, strict=True, commit_to_info_hook=None):
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            if line.startswith(REVERT_PREFIX):
                self.revert_commit = line[len(REVERT_PREFIX):].rstrip('.')
                break
        if self.revert_commit:
            # From now on self.info describes the reverted commit, while
            # self.original_info keeps describing the revert itself.
            reverted_info = self._lookup_commit(self.revert_commit)
            if not reverted_info:
                # No hook, or a commit the hook does not know: report it
                # instead of crashing on a missing info object.
                self.errors.append(Error('cannot find to-be-reverted commit',
                                         self.revert_commit))
                return
            self.info = reverted_info

        project_files = [f for f in self.info.modified_files
                         if self.is_changelog_filename(f[0])
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files and strict:
            self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and '
                                     'DEV-PHASE updates should be done '
                                     'separately from normal commits'))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            # The file checks rely on every entry having a valid folder.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()

    def _lookup_commit(self, hexsha):
        """Return the info of commit HEXSHA, or None if it is unavailable."""
        if self.commit_to_info_hook is None:
            return None
        return self.commit_to_info_hook(hexsha)

    @property
    def success(self):
        """True if no error has been recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of the modified files whose status is 'A'."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path):
        """Return True if PATH names a ChangeLog file."""
        return path.endswith('/ChangeLog') or path == 'ChangeLog'

    @classmethod
    def find_changelog_location(cls, name):
        """Return NAME as a ChangeLog location, or None if it is not one.

        One leading tab, one trailing colon and one trailing slash are
        stripped (in that order) before the lookup.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Return AUTHOR with an extra space inserted before each '<'."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Parse 'git ... --name-status' output into (path, status) tuples.

        Added, deleted and modified files keep their status letter, a type
        change ('T') is recorded as a modification and a rename as a
        deletion of the old path plus an addition of the new one.  Lines
        with any other status are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t in ('A', 'D', 'M'):
                modified_files.append((parts[1], t))
            elif t == 'T':
                modified_files.append((parts[1], 'M'))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def parse_lines(self, all_are_ignored):
        """Set self.changes to the message tail that holds the ChangeLog.

        The tail starts at the first line that looks like a ChangeLog
        location, a star entry, a PR/DR line or an author line.  If there
        is no such line, an error is recorded unless ALL_ARE_IGNORED.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Fills self.changelog_entries, self.top_level_authors,
        self.top_level_prs, self.co_authors and self.cherry_pick_commit and
        records formatting errors in self.errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry whose location is deduced.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                self.errors.append(Error('line exceeds %d character limit'
                                         % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            location = None if m else self.find_changelog_location(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif location:
                last_entry = ChangeLogEntry(location,
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                author_match = author_line_regex.match(line)
                additional_match = None
                pr_match = None
                if not author_match:
                    additional_match = additional_author_regex.match(line)
                    if not additional_match:
                        pr_match = pr_regex.match(line)
                if author_match:
                    author_tuple = (author_match.group('name'),
                                    author_match.group('datetime'))
                elif additional_match:
                    if len(additional_match.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (additional_match.group('name'), None)
                elif pr_match:
                    component = pr_match.group('component')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    if '<' in name:
                        self.co_authors.append(self.format_git_author(name))
                    else:
                        # format_git_author() asserts on such input; report
                        # the malformed trailer instead of aborting.
                        msg = 'Co-authored-by line lacks an e-mail address'
                        self.errors.append(Error(msg, line))
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                elif line.startswith(CHERRY_PICK_PREFIX):
                    commit = line[len(CHERRY_PICK_PREFIX):].rstrip(')')
                    self.cherry_pick_commit = commit
                    continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if len(m.group('spaces')) != 1:
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)

    def parse_file_names(self):
        """Extract the mentioned file names of every entry."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Report wildcard entries that are not listed in wildcard_prefixes."""
        for entry in self.changelog_entries:
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                if name not in wildcard_prefixes:
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Report star lines ending in ':' that have no description.

        Such a line is only valid when it is followed by a continuation
        line, i.e. by a line that is not another star line.
        """
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                if (star_prefix_regex.match(line) and line.endswith(':') and
                    (i == len(entry.lines) - 1
                     or star_prefix_regex.match(entry.lines[i + 1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def get_file_changelog_location(self, changelog_file):
        """Return the folder in which CHANGELOG_FILE was modified.

        CHANGELOG_FILE is a file name as written in a ChangeLog entry.  The
        result is '' if a modified file has exactly that path, the leading
        part of the first modified path that ends with '/CHANGELOG_FILE'
        otherwise, and None if no modified file matches.
        """
        suffix = '/' + changelog_file
        for file in self.info.modified_files:
            path = file[0]
            if path == changelog_file:
                # root ChangeLog file
                return ''
            # Match whole trailing path components only, so that 'tree.c'
            # is not found inside 'libcpp/tree.cc'.
            if path.endswith(suffix):
                return path[:-len(suffix)]
        return None

    def deduce_changelog_locations(self):
        """Derive the folder of entries that lack one from their files.

        An error is recorded if the files of an entry point to different
        locations or to none at all.
        """
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                            or (location and location in changelog_locations)):
                        # Compare against None: the root location is ''
                        # and must take part in the uniqueness check too.
                        if changelog is not None and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Return True if PATH starts with one of ignored_prefixes."""
        for ignored in ignored_prefixes:
            if path.startswith(ignored):
                return True
        return False

    @classmethod
    def get_changelog_by_path(cls, path):
        """Return the closest enclosing ChangeLog location of PATH.

        Trailing path components are dropped until a known location is
        found; '' is returned if there is none.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Cross-check the mentioned files against the modified files.

        Reports mentioned but unchanged files, changed but unmentioned
        files and unused wildcard patterns.  A "New file." line is
        generated for every added file that is not mentioned.
        """
        # Callers only get here without errors, so every entry has a folder.
        assert all(x.folder is not None for x in self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    mentioned_files.add(os.path.join(entry.folder, file))
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        new_files = set(self.new_files)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            self.errors.append(Error(msg, file))
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = [x for x in self.changelog_entries
                               if x.folder == changelog_location]
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        # There may be no entry at all, e.g. when the
                        # message contains only an author line.
                        if not prs and self.changelog_entries:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            if any(x.prs != prs
                                   for x in self.changelog_entries):
                                prs = []
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    file = file[len(entry.folder):].lstrip('/')
                    entry.lines.append('\t* %s: New file.' % file)
                    entry.files.append(file)
                else:
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Report files mentioned in an entry of the wrong ChangeLog."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Return the author header of a ChangeLog entry.

        The first author is written as "TIMESTAMP  author" (two spaces, as
        required by author_line_regex), each further one on its own line
        indented with one tab and four spaces (as required when parsing
        additional authors).  PREFIX is put in front of every line and the
        header is terminated by an empty line.
        """
        output = ''
        for i, author in enumerate(authors):
            if i == 0:
                output += '%s%s  %s\n' % (prefix, timestamp, author)
            else:
                output += '%s\t    %s\n' % (prefix, author)
        output += '\n'
        return output

    def to_changelog_entries(self, use_commit_ts=False):
        """Generate (folder, text) pairs, one per ChangeLog entry.

        For regular commits the date written in the entry is used unless
        it is missing or USE_COMMIT_TS is set, in which case the commit
        date is used.  Backports and reverts get an outer header with the
        author and date of the processed commit which wraps the header of
        the original change.
        """
        # self.info is the reverted commit for reverts and the processed
        # commit otherwise; self.original_info is always the latter.
        commit_timestamp = self.info.date.strftime(DATE_FORMAT)
        original_timestamp = self.original_info.date.strftime(DATE_FORMAT)
        original_author = self.original_info.author

        picked_timestamp = None
        if self.cherry_pick_commit:
            info = self._lookup_commit(self.cherry_pick_commit)
            # it can happen that it is a cherry-pick for a different
            # repository
            if info:
                picked_timestamp = info.date.strftime(DATE_FORMAT)

        for entry in self.changelog_entries:
            output = ''
            timestamp = entry.datetime
            if self.cherry_pick_commit:
                timestamp = picked_timestamp or commit_timestamp
            elif self.revert_commit:
                # date of the commit that is being reverted
                timestamp = commit_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = commit_timestamp
            authors = entry.authors if entry.authors else [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                output += self.format_authors_in_changelog([original_author],
                                                           original_timestamp)
                if self.cherry_pick_commit:
                    output += '\tBackported from master:\n'
                else:
                    output += '\tRevert:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output += self.format_authors_in_changelog(authors, timestamp)
            for pr in entry.prs:
                output += '\t%s\n' % pr
            for line in entry.lines:
                output += line + '\n'
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print all generated ChangeLog entries."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print all recorded errors."""
        print('Errors:')
        for error in self.errors:
            print(error)
675 for error in self.errors:
676 print(error)