a24a251d8f39260ef7b5c46551345da8c48fda37
[gcc.git] / contrib / gcc-changelog / git_commit.py
1 #!/usr/bin/env python3
2 #
3 # This file is part of GCC.
4 #
5 # GCC is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 3, or (at your option) any later
8 # version.
9 #
10 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 # for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with GCC; see the file COPYING3. If not see
17 # <http://www.gnu.org/licenses/>.
18
19 import os
20 import re
21
# Directories (relative to the repository root) that are accepted as
# ChangeLog locations.
changelog_locations = {
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib',
}

# Component names accepted in front of the slash of a "PR component/NNN"
# line.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web',
}

# Path prefixes of files that may be changed without being mentioned in a
# ChangeLog entry.
ignored_prefixes = [
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/go.test/test/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
]

# Files that, like ChangeLog files themselves, are expected to be updated
# separately from normal commits.
misc_files = [
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE',
]

# "YYYY-MM-DD", exactly two spaces, then "Name <email>".
author_line_regex = \
    re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# A tab, optional spaces (their count is validated by the caller), then
# "Name <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "[For ]some/dir/ChangeLog[:]"; group 1 is the directory.  The character
# class lists '+', ',', '-', '.' and '/' explicitly; this is exactly the set
# the former "+-/" range matched.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+,\-./]*)/ChangeLog:?')
# A tab followed by "PR [component/]NNN" at the end of the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$')
# A tab followed by "DR NNN" at the end of the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# A tab and an asterisk; the number of following spaces is validated by the
# caller and "content" is the remainder of the line.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')

# Maximum width of a ChangeLog line once tabs are expanded to TAB_WIDTH.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Trailer prefixes; all but CHERRY_PICK_PREFIX are compared against the
# lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
CHERRY_PICK_PREFIX = '(cherry picked from commit '
REVIEWED_BY_PREFIX = 'reviewed-by: '
REVIEWED_ON_PREFIX = 'reviewed-on: '
SIGNED_OFF_BY_PREFIX = 'signed-off-by: '

REVIEW_PREFIXES = (REVIEWED_BY_PREFIX, REVIEWED_ON_PREFIX,
                   SIGNED_OFF_BY_PREFIX)
162
163
class Error:
    """A single problem found while verifying a commit message."""

    def __init__(self, message, line=None):
        """Store the error MESSAGE and, optionally, the offending LINE."""
        self.message = message
        self.line = line

    def __repr__(self):
        """Return the message, followed by the quoted line when it is set.

        A falsy line (None or an empty string) is left out.
        """
        s = self.message
        if self.line:
            s += ':"%s"' % self.line
        return s
174
175
class ChangeLogEntry:
    """One ChangeLog entry: a folder, its authors, its PRs and its lines."""

    def __init__(self, folder, authors, prs):
        """Create an entry for FOLDER.

        AUTHORS is an iterable of (name, datetime) tuples and PRS an iterable
        of PR lines.  Both are copied, so later changes to the entry do not
        affect the caller's lists.
        """
        self.folder = folder
        # list() is used for copying because Python 2 lists have no copy().
        self.author_lines = list(authors)
        # PRs the entry started with; is_empty compares against them.
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []

    @property
    def files(self):
        """Return the file names mentioned in the entry's lines."""
        files = []

        # Whether the content currently processed is between a star prefix
        # and the end of the file list: a colon or an open paren.
        in_location = False

        for line in self.lines:
            # If this line matches the star prefix, start the location
            # processing on the information that follows the star.
            m = star_prefix_regex.match(line)
            if m:
                in_location = True
                line = m.group('content')

            if in_location:
                # Strip everything that is not a filename in "line": entities
                # "(NAME)", entry text (the colon, if present, and anything
                # that follows it).
                if '(' in line:
                    line = line.partition('(')[0]
                    in_location = False
                if ':' in line:
                    line = line.partition(':')[0]
                    in_location = False

                # At this point, all that's left is a list of filenames
                # separated by commas and whitespace.
                for file in line.split(','):
                    file = file.strip()
                    if file:
                        files.append(file)
        return files

    @property
    def datetime(self):
        """Return the first truthy datetime among the author lines, or None."""
        for author in self.author_lines:
            if author[1]:
                return author[1]
        return None

    @property
    def authors(self):
        """Return a new list with the name of every author line."""
        return [author_line[0] for author_line in self.author_lines]

    @property
    def is_empty(self):
        """Return True when no line and no PR has been added to the entry."""
        return not self.lines and self.prs == self.initial_prs
234
235
class GitCommit:
    """Parse and verify the ChangeLog entries of one commit message."""

    def __init__(self, hexsha, date, author, body, modified_files,
                 strict=True):
        """Parse BODY and check it against MODIFIED_FILES.

        BODY is the commit message as a sequence of lines and MODIFIED_FILES
        a list of (path, status) tuples such as parse_git_name_status
        returns.  DATE must provide strftime() and AUTHOR is the name used
        when an entry has no author of its own.  With STRICT set, a commit
        mixing ChangeLog/misc files with other files is reported as an
        error.  Problems found are collected in self.errors.
        """
        self.hexsha = hexsha
        self.lines = body
        self.modified_files = modified_files
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.date = date
        self.author = author
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []

        project_files = [f for f in self.modified_files
                         if self.is_changelog_filename(f[0])
                         or f[0] in misc_files]
        ignored_files = [f for f in self.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.modified_files):
            # All modified files are only MISC files
            return
        elif project_files and strict:
            self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and '
                                     'DEV-PHASE updates should be done '
                                     'separately from normal commits'))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.deduce_changelog_locations()
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()

    @property
    def success(self):
        """Return True when no error has been recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Return the paths of the modified files whose status is 'A'."""
        return [x[0] for x in self.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path):
        """Return True if PATH names a ChangeLog file in any directory."""
        return path.endswith('/ChangeLog') or path == 'ChangeLog'

    @classmethod
    def find_changelog_location(cls, name):
        """Return NAME as a ChangeLog location, or None if it is not one.

        One leading tab, one trailing colon and one trailing slash are
        removed before the lookup in changelog_locations.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Return AUTHOR with an extra space inserted before each '<'."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Convert `git --name-status` output to (path, status) tuples.

        Added, deleted and modified files keep their status letter.  A
        rename is reported as the deletion of the old path followed by the
        addition of the new one.  Lines with any other status are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t in ('A', 'D', 'M'):
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                # git appends a similarity score to the rename status
                # (e.g. "R100"), so the letter alone must not be compared.
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def parse_lines(self, all_are_ignored):
        """Find where the ChangeLog part of the message starts.

        self.changes is set to the message lines from the first line that
        looks like a ChangeLog line onwards.  If there is no such line, an
        error is recorded unless ALL_ARE_IGNORED is true.
        """
        body = self.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Entries are appended to self.changelog_entries; authors and PRs seen
        before the first entry are kept in self.top_level_authors and
        self.top_level_prs.  Formatting problems are recorded as errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry whose folder is to be deduced.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                self.errors.append(Error('line limit exceeds %d characters'
                                         % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1), self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must prepend with tab ' \
                              'and 4 spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    component = pr_regex.match(line).group('component')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                elif line.startswith(CHERRY_PICK_PREFIX):
                    continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if author_tuple not in last_entry.author_lines:
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if len(m.group('spaces')) != 1:
                            err = Error('one space should follow asterisk',
                                        line)
                            self.errors.append(err)
                        else:
                            last_entry.lines.append(line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'asterisk and space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix under which CHANGELOG_FILE was changed.

        The result is '' when a modified path equals CHANGELOG_FILE, the part
        of the first modified path preceding '/' + CHANGELOG_FILE when there
        is one, and None otherwise.
        """
        for file in self.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Fill in the folder of entries given without a ChangeLog location.

        The folder is deduced from the modified files the entry mentions.
        An error is recorded when no location is found or when the mentioned
        files belong to different locations.
        """
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                            or (location
                                and location in changelog_locations)):
                        # Compare against None: the root location is the
                        # empty string and must take part in the uniqueness
                        # check as well.
                        if changelog is not None and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Return True if PATH starts with one of the ignored prefixes."""
        for ignored in ignored_prefixes:
            if path.startswith(ignored):
                return True
        return False

    @classmethod
    def get_changelog_by_path(cls, path):
        """Return the longest leading part of PATH that is a ChangeLog location.

        The result is '' when no leading part of PATH is a known location.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Compare the files mentioned in the entries with the changed files.

        Mentioned but unchanged files and changed but unmentioned files are
        recorded as errors.  Files in ignored locations are skipped, and an
        unmentioned new file gets a "New file." line added to the entry of
        its ChangeLog location (the entry is created when missing).
        """
        # Every entry is expected to have a folder once the locations have
        # been deduced without an error.
        assert all(entry.folder is not None
                   for entry in self.changelog_entries)

        mentioned_files = set()
        for entry in self.changelog_entries:
            if not entry.files:
                msg = 'ChangeLog must contain a file entry'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    mentioned_files.add(os.path.join(entry.folder, file))

        cand = [x[0] for x in self.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        new_files = set(self.new_files)
        for file in sorted(mentioned_files - changed_files):
            self.errors.append(Error('file not changed in a patch', file))
        for file in sorted(changed_files - mentioned_files):
            if self.in_ignored_location(file):
                continue
            if file in new_files:
                changelog_location = self.get_changelog_by_path(file)
                entries = [x for x in self.changelog_entries
                           if x.folder == changelog_location]
                entry = entries[0] if entries else None
                if not entry:
                    prs = self.top_level_prs
                    # There may be no entry at all, e.g. when the message
                    # only has top-level author lines.
                    if not prs and self.changelog_entries:
                        # if all ChangeLog entries have identical PRs
                        # then use them
                        prs = self.changelog_entries[0].prs
                        if any(other.prs != prs
                               for other in self.changelog_entries):
                            prs = []
                    entry = ChangeLogEntry(changelog_location,
                                           self.top_level_authors,
                                           prs)
                    self.changelog_entries.append(entry)
                # strip prefix of the file
                assert file.startswith(entry.folder)
                file = file[len(entry.folder):].lstrip('/')
                entry.lines.append('\t* %s: New file.' % file)
            else:
                msg = 'changed file not mentioned in a ChangeLog'
                self.errors.append(Error(msg, file))

    def check_for_correct_changelog(self):
        """Record an error for each file listed under the wrong ChangeLog."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield a (folder, text) tuple for every ChangeLog entry.

        The date comes from the entry itself, or from self.date when the
        entry has none or USE_COMMIT_TS is set.  The authors come from the
        entry (self.author when it has none), followed by the co-authors
        that are not listed yet.
        """
        for entry in self.changelog_entries:
            timestamp = entry.datetime
            if not timestamp or use_commit_ts:
                timestamp = self.date.strftime('%Y-%m-%d')
            authors = entry.authors if entry.authors else [self.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            chunks = []
            for i, author in enumerate(authors):
                # Use the layout that parse_changelog itself accepts: two
                # spaces after the date, a tab and 4 spaces in front of
                # every additional author.
                if i == 0:
                    chunks.append('%s  %s\n' % (timestamp, author))
                else:
                    chunks.append('\t    %s\n' % author)
            chunks.append('\n')
            for pr in entry.prs:
                chunks.append('\t%s\n' % pr)
            for line in entry.lines:
                chunks.append(line + '\n')
            yield (entry.folder, ''.join(chunks).rstrip())

    def print_output(self):
        """Print every generated ChangeLog entry below a header line."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print the recorded errors, one per line."""
        print('Errors:')
        for error in self.errors:
            print(error)