tests: arch-power: Add 64-bit hello binaries
[gem5.git] / util / find_copyrights.py
1 #!/usr/bin/env python3
2
3 import os
4 import re
5 import sys
6
7 from file_types import lang_type, find_files
8
# Emacs-style "-*- mode: ... -*-" marker; skipped when it is the first line.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
# A line whose first non-blank character is '#'.
shell_comment = re.compile(r'^\s*#')
# A ';' anywhere in the line.
lisp_comment = re.compile(r';')
# A '//' anywhere in the line.
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _find_line_comment_blocks(lines, comment_re, skip_shebang):
    """Yield (start, end) for leading runs of single-line comments.

    A line belongs to a run when comment_re matches it.  Scanning stops at
    the first non-blank, non-comment line seen while no run is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and ((skip_shebang and line.startswith('#!'))
                       or mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            yield start, i - 1
            start = None

    # A run that reaches the last line has no following line to close it.
    if start is not None:
        yield start, len(lines) - 1


def _find_c_comment_blocks(lines):
    """Yield (start, end) for leading '/* */' and '//' comment blocks."""
    start = None
    # None: outside a comment, 'C': inside /* */, 'CPP': inside a // run.
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            # Code in front of the '//' means the header area is over.
            if line[:cpp_match.start()].strip():
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                # A new "Copyright" line splits one // run into two blocks.
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            # Blank lines do not terminate a // run.
            if not line.strip():
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            start = i
            # A comment opened and closed on the same line is a complete
            # block; do not enter 'C' mode waiting for a later '*/'.
            if c_comment_end.search(line, c_match.end()):
                yield start, i
                continue
            mode = 'C'

        if mode is None and line.strip():
            return

    # A // run that reaches the last line is still a block.  An unterminated
    # /* comment is not reported.
    if mode == 'CPP':
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate (start, end) index pairs of comment blocks heading a file.

    Arguments:
      lines -- the file's lines as a sequence of strings.
      lang_type -- language name selecting the comment syntax: '#' comments
          for python/make/shell/perl/scons, ';' for lisp, and '/* */' plus
          '//' for C, C++, swig, isa, asm, slicc, lex and yacc.

    Both indices are inclusive, 0-based positions in lines.  The blocks are
    not checked for copyright text here.

    Raises AttributeError (on first iteration) for an unknown lang_type and
    AssertionError when the C-family scanner meets a layout it does not
    expect, e.g. '//' and '/*' on the same line.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        yield from _find_line_comment_blocks(lines, shell_comment, True)

    elif lang_type in ('lisp', ):
        yield from _find_line_comment_blocks(lines, lisp_comment, False)

    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        yield from _find_c_comment_blocks(lines)

    else:
        raise AttributeError("Could not handle language %s" % lang_type)
97
# Two four-digit years joined by a dash, e.g. "2003-2005" or "2003 - 2005".
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated list of years and year ranges into a set.

    Each comma-separated item is stripped.  An item that starts with a
    "YYYY-YYYY" range contributes every year in that inclusive range; any
    other item contributes int(item) when it parses as an integer and is
    skipped otherwise.  Returns a set of ints.
    """
    years = set()
    for token in dates.split(','):
        token = token.strip()
        span = date_range_re.match(token)
        if span is not None:
            first, last = (int(group) for group in span.groups())
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(token))
        except ValueError:
            # Not a year; ignore the item.
            pass

    return years
117
# "Copyright (c) <years> <owner>".  Groups: the "(c)" marker, the raw year
# list, and the owner name.  Comment leaders (whitespace, '*', '#', '/') are
# allowed between the years and the owner.
# The letter ranges are spelled A-Za-z: the range A-z also covers the ASCII
# punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
               re.DOTALL)

# First line of an author list: optional comment leaders, "Authors:", a name.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
# Continuation line of an author list: optional comment leaders and a name.
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')
124
# Every owner string extracted by get_data(), accumulated across calls.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright records from the comment blocks heading a file.

    Arguments:
      lang_type -- language name passed through to find_copyright_block().
      lines -- the file's lines as a sequence of strings.

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block that contains a copyright notice.  dates is the set
    produced by process_dates(), authors is a list of names, and start/end
    are line indices (end may be pulled in when the author list stops
    matching).  Each owner is also added to the module-level all_owners set.

    If process_dates() raises, the offending date text and owner are printed
    and the exception is re-raised.
    """
    results = []
    for start, end in find_copyright_block(lines, lang_type):
        header = ''.join(lines[start:end + 1])
        found = copyright_re.search(header)
        if found is None:
            continue

        _marker, date_text, owner = found.groups()
        date_text = date_text.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            years = process_dates(date_text)
        except Exception:
            print(date_text)
            print(owner)
            raise

        authors = []
        for idx in range(start, end + 1):
            current = lines[idx]

            # Until an "Authors:" line is found, keep looking for one.
            if not authors:
                first = authors_re.search(current)
                if first:
                    authors.append(first.group(1).strip())
                continue

            extra = more_authors_re.search(current)
            if extra:
                authors.append(extra.group(1).strip())
                continue

            # The author list is over: adjust the block end at the first
            # blank line, or just before the first '//' line that has text.
            for tail in range(idx, end + 1):
                stripped = lines[tail].strip()
                if not stripped:
                    end = tail
                    break
                if stripped.startswith('//') and stripped[2:].lstrip():
                    end = tail - 1
                    break
            break

        results.append((owner, years, authors, start, end))

    return results
174
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    Runs of consecutive years are collapsed into "first-last"; isolated
    years are written on their own, e.g. {2001, 2002, 2003, 2005} gives
    "2001-2003,2005".  Duplicate years are ignored.  An empty collection
    gives an empty string.
    """
    years = sorted(set(dates))
    if not years:
        return ''

    output = []

    def add_output(first, second):
        if first == second:
            output.append('%d' % (first))
        else:
            output.append('%d-%d' % (first, second))

    # first/second delimit the run of consecutive years being built.
    first = second = years[0]
    for year in years[1:]:
        if year == second + 1:
            second = year
        else:
            add_output(first, second)
            first = second = year

    add_output(first, second)

    return ','.join(output)
200
# Command-line synopsis; %s is replaced with the program name.
# -c appends per-owner file counts and -v reports files that fail to parse.
# The option parser also accepts "-i <arg>", but nothing consumes its value,
# so it is not advertised here.
usage_str = """usage:
    %s [-c] [-v] <file-or-directory>..."""


def usage(exitcode):
    """Print the usage message and optionally exit.

    Arguments:
      exitcode -- process exit status passed to sys.exit(); when None the
          function returns after printing.
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
208
# Script entry point: scan the files/directories named on the command line
# and print one merged "Copyright (c) <years> <owner>" line per owner.
if __name__ == '__main__':
    import getopt

    show_counts = False
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            # NOTE(review): the collected values are never consulted below;
            # the intended meaning of -i is not visible in this file.
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}
    counts = {}

    for filename, lang in files:
        # open() replaces the Python 2 only file() builtin, and the with
        # block closes the handle.  Undecodable bytes are replaced so that
        # one oddly encoded file does not abort the whole scan.
        with open(filename, 'r', errors='replace') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # The language is re-detected with the first line available; the
        # value paired with the filename in files is not used.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Best effort: a file that cannot be parsed is skipped, and only
            # reported in verbose mode.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            if owner not in copyrights:
                copyrights[owner] = set()
            if owner not in counts:
                counts[owner] = 0

            copyrights[owner] |= dates
            counts[owner] += 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Most files first.  Ties are broken by owner name only: the year sets
    # are only partially ordered, so they are kept out of the sort key.
    ranked = sorted(info, key=lambda item: (item[0], item[2]), reverse=True)
    for count, dates, owner in ranked:
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))