Refactor and update copyright headers. (#6316)
[cvc5.git] / test / regress / run_regression.py
1 #!/usr/bin/env python3
2 ###############################################################################
3 # Top contributors (to current version):
4 # Andres Noetzli, Mathias Preiner, Yoni Zohar
5 #
6 # This file is part of the cvc5 project.
7 #
8 # Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
9 # in the top-level source directory and their institutional affiliations.
10 # All rights reserved. See the file COPYING in the top-level source
11 # directory for licensing information.
12 # #############################################################################
13 ##
14
15 """
16 Runs benchmark and checks for correct exit status and output.
17 """
18
19 import argparse
20 import difflib
21 import os
22 import re
23 import shlex
24 import subprocess
25 import sys
26 import threading
27
28
29 class Color:
30 BLUE = '\033[94m'
31 GREEN = '\033[92m'
32 YELLOW = '\033[93m'
33 RED = '\033[91m'
34 ENDC = '\033[0m'
35
36
37 SCRUBBER = 'SCRUBBER:'
38 ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
39 EXPECT = 'EXPECT:'
40 EXPECT_ERROR = 'EXPECT-ERROR:'
41 EXIT = 'EXIT:'
42 COMMAND_LINE = 'COMMAND-LINE:'
43 REQUIRES = 'REQUIRES:'
44
45 EXIT_OK = 0
46 EXIT_FAILURE = 1
47 EXIT_SKIP = 77
48 STATUS_TIMEOUT = 124
49
50
51 def print_colored(color, text):
52 """Prints `text` in color `color`."""
53
54 for line in text.splitlines():
55 print(color + line + Color.ENDC)
56
57
58 def print_diff(actual, expected):
59 """Prints the difference between `actual` and `expected`."""
60
61 for line in difflib.unified_diff(expected.splitlines(),
62 actual.splitlines(),
63 'expected',
64 'actual',
65 lineterm=''):
66 if line.startswith('+'):
67 print_colored(Color.GREEN, line)
68 elif line.startswith('-'):
69 print_colored(Color.RED, line)
70 else:
71 print(line)
72
73
74 def run_process(args, cwd, timeout, s_input=None):
75 """Runs a process with a timeout `timeout` in seconds. `args` are the
76 arguments to execute, `cwd` is the working directory and `s_input` is the
77 input to be sent to the process over stdin. Returns the output, the error
78 output and the exit code of the process. If the process times out, the
79 output and the error output are empty and the exit code is 124."""
80
81 proc = subprocess.Popen(args,
82 cwd=cwd,
83 stdin=subprocess.PIPE,
84 stdout=subprocess.PIPE,
85 stderr=subprocess.PIPE)
86
87 out = ''
88 err = ''
89 exit_status = STATUS_TIMEOUT
90 try:
91 if timeout:
92 timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
93 timer.start()
94 out, err = proc.communicate(input=s_input)
95 exit_status = proc.returncode
96 finally:
97 if timeout:
98 # The timer killed the process and is not active anymore.
99 if exit_status == -9 and not timer.is_alive():
100 exit_status = STATUS_TIMEOUT
101 timer.cancel()
102
103 return out, err, exit_status
104
105
106 def get_cvc4_features(cvc4_binary):
107 """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
108
109 output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
110 if isinstance(output, bytes):
111 output = output.decode()
112
113 features = []
114 disabled_features = []
115 for line in output.split('\n'):
116 tokens = [t.strip() for t in line.split(':')]
117 if len(tokens) == 2:
118 key, value = tokens
119 if value == 'yes':
120 features.append(key)
121 elif value == 'no':
122 disabled_features.append(key)
123
124 return features, disabled_features
125
126
127 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
128 command_line, benchmark_dir, benchmark_filename, timeout):
129 """Runs CVC4 on the file `benchmark_filename` in the directory
130 `benchmark_dir` using the binary `cvc4_binary` with the command line
131 options `command_line`. The output is scrubbed using `scrubber` and
132 `error_scrubber` for stdout and stderr, respectively. If dump is true, the
133 function first uses CVC4 to read in and dump the benchmark file and then
134 uses that as input."""
135
136 bin_args = wrapper[:]
137 bin_args.append(cvc4_binary)
138
139 output = None
140 error = None
141 exit_status = None
142 if dump:
143 dump_args = [
144 '--preprocess-only', '--dump', 'raw-benchmark',
145 '--output-lang=smt2', '-qq'
146 ]
147 dump_output, _, _ = run_process(
148 bin_args + command_line + dump_args + [benchmark_filename],
149 benchmark_dir, timeout)
150 output, error, exit_status = run_process(
151 bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
152 timeout, dump_output)
153 else:
154 output, error, exit_status = run_process(
155 bin_args + command_line + [benchmark_filename], benchmark_dir,
156 timeout)
157
158 # If a scrubber command has been specified then apply it to the output.
159 if scrubber:
160 output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
161 timeout, output)
162 if error_scrubber:
163 error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
164 timeout, error)
165
166 # Popen in Python 3 returns a bytes object instead of a string for
167 # stdout/stderr.
168 if isinstance(output, bytes):
169 output = output.decode()
170 if isinstance(error, bytes):
171 error = error.decode()
172 return (output.strip(), error.strip(), exit_status)
173
174
175 def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
176 skip_timeout, wrapper, cvc4_binary, benchmark_path,
177 timeout):
178 """Determines the expected output for a benchmark, runs CVC4 on it and then
179 checks whether the output corresponds to the expected output. Optionally
180 uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
181 checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
182 the input (if dump is true). `use_skip_return_code` enables/disables
183 returning 77 when a test is skipped."""
184
185 if not os.access(cvc4_binary, os.X_OK):
186 sys.exit(
187 '"{}" does not exist or is not executable'.format(cvc4_binary))
188 if not os.path.isfile(benchmark_path):
189 sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
190
191 cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary)
192
193 basic_command_line_args = []
194
195 benchmark_basename = os.path.basename(benchmark_path)
196 benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
197 benchmark_dir = os.path.dirname(benchmark_path)
198 comment_char = '%'
199 status_regex = None
200 status_to_output = lambda s: s
201 if benchmark_ext == '.smt':
202 status_regex = r':status\s*(sat|unsat)'
203 comment_char = ';'
204 elif benchmark_ext == '.smt2':
205 status_regex = r'set-info\s*:status\s*(sat|unsat)'
206 comment_char = ';'
207 elif benchmark_ext == '.cvc':
208 pass
209 elif benchmark_ext == '.p':
210 status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
211 status_to_output = lambda s: '% SZS status {} for {}'.format(
212 s, benchmark_filename)
213 elif benchmark_ext == '.sy':
214 comment_char = ';'
215 # Do not check proofs/unsat-cores with .sy files
216 check_unsat_cores = False
217 check_proofs = False
218 else:
219 sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
220 benchmark_basename))
221
222 benchmark_lines = None
223 with open(benchmark_path, 'r') as benchmark_file:
224 benchmark_lines = benchmark_file.readlines()
225 benchmark_content = ''.join(benchmark_lines)
226
227 # Extract the metadata for the benchmark.
228 scrubber = None
229 error_scrubber = None
230 expected_output = ''
231 expected_error = ''
232 expected_exit_status = None
233 command_lines = []
234 requires = []
235 for line in benchmark_lines:
236 # Skip lines that do not start with a comment character.
237 if line[0] != comment_char:
238 continue
239 line = line[1:].lstrip()
240
241 if line.startswith(SCRUBBER):
242 scrubber = line[len(SCRUBBER):].strip()
243 elif line.startswith(ERROR_SCRUBBER):
244 error_scrubber = line[len(ERROR_SCRUBBER):].strip()
245 elif line.startswith(EXPECT):
246 expected_output += line[len(EXPECT):].strip() + '\n'
247 elif line.startswith(EXPECT_ERROR):
248 expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
249 elif line.startswith(EXIT):
250 expected_exit_status = int(line[len(EXIT):].strip())
251 elif line.startswith(COMMAND_LINE):
252 command_lines.append(line[len(COMMAND_LINE):].strip())
253 elif line.startswith(REQUIRES):
254 requires.append(line[len(REQUIRES):].strip())
255 expected_output = expected_output.strip()
256 expected_error = expected_error.strip()
257
258 # Expected output/expected error has not been defined in the metadata for
259 # the benchmark. Try to extract the information from the benchmark itself.
260 if expected_output == '' and expected_error == '':
261 match = None
262 if status_regex:
263 match = re.findall(status_regex, benchmark_content)
264
265 if match:
266 expected_output = status_to_output('\n'.join(match))
267 elif expected_exit_status is None:
268 # If there is no expected output/error and the exit status has not
269 # been set explicitly, the benchmark is invalid.
270 sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
271 if expected_exit_status is None:
272 expected_exit_status = 0
273
274 if 'CVC5_REGRESSION_ARGS' in os.environ:
275 basic_command_line_args += shlex.split(
276 os.environ['CVC5_REGRESSION_ARGS'])
277
278 if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
279 or '(get-unsat-assumptions)' in benchmark_content):
280 print(
281 '1..0 # Skipped regression: unsat cores not supported without proof support'
282 )
283 return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
284
285 for req_feature in requires:
286 is_negative = False
287 if req_feature.startswith("no-"):
288 req_feature = req_feature[len("no-"):]
289 is_negative = True
290 if req_feature not in (cvc4_features + cvc4_disabled_features):
291 print(
292 'Illegal requirement in regression: {}\nAllowed requirements: {}'
293 .format(req_feature,
294 ' '.join(cvc4_features + cvc4_disabled_features)))
295 return EXIT_FAILURE
296 if is_negative:
297 if req_feature in cvc4_features:
298 print('1..0 # Skipped regression: not valid with {}'.format(
299 req_feature))
300 return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
301 elif req_feature not in cvc4_features:
302 print('1..0 # Skipped regression: {} not supported'.format(
303 req_feature))
304 return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
305
306 if not command_lines:
307 command_lines.append('')
308
309 command_line_args_configs = []
310 for command_line in command_lines:
311 args = shlex.split(command_line)
312 all_args = basic_command_line_args + args
313
314 if not check_unsat_cores and ('--check-unsat-cores' in all_args):
315 print(
316 '# Skipped command line options ({}): unsat cores not supported without proof support'
317 .format(all_args))
318 continue
319 if not check_proofs and '--dump-proofs' in all_args:
320 print(
321 '# Skipped command line options ({}): proof production not supported'
322 .format(all_args))
323 continue
324
325 command_line_args_configs.append(all_args)
326
327 expected_output_lines = expected_output.split()
328 extra_command_line_args = []
329 if benchmark_ext == '.sy' and \
330 '--no-check-synth-sol' not in all_args and \
331 '--sygus-rr' not in all_args and \
332 '--check-synth-sol' not in all_args:
333 all_args += ['--check-synth-sol']
334 if ('sat' in expected_output_lines or \
335 'not_entailed' in expected_output_lines or \
336 'unknown' in expected_output_lines) and \
337 '--no-debug-check-models' not in all_args and \
338 '--no-check-models' not in all_args and \
339 '--debug-check-models' not in all_args:
340 extra_command_line_args += ['--debug-check-models']
341 if 'unsat' in expected_output_lines or 'entailed' in expected_output_lines:
342 if check_unsat_cores and \
343 '--no-produce-unsat-cores' not in all_args and \
344 '--no-check-unsat-cores' not in all_args and \
345 '--check-unsat-cores' not in all_args and \
346 '--incremental' not in all_args and \
347 '--unconstrained-simp' not in all_args:
348 extra_command_line_args += ['--check-unsat-cores']
349 if check_proofs and \
350 '--no-produce-proofs' not in all_args and \
351 '--no-check-proofs' not in all_args and \
352 '--check-proofs' not in all_args:
353 extra_command_line_args += ['--check-proofs']
354 if '--no-check-abducts' not in all_args and \
355 '--check-abducts' not in all_args and \
356 'get-abduct' in benchmark_content:
357 all_args += ['--check-abducts']
358
359 # Create a test case for each extra argument
360 for extra_arg in extra_command_line_args:
361 command_line_args_configs.append(all_args + [extra_arg])
362
363 # Run CVC4 on the benchmark with the different option sets and check
364 # whether the exit status, stdout output, stderr output are as expected.
365 print('1..{}'.format(len(command_line_args_configs)))
366 print('# Starting')
367 exit_code = EXIT_OK
368 for command_line_args in command_line_args_configs:
369 output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
370 error_scrubber, cvc4_binary,
371 command_line_args,
372 benchmark_dir,
373 benchmark_basename, timeout)
374 output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
375 error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
376 if exit_status == STATUS_TIMEOUT:
377 exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
378 print('Timeout - Flags: {}'.format(command_line_args))
379 elif output != expected_output:
380 exit_code = EXIT_FAILURE
381 print('not ok - Flags: {}'.format(command_line_args))
382 print()
383 print('Standard output difference')
384 print('=' * 80)
385 print_diff(output, expected_output)
386 print('=' * 80)
387 print()
388 print()
389 if error:
390 print('Error output')
391 print('=' * 80)
392 print_colored(Color.YELLOW, error)
393 print('=' * 80)
394 print()
395 elif error != expected_error:
396 exit_code = EXIT_FAILURE
397 print(
398 'not ok - Differences between expected and actual output on stderr - Flags: {}'
399 .format(command_line_args))
400 print()
401 print('Error output difference')
402 print('=' * 80)
403 print_diff(error, expected_error)
404 print('=' * 80)
405 print()
406 elif expected_exit_status != exit_status:
407 exit_code = EXIT_FAILURE
408 print(
409 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
410 format(expected_exit_status, exit_status, command_line_args))
411 print()
412 print('Output:')
413 print('=' * 80)
414 print_colored(Color.BLUE, output)
415 print('=' * 80)
416 print()
417 print()
418 print('Error output:')
419 print('=' * 80)
420 print_colored(Color.YELLOW, error)
421 print('=' * 80)
422 print()
423 else:
424 print('ok - Flags: {}'.format(command_line_args))
425
426 return exit_code
427
428
429 def main():
430 """Parses the command line arguments and then calls the core of the
431 script."""
432
433 parser = argparse.ArgumentParser(
434 description=
435 'Runs benchmark and checks for correct exit status and output.')
436 parser.add_argument('--dump', action='store_true')
437 parser.add_argument('--use-skip-return-code', action='store_true')
438 parser.add_argument('--skip-timeout', action='store_true')
439 parser.add_argument('--check-unsat-cores', action='store_true',
440 default=True)
441 parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores',
442 action='store_false')
443 parser.add_argument('--check-proofs', action='store_true', default=True)
444 parser.add_argument('--no-check-proofs', dest='check_proofs',
445 action='store_false')
446 parser.add_argument('wrapper', nargs='*')
447 parser.add_argument('cvc4_binary')
448 parser.add_argument('benchmark')
449
450 argv = sys.argv[1:]
451 # Append options passed via RUN_REGRESSION_ARGS to argv
452 if os.environ.get('RUN_REGRESSION_ARGS'):
453 argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))
454
455 args = parser.parse_args(argv)
456
457 cvc4_binary = os.path.abspath(args.cvc4_binary)
458
459 wrapper = args.wrapper
460 if os.environ.get('VALGRIND') == '1' and not wrapper:
461 wrapper = ['libtool', '--mode=execute', 'valgrind']
462
463 timeout = float(os.getenv('TEST_TIMEOUT', '600'))
464
465 return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
466 args.use_skip_return_code, args.skip_timeout,
467 wrapper, cvc4_binary, args.benchmark, timeout)
468
469
470 if __name__ == "__main__":
471 exit_code = main()
472 sys.exit(exit_code)