[Regressions] Support for running w/ default args (#3436)
[cvc5.git] / test / regress / run_regression.py
1 #!/usr/bin/env python3
2 """
3 Usage:
4
5 run_regression.py [--enable-proof] [--with-lfsc] [--dump]
6 [--use-skip-return-code] [wrapper] cvc4-binary
    [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy]
8
9 Runs benchmark and checks for correct exit status and output.
10 """
11
12 import argparse
13 import difflib
14 import os
15 import re
16 import shlex
17 import subprocess
18 import sys
19 import threading
20
# Metadata directives recognized in a benchmark file's comment lines.
SCRUBBER = 'SCRUBBER:'  # shell command that post-processes stdout before comparison
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'  # shell command that post-processes stderr
EXPECT = 'EXPECT:'  # one expected line of stdout (directive may repeat)
EXPECT_ERROR = 'EXPECT-ERROR:'  # one expected line of stderr (directive may repeat)
EXIT = 'EXIT:'  # expected exit status of the solver
COMMAND_LINE = 'COMMAND-LINE:'  # extra solver options; each occurrence adds a run
REQUIRES = 'REQUIRES:'  # required binary feature; a "no-" prefix inverts the check

# Exit codes of this script itself. 77 is the automake convention for a
# skipped test.
EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
32
33
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = 124
    try:
        # Let communicate() enforce the timeout itself. The previous
        # threading.Timer approach killed the process but then still returned
        # the partial output and the killed process's negative return code,
        # contradicting the documented ('', '', 124) contract; it could also
        # reference `timer` before assignment in the cleanup path.
        out, err = proc.communicate(input=s_input, timeout=timeout)
        exit_status = proc.returncode
    except subprocess.TimeoutExpired:
        # Kill the child and reap it so we do not leak a zombie process.
        proc.kill()
        proc.communicate()

    return out, err, exit_status
62
63
def get_cvc4_features(cvc4_binary):
    """Queries `cvc4_binary` with --show-config and returns the list of
    enabled features, i.e. the configuration keys whose value is 'yes'."""

    stdout, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    # run_process hands back bytes; decode before parsing.
    if isinstance(stdout, bytes):
        stdout = stdout.decode()

    enabled = []
    for config_line in stdout.split('\n'):
        parts = [part.strip() for part in config_line.split(':')]
        # Only consider well-formed "key : value" lines.
        if len(parts) == 2 and parts[1] == 'yes':
            enabled.append(parts[0])

    return enabled
80
81
def logic_supported_with_proofs(logic):
    """Returns True if proof checking is supported for the given SMT-LIB
    logic string (or None if no logic was detected)."""
    assert logic is None or isinstance(logic, str)
    return logic in [
        # single theories
        "QF_BV",
        "QF_UF",
        "QF_A",
        "QF_LRA",
        # two theories
        "QF_UFBV",
        "QF_UFLRA",
        "QF_AUF",
        "QF_ALRA",
        "QF_ABV",
        # BUG FIX: the comma after "QF_BVLRA" was missing, so implicit string
        # concatenation produced "QF_BVLRAQF_AUFBV" and neither QF_BVLRA nor
        # QF_AUFBV was recognized as proof-supported.
        "QF_BVLRA",
        # three theories
        "QF_AUFBV",
        "QF_ABVLRA",
        "QF_UFBVLRA",
        "QF_AUFLRA",
        # four theories
        "QF_AUFBVLRA",
    ]
105
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on `benchmark_filename` (relative to `benchmark_dir`) with
    the options in `command_line`, optionally via the `wrapper` command.
    stdout/stderr are post-processed with `scrubber`/`error_scrubber` when
    given. When `dump` is true, the benchmark is first round-tripped through
    CVC4's raw-benchmark dumper and the dumped SMT2 is used as stdin input.
    Returns a (stdout, stderr, exit status) triple with stripped strings."""

    base_cmd = wrapper[:] + [cvc4_binary]

    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dumped_benchmark, _, _ = run_process(
            base_cmd + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            base_cmd + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dumped_benchmark)
    else:
        output, error, exit_status = run_process(
            base_cmd + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # Apply the scrubber commands to stdout/stderr when they were specified.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns bytes for stdout/stderr; normalize to str
    # before the caller compares against the expected output.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
152
153
def run_regression(unsat_cores, proofs, dump, use_skip_return_code, wrapper,
                   cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `use_skip_return_code` enables/disables
    returning 77 when a test is skipped.

    Returns EXIT_OK on success, EXIT_FAILURE on any mismatch, and EXIT_SKIP
    (or EXIT_OK, depending on `use_skip_return_code`) when the benchmark is
    skipped. Progress is reported on stdout in TAP format ("1..N", "ok",
    "not ok")."""

    # Fail early if the binary or the benchmark is unusable.
    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    # Pick the comment character, the regexes used to extract the status and
    # logic, and the status-to-output mapping based on the input format.
    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    logic_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        logic_regex = r'\(\s*set-logic\s*(.*)\)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        # TPTP benchmarks report their result in SZS format.
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    logic = None
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            # Multiple status annotations (e.g. incremental benchmarks)
            # produce one expected output line per annotation.
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0
    # Only a single, unambiguous set-logic command is used; otherwise the
    # logic stays None (treated as unsupported for proof checking).
    if logic_regex:
        logic_match = re.findall(logic_regex, benchmark_content)
        if logic_match and len(logic_match) == 1:
            logic = logic_match[0]

    # Options from the environment are prepended to every run.
    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    # Skip benchmarks that need unsat-core support when it is unavailable.
    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    # Honor REQUIRES directives; a "no-" prefix means the feature must be
    # absent from the binary.
    for req_feature in requires:
        if req_feature.startswith("no-"):
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    # Without COMMAND-LINE directives the benchmark still runs once, with the
    # basic arguments only.
    if not command_lines:
        command_lines.append('')

    # Build the list of option configurations to run. Each COMMAND-LINE may
    # additionally spawn a second run with extra checking flags
    # (--check-models, --check-proofs, ...) when applicable.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and ('--check-proofs' in all_args
                           or '--dump-proofs' in all_args):
            print(
                '# Skipped command line options ({}): checking proofs not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        # Derive the extra checking flags, unless the benchmark already
        # requests or explicitly disables them.
        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-check-models' not in all_args and \
           '--check-models' not in all_args:
            extra_command_line_args = ['--check-models']
        if proofs and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               logic_supported_with_proofs(logic) and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args = ['--check-proofs']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args += ['--check-unsat-cores']
        if '--no-check-abducts' not in all_args and \
           '--check-abducts' not in all_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-abducts']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                             extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    print('# Starting')
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
            print()
            print('Error output:')
            print(error)
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print(output)
            print()
            print('Error output:')
            print(error)
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
379
380
def main():
    """Parses the command-line arguments and delegates to run_regression().
    Returns that function's exit code."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    for flag in ('--enable-proof', '--with-lfsc', '--dump',
                 '--use-skip-return-code'):
        parser.add_argument(flag, action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    opts = parser.parse_args()

    binary_path = os.path.abspath(opts.cvc4_binary)

    # Under VALGRIND=1, default to running the binary through valgrind unless
    # an explicit wrapper was given.
    wrapper = opts.wrapper
    if not wrapper and os.environ.get('VALGRIND') == '1':
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    return run_regression(opts.enable_proof, opts.with_lfsc, opts.dump,
                          opts.use_skip_return_code, wrapper, binary_path,
                          opts.benchmark, timeout)
407
408
if __name__ == "__main__":
    # Propagate the regression result as the script's exit status.
    sys.exit(main())