4 ## Top contributors (to current version):
5 ## Andres Noetzli, Yoni Zohar, Mathias Preiner
6 ## This file is part of the CVC4 project.
7 ## Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
8 ## in the top-level source directory and their institutional affiliations.
9 ## All rights reserved. See the file COPYING in the top-level source
10 ## directory for licensing information.
15 run_regression.py [--enable-proof] [--with-lfsc] [--dump]
16 [--use-skip-return-code] [wrapper] cvc4-binary
17 [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]
19 Runs benchmark and checks for correct exit status and output.
# Metadata prefixes that a benchmark file may declare in its comment lines
# (e.g. "; EXPECT: sat"). `run_regression` scans benchmark comments for these.
# NOTE(review): EXPECT and EXIT were missing from this listing but are used by
# `run_regression` below; restored following the SCRUBBER/EXPECT-ERROR pattern.
SCRUBBER = 'SCRUBBER:'
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
EXPECT = 'EXPECT:'
EXPECT_ERROR = 'EXPECT-ERROR:'
EXIT = 'EXIT:'
COMMAND_LINE = 'COMMAND-LINE:'
REQUIRES = 'REQUIRES:'
def print_colored(color, text):
    """Prints `text` in color `color`."""
    for current_line in text.splitlines():
        # Emit the ANSI color code, the line, then the reset sequence.
        print('{}{}{}'.format(color, current_line, Color.ENDC))
def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected`.

    Added lines ('+') are printed in green, removed lines ('-') in red, and
    context lines unmodified."""

    # NOTE(review): the trailing arguments of `unified_diff` and the
    # fallthrough branch were dropped from the listing; restored here — the
    # 'expected'/'actual' from/to labels are an assumption, confirm against
    # version control.
    for line in difflib.unified_diff(expected.splitlines(),
                                     actual.splitlines(),
                                     'expected',
                                     'actual',
                                     lineterm=''):
        if line.startswith('+'):
            print_colored(Color.GREEN, line)
        elif line.startswith('-'):
            print_colored(Color.RED, line)
        else:
            print(line)
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(args,
                            cwd=cwd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = STATUS_TIMEOUT
    try:
        if timeout:
            # Kill the process after `timeout` seconds.
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        if timeout:
            # The timer killed the process and is not active anymore.
            if exit_status == -9 and not timer.is_alive():
                exit_status = STATUS_TIMEOUT
            timer.cancel()

    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`.

    Parses the `--show-config` output, which consists of `key : yes/no` lines,
    and returns the pair (enabled features, disabled features)."""

    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    disabled_features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        # Only `key : value` lines are feature entries; skip everything else.
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)
            elif value == 'no':
                disabled_features.append(key)

    return features, disabled_features
# Predicate: does the SMT-LIB logic `logic` (a string, or None) support proof
# production?
# NOTE(review): everything after the assert — the actual membership check over
# the supported logic names — was dropped from this listing; this fragment is
# incomplete and must be restored from version control before use.
130 def logic_supported_with_proofs(logic
):
131 assert logic
is None or isinstance(logic
, str)
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse and re-dump the benchmark in SMT2, then feed the
        # dumped output back to CVC4 over stdin.
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
                                   timeout, output)
    if error_scrubber:
        error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
                                  timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
                   skip_timeout, wrapper, cvc4_binary, benchmark_path,
                   timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `use_skip_return_code` enables/disables
    returning 77 when a test is skipped."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Pick the comment character and the regexes used to extract the expected
    # status (and, for SMT2, the logic) based on the benchmark format.
    comment_char = '%'
    status_regex = None
    logic_regex = None
    logic = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        logic_regex = r'\(\s*set-logic\s*(.*)\)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if logic_regex:
        logic_match = re.findall(logic_regex, benchmark_content)
        if logic_match and len(logic_match) == 1:
            logic = logic_match[0]

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    # Check that the binary provides (or lacks, for "no-" requirements) every
    # feature required by the benchmark; skip the test otherwise.
    for req_feature in requires:
        is_negative = False
        if req_feature.startswith("no-"):
            req_feature = req_feature[len("no-"):]
            is_negative = True
        if req_feature not in (cvc4_features + cvc4_disabled_features):
            print(
                'Illegal requirement in regression: {}\nAllowed requirements: {}'
                .format(req_feature,
                        ' '.join(cvc4_features + cvc4_disabled_features)))
            return EXIT_FAILURE
        if is_negative:
            if req_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    req_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    if not command_lines:
        command_lines.append('')

    # Build the list of command-line configurations to run, optionally adding
    # a second configuration with extra checking options per command line.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and '--dump-proofs' in all_args:
            print(
                '# Skipped command line options ({}): proof production not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
                '--no-check-synth-sol' not in all_args and \
                '--sygus-rr' not in all_args and \
                '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
                '--no-debug-check-models' not in all_args and \
                '--no-check-models' not in all_args and \
                '--debug-check-models' not in all_args:
            extra_command_line_args = ['--debug-check-models']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
                    '--check-unsat-cores' not in all_args and \
                    '--incremental' not in all_args and \
                    '--unconstrained-simp' not in all_args:
                extra_command_line_args += ['--check-unsat-cores']
        if '--no-check-abducts' not in all_args and \
                '--check-abducts' not in all_args:
            extra_command_line_args += ['--check-abducts']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                            extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
                                                   error_scrubber, cvc4_binary,
                                                   command_line_args,
                                                   benchmark_dir,
                                                   benchmark_basename, timeout)
        # Strip leading whitespace so comparisons ignore indentation.
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if exit_status == STATUS_TIMEOUT:
            exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
            print('Timeout - Flags: {}'.format(command_line_args))
        elif output != expected_output:
            exit_code = EXIT_FAILURE
            print('not ok - Flags: {}'.format(command_line_args))
            print()
            print('Standard output difference')
            print('=' * 80)
            print_diff(output, expected_output)
            print('=' * 80)
            print()
            print()
            if error:
                print('Error output')
                print('=' * 80)
                print_colored(Color.YELLOW, error)
                print('=' * 80)
                print()
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            print()
            print('Error output difference')
            print('=' * 80)
            print_diff(error, expected_error)
            print('=' * 80)
            print()
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print('=' * 80)
            print_colored(Color.BLUE, output)
            print('=' * 80)
            print()
            print()
            print('Error output:')
            print('=' * 80)
            print_colored(Color.YELLOW, error)
            print('=' * 80)
            print()
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
def main():
    """Parses the command line arguments and then calls the core of the
    script.

    Returns the exit code to be used by the process (EXIT_OK, EXIT_FAILURE,
    or EXIT_SKIP when `--use-skip-return-code` is given)."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--use-skip-return-code', action='store_true')
    parser.add_argument('--skip-timeout', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    # Under Valgrind, wrap the (libtool-built) binary unless a wrapper was
    # given explicitly on the command line.
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          args.use_skip_return_code, args.skip_timeout,
                          wrapper, cvc4_binary, args.benchmark, timeout)
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status so the test
    # harness can distinguish pass/fail/skip (e.g. 77 for skipped tests).
    sys.exit(main())