## Top contributors (to current version):
##   Andres Noetzli, Yoni Zohar, Mathias Preiner
## This file is part of the CVC4 project.
## Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
## in the top-level source directory and their institutional affiliations.
## All rights reserved.  See the file COPYING in the top-level source
## directory for licensing information.
"""
Usage:

    run_regression.py [--enable-proof] [--with-lfsc] [--dump]
        [--use-skip-return-code] [wrapper] cvc4-binary
        [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]

Runs benchmark and checks for correct exit status and output.
"""
# Prefixes of the metadata directives recognized inside benchmark files.
# Each directive lives on a comment line of the benchmark, e.g.
# "; EXPECT: unsat".
SCRUBBER = 'SCRUBBER:'
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
EXPECT = 'EXPECT:'
EXPECT_ERROR = 'EXPECT-ERROR:'
EXIT = 'EXIT:'
COMMAND_LINE = 'COMMAND-LINE:'
REQUIRES = 'REQUIRES:'

# NOTE(review): the definitions below were lost in the source mangling but
# are referenced throughout this file. The values are reconstructed from the
# docstrings ("the exit code is 124" in run_process, "returning 77 when a
# test is skipped" in run_regression) and conventional ANSI escape codes --
# confirm against the original file.
EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
STATUS_TIMEOUT = 124


class Color:
    # ANSI escape sequences used to colorize terminal output.
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'
def print_colored(color, text):
    """Prints `text` in color `color`.

    `color` is an ANSI escape prefix (e.g. `Color.GREEN`); the color is reset
    after every line so subsequent output is unaffected."""
    for line in text.splitlines():
        print(color + line + Color.ENDC)
def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected` as a unified
    diff, coloring added lines green and removed lines red.

    NOTE(review): the middle arguments of `unified_diff` and the fallthrough
    branch were lost in the source mangling (original lines 63-68, 73-74) and
    have been reconstructed from context -- verify against the original."""
    for line in difflib.unified_diff(expected.splitlines(),
                                     actual.splitlines(),
                                     'expected',
                                     'actual',
                                     lineterm=''):
        if line.startswith('+'):
            print_colored(Color.GREEN, line)
        elif line.startswith('-'):
            print_colored(Color.RED, line)
        else:
            # Context/header lines are printed without color.
            print(line)
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124.

    NOTE(review): the timer start/cancel lines were lost in the source
    mangling (original lines 89-96, 99-100, 104-105) and have been
    reconstructed -- verify against the original file."""
    proc = subprocess.Popen(args,
                            cwd=cwd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = STATUS_TIMEOUT
    try:
        # Kill the process if it does not finish within `timeout` seconds.
        timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
        timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        timer.cancel()

    # The timer killed the process and is not active anymore.
    if exit_status == -9 and not timer.is_alive():
        exit_status = STATUS_TIMEOUT

    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`.

    Runs `cvc4_binary --show-config` and collects the names of the "key :
    value" configuration entries.

    NOTE(review): the accumulation and return tail was lost in the source
    mangling (original lines 119-126) and has been reconstructed; the
    assumption that a feature is reported as "<name> : yes" should be
    confirmed against the original file."""
    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)

    return features
def logic_supported_with_proofs(logic):
    """Returns whether CVC4 supports proofs for the SMT-LIB logic `logic`.

    NOTE(review): everything after the argument check was lost in the source
    mangling (original lines 129-150); as written this function falls through
    and returns None. Restore the set of proof-supported logics from the
    original file before relying on it."""
    assert logic is None or isinstance(logic, str)
    # NOTE(review): truncated body -- see docstring.
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input.

    NOTE(review): the if/else skeleton and the scrubber guards were lost in
    the source mangling (original lines 163-168, 178, 181-187, 190) and have
    been reconstructed from context -- verify against the original file."""
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse and dump the benchmark in SMT-LIB v2 form, then
        # feed that dump back to CVC4 over stdin.
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
                                   timeout, output)
    if error_scrubber:
        error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
                                  timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
                   skip_timeout, wrapper, cvc4_binary, benchmark_path,
                   timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `use_skip_return_code` enables/disables
    returning 77 when a test is skipped.

    NOTE(review): many lines of this function were lost in the source
    mangling and have been reconstructed from the surviving fragments --
    verify the reconstruction against the original file."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Pick the comment character and the status/logic extraction patterns
    # based on the benchmark format.
    comment_char = '%'
    status_regex = None
    logic_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        logic_regex = r'\(\s*set-logic\s*(.*)\)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass  # .cvc benchmarks use the defaults above.
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    # NOTE(review): the consumer of `logic` was lost in the source mangling
    # (presumably a proof-support check); kept for fidelity with the original.
    logic = None
    if logic_regex:
        logic_match = re.findall(logic_regex, benchmark_content)
        if logic_match and len(logic_match) == 1:
            logic = logic_match[0]

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    for req_feature in requires:
        if req_feature.startswith("no-"):
            # A "no-<feature>" requirement means the benchmark is only valid
            # when the binary was built WITHOUT <feature>.
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    if not command_lines:
        command_lines.append('')

    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and '--dump-proofs' in all_args:
            print(
                '# Skipped command line options ({}): proof production not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        # Add an additional configuration with extra self-checking options
        # when they are compatible with the given options.
        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-debug-check-models' not in all_args and \
           '--no-check-models' not in all_args and \
           '--debug-check-models' not in all_args:
            extra_command_line_args = ['--debug-check-models']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args:
                extra_command_line_args += ['--check-unsat-cores']
        if '--no-check-abducts' not in all_args and \
            '--check-abducts' not in all_args:
            extra_command_line_args += ['--check-abducts']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                             extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))

    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
                                                   error_scrubber, cvc4_binary,
                                                   command_line_args,
                                                   benchmark_dir,
                                                   benchmark_basename, timeout)
        # Ignore leading whitespace when comparing against the expectation.
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if exit_status == STATUS_TIMEOUT:
            exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
            print('Timeout - Flags: {}'.format(command_line_args))
        elif output != expected_output:
            exit_code = EXIT_FAILURE
            print('not ok - Flags: {}'.format(command_line_args))
            print()
            print('Standard output difference')
            print('=' * 80)
            print_diff(output, expected_output)
            print('=' * 80)
            print()
            if error:
                print('Error output')
                print('=' * 80)
                print_colored(Color.YELLOW, error)
                print('=' * 80)
                print()
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            print()
            print('Error output difference')
            print('=' * 80)
            print_diff(error, expected_error)
            print('=' * 80)
            print()
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print('=' * 80)
            print_colored(Color.BLUE, output)
            print('=' * 80)
            print()
            print('Error output:')
            print('=' * 80)
            print_colored(Color.YELLOW, error)
            print('=' * 80)
            print()
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
def main():
    """Parses the command line arguments and then calls the core of the
    script.

    NOTE(review): the `def main():` line and a few argument-parser lines were
    lost in the source mangling and have been reconstructed -- verify against
    the original file."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--use-skip-return-code', action='store_true')
    parser.add_argument('--skip-timeout', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        # Run the binary under valgrind (through libtool, since the binary
        # may be a libtool wrapper script).
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          args.use_skip_return_code, args.skip_timeout,
                          wrapper, cvc4_binary, args.benchmark, timeout)
if __name__ == "__main__":
    # NOTE(review): the guard body was lost in the source mangling; propagate
    # the regression result as the process exit code.
    exit_code = main()
    sys.exit(exit_code)