2 ###############################################################################
3 # Top contributors (to current version):
4 # Andres Noetzli, Mathias Preiner, Yoni Zohar
6 # This file is part of the cvc5 project.
8 # Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
9 # in the top-level source directory and their institutional affiliations.
10 # All rights reserved. See the file COPYING in the top-level source
11 # directory for licensing information.
12 # #############################################################################
16 Runs benchmark and checks for correct exit status and output.
# Metadata markers: a benchmark file declares its expected behavior in
# comment lines beginning with one of these prefixes; run_regression()
# scans for them to build the test configuration.
# NOTE(review): sibling markers referenced later in this file (EXPECT,
# EXIT — see the parsing loop in run_regression) were lost in this
# paste and must be restored.
SCRUBBER = 'SCRUBBER:'              # command that post-processes stdout
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'  # command that post-processes stderr
EXPECT_ERROR = 'EXPECT-ERROR:'      # one expected stderr line
COMMAND_LINE = 'COMMAND-LINE:'      # extra solver options for this test
REQUIRES = 'REQUIRES:'              # build feature required by this test
def print_colored(color, text):
    """Print `text` line by line, each line wrapped in the ANSI color
    escape `color` and terminated with `Color.ENDC` so the color does
    not bleed into subsequent terminal output."""
    reset = Color.ENDC
    for chunk in text.splitlines():
        print(color + chunk + reset)
def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected`."""
    # Walk a unified diff and colorize it: additions green, removals red.
    # NOTE(review): the remaining arguments of unified_diff() (presumably
    # actual.splitlines() plus the from/to labels) were lost in this
    # paste — as written this call is syntactically incomplete; restore
    # from the original file.
    for line in difflib.unified_diff(expected.splitlines(),
        # Lines present in `actual` but not `expected` print in green.
        if line.startswith('+'):
            print_colored(Color.GREEN, line)
        # Lines expected but missing from `actual` print in red;
        # context lines fall through unprinted.
        elif line.startswith('-'):
            print_colored(Color.RED, line)
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""
    # NOTE(review): several lines were lost in this paste: `cwd` is never
    # passed to Popen here, the watchdog timer is never started, and the
    # try/finally that cancels the timer is missing — restore from the
    # original before running.
    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # Assume a timeout until the process proves otherwise.
    # (STATUS_TIMEOUT is defined elsewhere in this file; per the
    # docstring it is 124.)
    exit_status = STATUS_TIMEOUT
    # Watchdog that kills the process once `timeout` seconds elapse.
    timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
    out, err = proc.communicate(input=s_input)
    exit_status = proc.returncode
    # The timer killed the process and is not active anymore.
    # A return code of -9 means the process died from SIGKILL, which is
    # what proc.kill() sends on POSIX; report it as a timeout instead.
    if exit_status == -9 and not timer.is_alive():
        exit_status = STATUS_TIMEOUT
    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
    # Query the binary's build configuration; the parsing below suggests
    # `--show-config` emits one colon-separated "name : value" entry per
    # line — confirm against the binary's actual output.
    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    # run_process may hand back bytes; normalize to str for parsing.
    if isinstance(output, bytes):
        output = output.decode()
    # NOTE(review): the initialization of `features` and the loop body
    # that derives `key` from `tokens` were lost in this paste — as
    # written, `key` and `features` are undefined names. Restore from
    # the original file.
    disabled_features = []
    for line in output.split('\n'):
        # Split "name : value" and trim whitespace around both parts.
        tokens = [t.strip() for t in line.split(':')]
        disabled_features.append(key)
    # Returns (enabled features, disabled features).
    return features, disabled_features
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""
    # NOTE(review): this paste dropped the control-flow skeleton of this
    # function: the `if dump:`/`else:` split around the two invocation
    # paths, the assignment opening `dump_args = [`, the guards around the
    # scrubber calls, and several trailing call arguments. As written the
    # function is syntactically invalid — restore from the original.
    # Prepend the wrapper command (e.g. valgrind) to the binary.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)
    # Options for the dump pass: parse the benchmark and re-emit it as
    # smt2 without solving.  NOTE(review): the surrounding
    # `dump_args = [` ... `]` lines are missing here.
    '--preprocess-only', '--dump', 'raw-benchmark',
    '--output-lang=smt2', '-qq'
    # Dump pass: have the solver round-trip the benchmark to smt2 ...
    dump_output, _, _ = run_process(
        bin_args + command_line + dump_args + [benchmark_filename],
        benchmark_dir, timeout)
    # ... then solve the dumped text fed in over stdin.
    output, error, exit_status = run_process(
        bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
        timeout, dump_output)
    # Non-dump path: run the solver directly on the benchmark file.
    # NOTE(review): the trailing `timeout)` argument is missing here.
    output, error, exit_status = run_process(
        bin_args + command_line + [benchmark_filename], benchmark_dir,
    # If a scrubber command has been specified then apply it to the output.
    # NOTE(review): the `if scrubber:` / `if error_scrubber:` guards and
    # the trailing timeout/input arguments of both calls are missing.
    output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
    error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
    # Popen in Python 3 returns a bytes object instead of a string for
    # its output, so normalize both streams to str before comparison.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
                   skip_timeout, wrapper, cvc4_binary, benchmark_path,
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
    checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `use_skip_return_code` enables/disables
    returning 77 when a test is skipped."""
    # NOTE(review): this paste lost many lines of this function.  As
    # written it is syntactically invalid: the signature is missing its
    # final parameter (presumably `timeout)`:), several `print(` /
    # `sys.exit(` call openers are gone (leaving orphaned string
    # expressions), and the initializations of `scrubber`,
    # `expected_output`, `expected_error`, `command_lines`, `requires`
    # and `comment_char` are absent, as are the EXPECT/EXIT marker
    # constants and the EXIT_OK/EXIT_SKIP/EXIT_FAILURE codes it uses.
    # Restore from the original file; review notes below mark the most
    # visible gaps.
    # Sanity-check the inputs before doing any work.
    # NOTE(review): the `sys.exit(` opener for this message is missing.
    if not os.access(cvc4_binary, os.X_OK):
        '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
    # Feature lists are used below to evaluate REQUIRES: constraints.
    cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary)
    basic_command_line_args = []
    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    # How a raw status string maps to the expected solver output; the
    # default is the identity, TPTP (.p) overrides it below.
    status_to_output = lambda s: s
    # Per-format status regex (and, in lines lost from this paste,
    # presumably the comment character for each format).
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
    # NOTE(review): the '.cvc' branch body was lost in this paste.
    elif benchmark_ext == '.cvc':
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        # Do not check proofs/unsat-cores with .sy files
        check_unsat_cores = False
    # NOTE(review): the `else:` introducing this exit and the closing
    # argument of format() are missing.
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
    # Read the whole benchmark once; lines are scanned for metadata and
    # the joined content is searched for status annotations.
    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)
    # Extract the metadata for the benchmark.
    error_scrubber = None
    expected_exit_status = None
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        # NOTE(review): the `continue` for this guard is missing.
        if line[0] != comment_char:
        # Strip the comment character and leading whitespace.
        line = line[1:].lstrip()
        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()
    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = re.findall(status_regex, benchmark_content)
        # NOTE(review): the `if match:` guard pairing with the `elif`
        # below was lost in this paste.
        expected_output = status_to_output('\n'.join(match))
    elif expected_exit_status is None:
        # If there is no expected output/error and the exit status has not
        # been set explicitly, the benchmark is invalid.
        sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    # Default: a successful run exits with 0.
    if expected_exit_status is None:
        expected_exit_status = 0
    # Global extra options injected by the build system / CI.
    if 'CVC5_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC5_REGRESSION_ARGS'])
    # Skip benchmarks that ask for unsat cores when that is unsupported.
    # NOTE(review): the `print(` opener for the TAP skip line is missing.
    if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
                                  or '(get-unsat-assumptions)' in benchmark_content):
        '1..0 # Skipped regression: unsat cores not supported without proof support'
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
    # Evaluate REQUIRES: constraints; "no-" prefixed features invert the
    # requirement.  NOTE(review): the `is_negative` bookkeeping and
    # several print(/sys.exit( openers in this loop were lost.
    for req_feature in requires:
        if req_feature.startswith("no-"):
            req_feature = req_feature[len("no-"):]
        if req_feature not in (cvc4_features + cvc4_disabled_features):
            'Illegal requirement in regression: {}\nAllowed requirements: {}'
            ' '.join(cvc4_features + cvc4_disabled_features)))
        if req_feature in cvc4_features:
            print('1..0 # Skipped regression: not valid with {}'.format(
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
    # Always run at least once, with no extra options.
    if not command_lines:
        command_lines.append('')
    # Build the set of option configurations to run.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args
        # NOTE(review): the print( openers and `continue` statements for
        # the two skip messages below are missing.
        if not check_unsat_cores and ('--check-unsat-cores' in all_args):
            '# Skipped command line options ({}): unsat cores not supported without proof support'
        if not check_proofs and '--dump-proofs' in all_args:
            '# Skipped command line options ({}): proof production not supported'
        command_line_args_configs.append(all_args)
        # Derive additional checking configurations from the expected
        # output (check models for sat answers, cores/proofs for unsat).
        expected_output_lines = expected_output.split()
        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            all_args += ['--check-synth-sol']
        if ('sat' in expected_output_lines or \
            'not_entailed' in expected_output_lines or \
            'unknown' in expected_output_lines) and \
           '--no-debug-check-models' not in all_args and \
           '--no-check-models' not in all_args and \
           '--debug-check-models' not in all_args:
            extra_command_line_args += ['--debug-check-models']
        if 'unsat' in expected_output_lines or 'entailed' in expected_output_lines:
            if check_unsat_cores and \
               '--no-produce-unsat-cores' not in all_args and \
               '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args:
                extra_command_line_args += ['--check-unsat-cores']
            if check_proofs and \
               '--no-produce-proofs' not in all_args and \
               '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args:
                extra_command_line_args += ['--check-proofs']
        if '--no-check-abducts' not in all_args and \
           '--check-abducts' not in all_args and \
           'get-abduct' in benchmark_content:
            all_args += ['--check-abducts']
        # Create a test case for each extra argument
        for extra_arg in extra_command_line_args:
            command_line_args_configs.append(all_args + [extra_arg])
    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    # TAP plan line: one test per configuration.
    print('1..{}'.format(len(command_line_args_configs)))
    # NOTE(review): the initialization of `exit_code` (and its final
    # `return`) plus two call arguments of run_benchmark were lost.
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
                                                   error_scrubber, cvc4_binary,
                                                   benchmark_basename, timeout)
        # Ignore leading whitespace on every line when comparing.
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if exit_status == STATUS_TIMEOUT:
            exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
            print('Timeout - Flags: {}'.format(command_line_args))
        elif output != expected_output:
            exit_code = EXIT_FAILURE
            print('not ok - Flags: {}'.format(command_line_args))
            print('Standard output difference')
            print_diff(output, expected_output)
            print('Error output')
            print_colored(Color.YELLOW, error)
        # NOTE(review): the print( openers for the two messages below
        # are missing in this paste.
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            'not ok - Differences between expected and actual output on stderr - Flags: {}'
            .format(command_line_args))
            print('Error output difference')
            print_diff(error, expected_error)
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
            format(expected_exit_status, exit_status, command_line_args))
            print_colored(Color.BLUE, output)
            print('Error output:')
            print_colored(Color.YELLOW, error)
        # NOTE(review): the `else:` introducing the success line is missing.
        print('ok - Flags: {}'.format(command_line_args))
430 """Parses the command line arguments and then calls the core of the
433 parser
= argparse
.ArgumentParser(
435 'Runs benchmark and checks for correct exit status and output.')
436 parser
.add_argument('--dump', action
='store_true')
437 parser
.add_argument('--use-skip-return-code', action
='store_true')
438 parser
.add_argument('--skip-timeout', action
='store_true')
439 parser
.add_argument('--check-unsat-cores', action
='store_true',
441 parser
.add_argument('--no-check-unsat-cores', dest
='check_unsat_cores',
442 action
='store_false')
443 parser
.add_argument('--check-proofs', action
='store_true', default
=True)
444 parser
.add_argument('--no-check-proofs', dest
='check_proofs',
445 action
='store_false')
446 parser
.add_argument('wrapper', nargs
='*')
447 parser
.add_argument('cvc4_binary')
448 parser
.add_argument('benchmark')
451 # Append options passed via RUN_REGRESSION_ARGS to argv
452 if os
.environ
.get('RUN_REGRESSION_ARGS'):
453 argv
.extend(shlex
.split(os
.getenv('RUN_REGRESSION_ARGS')))
455 args
= parser
.parse_args(argv
)
457 cvc4_binary
= os
.path
.abspath(args
.cvc4_binary
)
459 wrapper
= args
.wrapper
460 if os
.environ
.get('VALGRIND') == '1' and not wrapper
:
461 wrapper
= ['libtool', '--mode=execute', 'valgrind']
463 timeout
= float(os
.getenv('TEST_TIMEOUT', '600'))
465 return run_regression(args
.check_unsat_cores
, args
.check_proofs
, args
.dump
,
466 args
.use_skip_return_code
, args
.skip_timeout
,
467 wrapper
, cvc4_binary
, args
.benchmark
, timeout
)
# Script entry point.
# NOTE(review): the guarded body (presumably `sys.exit(main(sys.argv[1:]))`)
# was lost in this paste — the `if` has no body as written.
if __name__ == "__main__":