5 run_regression.py [--enable-proof] [--with-lfsc] [--dump]
6 [--use-skip-return-code] [wrapper] cvc4-binary
7 [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]
9 Runs benchmark and checks for correct exit status and output.
# Metadata tags that a benchmark file may carry in its comment lines.
# run_regression() scans the benchmark for these prefixes to determine the
# expected behavior and any extra configuration.
SCRUBBER = 'SCRUBBER:'              # shell command to scrub stdout before comparing
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'  # shell command to scrub stderr before comparing
EXPECT = 'EXPECT:'                  # one line of expected stdout output
EXPECT_ERROR = 'EXPECT-ERROR:'      # one line of expected stderr output
EXIT = 'EXIT:'                      # expected exit status of the solver
COMMAND_LINE = 'COMMAND-LINE:'      # extra command-line options to test with
REQUIRES = 'REQUIRES:'              # feature required (or "no-<feature>" excluded)

# Exit codes returned by run_regression(): success, failure, and the
# TAP/automake "skipped test" code 77.
EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""
    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    out = ''
    err = ''
    # Default exit status if communicate() never completes (timeout kill).
    exit_status = 124
    try:
        # `timeout` may be None (e.g. when probing the binary's config), in
        # which case no kill-timer is armed.
        if timeout:
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        # Always cancel the timer so a fast process does not get killed later.
        if timeout:
            timer.cancel()

    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`.

    The features are read from the `--show-config` output, whose lines look
    like "<feature> : <yes|no>"."""
    # No timeout: probing the config of a local binary is expected to be fast.
    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        # Only "key : value" lines are feature entries; skip banners etc.
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)

    return features
def logic_supported_with_proofs(logic):
    """Presumably returns whether proof checking supports benchmarks in
    `logic` — confirm against the full file.

    NOTE(review): only the argument check is visible in this chunk; the
    remainder of the function body is not shown here.
    """
    # `logic` is an SMT-LIB logic name, or None when the benchmark does not
    # declare one.
    assert logic is None or isinstance(logic, str)
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse the benchmark and dump it back out as SMT2 ...
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        # ... second pass: solve the dumped benchmark read from stdin.
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr, so decode before comparing against the expected text.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(unsat_cores, proofs, dump, use_skip_return_code, wrapper,
                   cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `use_skip_return_code` enables/disables
    returning 77 when a test is skipped."""
    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Per-format settings: which comment character carries metadata and how to
    # extract the expected status from the benchmark itself.
    comment_char = '%'
    status_regex = None
    logic_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        logic_regex = r'\(\s*set-logic\s*(.*)\)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    logic = None
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if logic_regex:
        logic_match = re.findall(logic_regex, benchmark_content)
        if logic_match and len(logic_match) == 1:
            logic = logic_match[0]

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    for req_feature in requires:
        if req_feature.startswith("no-"):
            # "no-<feature>": skip when the binary *does* have the feature.
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    if not command_lines:
        command_lines.append('')

    # Build the set of command-line configurations to test, dropping those
    # that need unsupported features and adding --check-* variants.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and ('--check-proofs' in all_args
                           or '--dump-proofs' in all_args):
            print(
                '# Skipped command line options ({}): checking proofs not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-check-models' not in all_args and \
           '--check-models' not in all_args:
            extra_command_line_args = ['--check-models']
        if proofs and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               logic_supported_with_proofs(logic) and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args = ['--check-proofs']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args += ['--check-unsat-cores']
        if '--no-check-abducts' not in all_args and \
            '--check-abducts' not in all_args and \
            not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-abducts']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                             extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        # Strip leading whitespace on every line before comparing.
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if output != expected_output:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
            print()
            print('Error output:')
            print(error)
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print(output)
            print()
            print('Error output:')
            print(error)
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
def main():
    """Parses the command line arguments and then calls the core of the
    script."""
    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--use-skip-return-code', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    # Under VALGRIND=1 and no explicit wrapper, run the solver through
    # libtool+valgrind.
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    # float() accepts both the string from the environment and the default.
    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          args.use_skip_return_code, wrapper, cvc4_binary,
                          args.benchmark, timeout)
if __name__ == "__main__":
    # Propagate the regression result (EXIT_OK/EXIT_FAILURE/EXIT_SKIP) as the
    # process exit status.
    sys.exit(main())