5 run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
6 [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
8 Runs benchmark and checks for correct exit status and output.
# Markers for the metadata directives a benchmark file may carry in its
# comment lines. run_regression() scans for these prefixes (after stripping
# the comment character) to configure how the benchmark is run and checked.
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
REQUIRES = 'REQUIRES: '

# Exit codes reported by the script itself.
EXIT_OK = 0
EXIT_FAILURE = 1
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    out = b''
    err = b''
    exit_status = 124
    timer = None
    try:
        if timeout:
            # Kill the process if it does not terminate within `timeout`
            # seconds. The timer must be started explicitly; communicate()
            # then returns as soon as the process has been killed.
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
        if timer is not None and not timer.is_alive():
            # The timer already fired, i.e. the process was killed due to the
            # timeout. Report the documented contract (empty output, exit
            # code 124) instead of the negative kill-signal return code.
            out = b''
            err = b''
            exit_status = 124
    finally:
        if timer is not None:
            # Cancel the timer so it cannot fire after a normal termination.
            timer.cancel()

    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`.

    The features are parsed from the binary's `--show-config` output, which
    prints one `<feature> : <yes|no>` pair per line; only features whose
    value is `yes` are returned."""

    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)

    return features
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    # The wrapper (e.g. valgrind) is invoked first, followed by the binary.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse the benchmark and dump it back out as SMT2 ...
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        # ... second pass: solve the dumped benchmark read from stdin.
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr, so normalize to str before comparing.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(unsat_cores, proofs, dump, wrapper, cvc4_binary,
                   benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). Returns EXIT_OK if all configurations pass
    and EXIT_FAILURE otherwise."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Determine the comment character and the pattern used to extract the
    # expected status based on the input language (file extension).
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):])
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    # Skip benchmarks that require features that the binary does not support.
    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return

    for req_feature in requires:
        if req_feature.startswith("no-"):
            # "no-<feature>" means the benchmark is invalid if the binary was
            # built WITH that feature.
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return

    if not command_lines:
        command_lines.append('')

    # Build the list of command-line configurations to run, adding extra
    # checking options (check-models/check-proofs/...) where applicable.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'.
                format(all_args))
            continue
        if not proofs and ('--check-proofs' in all_args
                           or '--dump-proofs' in all_args):
            print(
                '# Skipped command line options ({}): checking proofs not supported without LFSC support'.
                format(all_args))
            continue

        command_line_args_configs.append(all_args)

        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-check-models' not in all_args and \
           '--check-models' not in all_args:
            extra_command_line_args = ['--check-models']
        if proofs and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args = ['--check-proofs']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args += ['--check-unsat-cores']
        if extra_command_line_args:
            command_line_args_configs.append(
                all_args + extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))

    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
            print()
            print('Error output:')
            print(error)
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
def main():
    """Parses the command line arguments and then calls the core of the
    script. Returns the exit code produced by run_regression()."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    # Run under valgrind if requested via the environment and no explicit
    # wrapper was given on the command line.
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    # --enable-proof gates unsat-core testing; --with-lfsc gates proof
    # checking (see run_regression's parameter names).
    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          wrapper, cvc4_binary, args.benchmark, timeout)
361 if __name__
== "__main__":