5 run_regression.py [--enable-proof] [--with-lfsc] [--dump] [--cmake]
7 [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]
9 Runs benchmark and checks for correct exit status and output.
# Prefixes used in benchmark comments to declare per-test metadata. A line
# such as `; SCRUBBER: sed ...` (after the comment character is stripped)
# configures how the regression driver runs and checks the benchmark.
SCRUBBER = 'SCRUBBER: '  # command used to normalize stdout before comparing
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '  # command used to normalize stderr
# NOTE(review): EXPECT and EXIT are referenced by the metadata parser below
# but were missing here; restored with the conventional prefix values.
EXPECT = 'EXPECT: '  # a line of expected stdout output
EXPECT_ERROR = 'EXPECT-ERROR: '  # a line of expected stderr output
EXIT = 'EXIT: '  # the expected exit status of the solver
COMMAND_LINE = 'COMMAND-LINE: '  # extra command-line options to run with
REQUIRES = 'REQUIRES: '  # a build feature required (or `no-` forbidden)
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    # Fix: the command `args` and `cwd` must actually be passed to Popen.
    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = 124  # conventional "timed out" status
    try:
        # Fix: the kill timer must be started (and only when a timeout was
        # requested — callers pass timeout=None for unbounded runs).
        if timeout:
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        # Fix: always cancel the timer so a fast process does not leave a
        # pending kill behind.
        if timeout:
            timer.cancel()

    return out, err, exit_status
def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary `cvc4_binary`.

    Parses the `--show-config` output, which lists one `<feature> : yes/no`
    pair per line, and collects the feature names whose value is `yes`."""

    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    # Fix: restore the accumulation of enabled features and the return value,
    # which were missing — the parsed tokens were previously discarded.
    features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)

    return features
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input. Returns `(stdout, stderr, exit_status)` with both
    streams stripped of surrounding whitespace."""

    # Fix: the invocation starts from the (possibly empty) wrapper command,
    # e.g. valgrind, followed by the solver binary.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    # Fix: restore the dump/non-dump branching that selects how the
    # benchmark is fed to the solver.
    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        # Re-run the solver on the dumped benchmark, fed over stdin.
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # pipes, so decode before comparing against the expected text.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(unsat_cores, proofs, dump, cmake, wrapper, cvc4_binary,
                   benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
    checks proofs (if proofs is true), or dumps a benchmark and uses that as
    the input (if dump is true). `cmake` enables/disables CMake-specific
    behavior (skip exit codes). Returns the exit code for the whole run."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Per-format configuration: which character introduces a comment, how to
    # recognize the declared status, and how to turn a status match into the
    # expected solver output.
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):])
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if cmake else EXIT_OK)

    # Honor REQUIRES metadata: skip the whole benchmark when a required
    # feature is missing, or a `no-`-prefixed feature is present.
    for req_feature in requires:
        if req_feature.startswith("no-"):
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return (EXIT_SKIP if cmake else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if cmake else EXIT_OK)

    if not command_lines:
        command_lines.append('')

    # Build the set of command-line configurations to run, adding extra
    # checking options (check-models/proofs/unsat-cores/synth-sol) where
    # applicable and not explicitly disabled.
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and ('--check-proofs' in all_args
                           or '--dump-proofs' in all_args):
            print(
                '# Skipped command line options ({}): checking proofs not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-check-models' not in all_args and \
           '--check-models' not in all_args:
            extra_command_line_args = ['--check-models']
        if proofs and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args = ['--check-proofs']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args += ['--check-unsat-cores']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                             extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
            print()
            print('Error output:')
            print(error)
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print(output)
            print()
            print('Error output:')
            print(error)
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
def main():
    """Parses the command line arguments and then calls the core of the
    script. Returns the exit code produced by `run_regression`."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--cmake', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    # Under `VALGRIND=1`, run the solver through valgrind unless an explicit
    # wrapper was given on the command line.
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    # Per-benchmark timeout in seconds; overridable via the environment.
    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          args.cmake, wrapper, cvc4_binary, args.benchmark,
                          timeout)
if __name__ == "__main__":
    # Fix: the guard had no body; propagate the regression result as the
    # process exit status.
    sys.exit(main())