run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
    [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]

Runs benchmark and checks for correct exit status and output.
# Metadata directives recognized in a benchmark (or its ".expect" file).
# Each directive appears on a comment line, immediately after the comment
# character, and is identified by one of the following prefixes.
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    # Defaults correspond to the timeout case documented above; they are
    # overwritten if the process finishes in time.
    out = ''
    err = ''
    exit_status = 124
    try:
        # Kill the process if it has not finished after `timeout` seconds.
        timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
        timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        # Always cancel the timer so a pending Timer thread cannot keep the
        # script alive (or kill an unrelated later process).
        timer.cancel()

    return out, err, exit_status
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    # The wrapper (e.g. valgrind) is prepended to the binary. Copy the list
    # so the caller's `wrapper` is not mutated by the append below.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First have CVC4 parse and dump the benchmark in raw smt2 form, then
        # feed that dump back to CVC4 over stdin.
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr, so normalize to str before comparisons.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true)."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Determine, per input language, the comment character, the regex that
    # extracts the expected status, and how to turn that status into the
    # expected output.
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()

    benchmark_content = None
    if metadata_filename == benchmark_path:
        benchmark_content = ''.join(metadata_lines)
    else:
        with open(benchmark_path, 'r') as benchmark_file:
            benchmark_content = benchmark_file.read()

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    for line in metadata_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):])
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)
        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    # Without proof support, benchmarks that require unsat cores or proofs
    # cannot run; report a skipped TAP plan and bail out.
    if not proof and ('(get-unsat-core)' in benchmark_content
                      or '(get-unsat-assumptions)' in benchmark_content
                      or '--check-proofs' in basic_command_line_args
                      or '--dump-proofs' in basic_command_line_args):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return

    if not command_lines:
        command_lines.append('')

    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        if proof or ('--check-proofs' not in args
                     and '--dump-proofs' not in args):
            all_args = basic_command_line_args + args
            command_line_args_configs.append(all_args)

            # Additionally run with cross-checking options enabled where they
            # are compatible with the benchmark and the given options.
            extra_command_line_args = []
            if benchmark_ext == '.sy' and \
                '--no-check-synth-sol' not in all_args and \
                '--check-synth-sol' not in all_args:
                extra_command_line_args = ['--check-synth-sol']
            if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
               '--no-check-models' not in all_args:
                extra_command_line_args = ['--check-models']
            if proof and re.search(r'^(unsat|valid)$', expected_output):
                if '--no-check-proofs' not in all_args and \
                   '--incremental' not in all_args and \
                   '--unconstrained-simp' not in all_args and \
                   not cvc4_binary.endswith('pcvc4'):
                    extra_command_line_args = [
                        '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                        '--no-bv-algebraic'
                    ]
                if '--no-check-unsat-cores' not in all_args and \
                   '--incremental' not in all_args and \
                   '--unconstrained-simp' not in all_args and \
                   not cvc4_binary.endswith('pcvc4'):
                    extra_command_line_args += ['--check-unsat-cores']
            if extra_command_line_args:
                command_line_args_configs.append(
                    all_args + extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))
288 """Parses the command line arguments and then calls the core of the
291 parser
= argparse
.ArgumentParser(
293 'Runs benchmark and checks for correct exit status and output.')
294 parser
.add_argument('--proof', action
='store_true')
295 parser
.add_argument('--dump', action
='store_true')
296 parser
.add_argument('wrapper', nargs
='*')
297 parser
.add_argument('cvc4_binary')
298 parser
.add_argument('benchmark')
299 args
= parser
.parse_args()
300 cvc4_binary
= os
.path
.abspath(args
.cvc4_binary
)
302 wrapper
= args
.wrapper
303 if os
.environ
.get('VALGRIND') == '1' and not wrapper
:
304 wrapper
= ['libtool', '--mode=execute', 'valgrind']
306 timeout
= float(os
.getenv('TEST_TIMEOUT', 600.0))
308 run_regression(args
.proof
, args
.dump
, wrapper
, cvc4_binary
, args
.benchmark
,
# Script entry point: only run the regression when executed directly, not
# when imported as a module.
if __name__ == "__main__":
    main()