Usage:

    run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]

Runs benchmark and checks for correct exit status and output.
# Prefixes of the metadata directives that may appear in comment lines of a
# benchmark (or its ".expect" file). `run_regression` scans for these to
# determine the expected behavior of a benchmark run.
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""
    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    # Default to the "timed out" result; it is overwritten on normal exit.
    out = ''
    err = ''
    exit_status = 124
    # Kill the process from a watchdog thread if it runs too long.
    timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
    try:
        timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        # Always cancel the watchdog so it cannot fire after a normal exit.
        timer.cancel()

    return out, err, exit_status
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input. Returns a (stdout, stderr, exit status) tuple."""
    # The wrapper (e.g. valgrind) goes in front of the binary.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse the benchmark and dump it back out as SMT-LIB 2,
        # then feed that dump to a second CVC4 invocation via stdin.
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr, so decode before comparing against the expected text.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true). Prints
    TAP-style "ok"/"not ok" results to stdout."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Determine, per input language, the comment character used for metadata
    # lines and how to extract/translate the benchmark's status annotation.
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        # ".expect" files always use '%' as their comment character.
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()

    benchmark_content = None
    if metadata_filename == benchmark_path:
        benchmark_content = ''.join(metadata_lines)
    else:
        with open(benchmark_path, 'r') as benchmark_file:
            benchmark_content = benchmark_file.read()

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_line = ''
    for line in metadata_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_line += line[len(COMMAND_LINE):]
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])
    basic_command_line_args += shlex.split(command_line)
    command_line_args_configs = [basic_command_line_args]

    # Unsat cores and proofs require proof support in the binary; skip runs
    # that would exercise them when the binary was built without it.
    if not proof and ('(get-unsat-core)' in benchmark_content
                      or '(get-unsat-assumptions)' in benchmark_content
                      or '--check-proofs' in basic_command_line_args
                      or '--dump-proofs' in basic_command_line_args):
        print(
            '1..0 # Skipped: unsat cores not supported without proof support')
        return

    # Build an optional second configuration with extra self-checking flags.
    extra_command_line_args = []
    if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in basic_command_line_args and \
            '--check-synth-sol' not in basic_command_line_args:
        extra_command_line_args = ['--check-synth-sol']
    if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
            '--no-check-models' not in basic_command_line_args:
        extra_command_line_args = ['--check-models']
    if proof and re.search(r'^(unsat|valid)$', expected_output):
        if '--no-check-proofs' not in basic_command_line_args and \
                '--incremental' not in basic_command_line_args and \
                '--unconstrained-simp' not in basic_command_line_args and \
                not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args = [
                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                '--no-bv-algebraic'
            ]
        if '--no-check-unsat-cores' not in basic_command_line_args and \
                '--incremental' not in basic_command_line_args and \
                '--unconstrained-simp' not in basic_command_line_args and \
                not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-unsat-cores']
    if extra_command_line_args:
        command_line_args_configs.append(
            basic_command_line_args + extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))
def main():
    """Parses the command line arguments and then calls the core of the
    script."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--proof', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    # Run under valgrind (via libtool) when requested and no explicit wrapper
    # was given on the command line.
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    # Per-benchmark timeout in seconds; defaults to 10 minutes.
    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark,
                   timeout)


if __name__ == "__main__":
    main()