#!/usr/bin/env python3
"""
Usage:

    run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]

Runs benchmark and checks for correct exit status and output.
"""

import argparse
import difflib
import os
import re
import shlex
import subprocess
import sys
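
# Example invocation (binary and benchmark paths are hypothetical):
#
#     ./run_regression.py --proof ./cvc4 regress0/example.smt2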
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
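
# These prefixes are matched against comment lines of the benchmark (or of
# its ".expect" file). A benchmark header might look like this (contents
# hypothetical; ";" is the comment character for .smt2 files):
#
#     ; COMMAND-LINE: --incremental
#     ; EXPECT: sat
#     ; EXIT: 0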


def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_process = subprocess.Popen(
            bin_args + command_line + dump_args + [benchmark_filename],
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        dump_output, _ = dump_process.communicate()
        process = subprocess.Popen(
            bin_args + command_line + ['--lang=smt2', '-'],
            cwd=benchmark_dir,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate(input=dump_output)
        exit_status = process.returncode
    else:
        process = subprocess.Popen(
            bin_args + command_line + [benchmark_filename],
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate()
        exit_status = process.returncode

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        scrubber_process = subprocess.Popen(
            shlex.split(scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, _ = scrubber_process.communicate(input=output)
    if error_scrubber:
        error_scrubber_process = subprocess.Popen(
            shlex.split(error_scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        error, _ = error_scrubber_process.communicate(input=error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
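
# A direct call might look like this (all values hypothetical):
#
#     out, err, status = run_benchmark(
#         dump=False, wrapper=[], scrubber=None, error_scrubber=None,
#         cvc4_binary='/path/to/cvc4', command_line=['--incremental'],
#         benchmark_dir='regress0', benchmark_filename='example.smt2')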

def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true)."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()
    metadata_content = ''.join(metadata_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_line = ''
    for line in metadata_lines:
        # Skip lines that do not start with the comment character.
        if line[0] != comment_char:
            continue
        line = line[2:]

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_line += line[len(COMMAND_LINE):]
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, metadata_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))

    if not proof and ('(get-unsat-core)' in metadata_content
                      or '(get-unsat-assumptions)' in metadata_content):
        print(
            '1..0 # Skipped: unsat cores not supported without proof support')
        return

    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])
    basic_command_line_args += shlex.split(command_line)
    command_line_args_configs = [basic_command_line_args]
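
    # CVC4_REGRESSION_ARGS lets the surrounding build setup add flags to
    # every regression run, e.g. (shell, flag value hypothetical):
    #
    #     CVC4_REGRESSION_ARGS='--strings-exp' ./run_regression.py ...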

    extra_command_line_args = []
    if benchmark_ext == '.sy' and \
        '--no-check-synth-sol' not in basic_command_line_args and \
        '--check-synth-sol' not in basic_command_line_args:
        extra_command_line_args = ['--check-synth-sol']
    if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
       '--no-check-models' not in basic_command_line_args:
        extra_command_line_args = ['--check-models']
    if proof and re.search(r'^(unsat|valid)$', expected_output):
        if '--no-check-proofs' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args = [
                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                '--no-bv-algebraic'
            ]
        if '--no-check-unsat-cores' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-unsat-cores']
    if extra_command_line_args:
        command_line_args_configs.append(
            basic_command_line_args + extra_command_line_args)
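
    # For example, an "unsat" benchmark run with --proof and an empty base
    # configuration yields two configurations (assuming none of the excluding
    # flags above are present):
    #
    #     [[], ['--check-proofs', '--no-bv-eq', '--no-bv-ineq',
    #           '--no-bv-algebraic', '--check-unsat-cores']]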

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, and stderr output are as
    # expected.
    print('1..{}'.format(len(command_line_args_configs)))
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))
262 """Parses the command line arguments and then calls the core of the
265 parser
= argparse
.ArgumentParser(
267 'Runs benchmark and checks for correct exit status and output.')
268 parser
.add_argument('--proof', action
='store_true')
269 parser
.add_argument('--dump', action
='store_true')
270 parser
.add_argument('wrapper', nargs
='*')
271 parser
.add_argument('cvc4_binary')
272 parser
.add_argument('benchmark')
273 args
= parser
.parse_args()
274 cvc4_binary
= os
.path
.abspath(args
.cvc4_binary
)
276 wrapper
= args
.wrapper
277 if os
.environ
.get('VALGRIND') == '1' and not wrapper
:
278 wrapper
= ['libtool', '--mode=execute', 'valgrind']
280 run_regression(args
.proof
, args
.dump
, wrapper
, cvc4_binary
, args
.benchmark
)


if __name__ == "__main__":
    main()