run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
    [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]

Runs benchmark and checks for correct exit status and output.
# Marker prefixes recognized in a benchmark's metadata comments (either in the
# benchmark file itself or in a sibling ".expect" file).  `run_regression`
# strips the comment character and then matches the remainder of the line
# against these prefixes.  EXPECT and EXIT were missing although
# `run_regression` reads them, so they are defined here as well.
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input.

    Returns a tuple (stdout, stderr, exit status) with both output streams
    whitespace-stripped."""

    # The actual command starts with the (possibly empty) wrapper, e.g.
    # valgrind, followed by the binary.  Copy the wrapper so the caller's
    # list is not mutated.
    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        # First pass: parse the benchmark and dump it back out as SMT-LIB v2.
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_process = subprocess.Popen(
            bin_args + command_line + dump_args + [benchmark_filename],
            # benchmark_filename is a basename relative to benchmark_dir, so
            # the child has to run there to find the file.
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        dump_output, _ = dump_process.communicate()
        # Second pass: solve the dumped benchmark, fed in via stdin.
        process = subprocess.Popen(
            bin_args + command_line + ['--lang=smt2', '-'],
            cwd=benchmark_dir,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate(input=dump_output)
        exit_status = process.returncode
    else:
        process = subprocess.Popen(
            bin_args + command_line + [benchmark_filename],
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate()
        exit_status = process.returncode

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        scrubber_process = subprocess.Popen(
            shlex.split(scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, _ = scrubber_process.communicate(input=output)
    if error_scrubber:
        error_scrubber_process = subprocess.Popen(
            shlex.split(error_scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        error, _ = error_scrubber_process.communicate(input=error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # pipes, so normalize to str before comparing with the expected output.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true)."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)

    # Per input language: the comment character that marks metadata lines, the
    # regex that extracts the declared status, and the mapping from that
    # status to the expected solver output.
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()

    # The benchmark content is needed below to search for the status and for
    # unsat-core commands; reuse the metadata lines when they are one and the
    # same file.
    benchmark_content = None
    if metadata_filename == benchmark_path:
        benchmark_content = ''.join(metadata_lines)
    else:
        with open(benchmark_path, 'r') as benchmark_file:
            benchmark_content = benchmark_file.read()

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_line = ''
    for line in metadata_lines:
        # Skip lines that do not start with the comment character.
        if line[0] != comment_char:
            continue
        # Drop the comment character and the following space.
        line = line[2:]

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_line += line[len(COMMAND_LINE):]
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))

    # Benchmarks that ask for unsat cores need proof support.
    if not proof and ('(get-unsat-core)' in benchmark_content
                      or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped: unsat cores not supported without proof support')
        return

    if expected_exit_status is None:
        expected_exit_status = 0

    # Base options: environment-supplied args plus the benchmark's
    # COMMAND-LINE metadata.
    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])
    basic_command_line_args += shlex.split(command_line)
    command_line_args_configs = [basic_command_line_args]

    # Possibly add a second configuration that enables extra self-checking
    # options, unless the benchmark explicitly opts out.
    extra_command_line_args = []
    if benchmark_ext == '.sy' and \
        '--no-check-synth-sol' not in basic_command_line_args and \
        '--check-synth-sol' not in basic_command_line_args:
        extra_command_line_args = ['--check-synth-sol']
    if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
       '--no-check-models' not in basic_command_line_args:
        extra_command_line_args = ['--check-models']
    if proof and re.search(r'^(unsat|valid)$', expected_output):
        if '--no-check-proofs' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args = [
                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                '--no-bv-algebraic'
            ]
        if '--no-check-unsat-cores' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-unsat-cores']
    if extra_command_line_args:
        command_line_args_configs.append(
            basic_command_line_args + extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    # The results are reported in TAP format ("1..N", then ok / not ok).
    print('1..{}'.format(len(command_line_args_configs)))
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))
268 """Parses the command line arguments and then calls the core of the
271 parser
= argparse
.ArgumentParser(
273 'Runs benchmark and checks for correct exit status and output.')
274 parser
.add_argument('--proof', action
='store_true')
275 parser
.add_argument('--dump', action
='store_true')
276 parser
.add_argument('wrapper', nargs
='*')
277 parser
.add_argument('cvc4_binary')
278 parser
.add_argument('benchmark')
279 args
= parser
.parse_args()
280 cvc4_binary
= os
.path
.abspath(args
.cvc4_binary
)
282 wrapper
= args
.wrapper
283 if os
.environ
.get('VALGRIND') == '1' and not wrapper
:
284 wrapper
= ['libtool', '--mode=execute', 'valgrind']
286 run_regression(args
.proof
, args
.dump
, wrapper
, cvc4_binary
, args
.benchmark
)
289 if __name__
== "__main__":