Fix get-unsat-core detection in regression script (#1773)
[cvc5.git] / test / regress / run_regression.py
1 #!/usr/bin/env python3
2 """
3 Usage:
4
5 run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
6 [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
7
8 Runs benchmark and checks for correct exit status and output.
9 """
10
11 import argparse
12 import difflib
13 import os
14 import re
15 import shlex
16 import subprocess
17 import sys
18
19 SCRUBBER = 'SCRUBBER: '
20 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
21 EXPECT = 'EXPECT: '
22 EXPECT_ERROR = 'EXPECT-ERROR: '
23 EXIT = 'EXIT: '
24 COMMAND_LINE = 'COMMAND-LINE: '
25
26
27 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
28 command_line, benchmark_dir, benchmark_filename):
29 """Runs CVC4 on the file `benchmark_filename` in the directory
30 `benchmark_dir` using the binary `cvc4_binary` with the command line
31 options `command_line`. The output is scrubbed using `scrubber` and
32 `error_scrubber` for stdout and stderr, respectively. If dump is true, the
33 function first uses CVC4 to read in and dump the benchmark file and then
34 uses that as input."""
35
36 bin_args = wrapper[:]
37 bin_args.append(cvc4_binary)
38
39 output = None
40 error = None
41 exit_status = None
42 if dump:
43 dump_args = [
44 '--preprocess-only', '--dump', 'raw-benchmark',
45 '--output-lang=smt2', '-qq'
46 ]
47 dump_process = subprocess.Popen(
48 bin_args + command_line + dump_args + [benchmark_filename],
49 cwd=benchmark_dir,
50 stdout=subprocess.PIPE,
51 stderr=subprocess.PIPE)
52 dump_output, _ = dump_process.communicate()
53 process = subprocess.Popen(
54 bin_args + command_line + ['--lang=smt2', '-'],
55 cwd=benchmark_dir,
56 stdin=subprocess.PIPE,
57 stdout=subprocess.PIPE,
58 stderr=subprocess.PIPE)
59 output, error = process.communicate(input=dump_output)
60 exit_status = process.returncode
61 else:
62 process = subprocess.Popen(
63 bin_args + command_line + [benchmark_filename],
64 cwd=benchmark_dir,
65 stdout=subprocess.PIPE,
66 stderr=subprocess.PIPE)
67 output, error = process.communicate()
68 exit_status = process.returncode
69
70 # If a scrubber command has been specified then apply it to the output.
71 if scrubber:
72 scrubber_process = subprocess.Popen(
73 shlex.split(scrubber),
74 stdin=subprocess.PIPE,
75 stdout=subprocess.PIPE,
76 stderr=subprocess.PIPE)
77 output, _ = scrubber_process.communicate(input=output)
78 if error_scrubber:
79 error_scrubber_process = subprocess.Popen(
80 shlex.split(error_scrubber),
81 stdin=subprocess.PIPE,
82 stdout=subprocess.PIPE,
83 stderr=subprocess.PIPE)
84 error, _ = error_scrubber_process.communicate(input=error)
85
86 # Popen in Python 3 returns a bytes object instead of a string for
87 # stdout/stderr.
88 if isinstance(output, bytes):
89 output = output.decode()
90 if isinstance(error, bytes):
91 error = error.decode()
92 return (output.strip(), error.strip(), exit_status)
93
94
95 def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
96 """Determines the expected output for a benchmark, runs CVC4 on it and then
97 checks whether the output corresponds to the expected output. Optionally
98 uses a wrapper `wrapper`, tests proof generation (if proof is true), or
99 dumps a benchmark and uses that as the input (if dump is true)."""
100
101 if not os.access(cvc4_binary, os.X_OK):
102 sys.exit(
103 '"{}" does not exist or is not executable'.format(cvc4_binary))
104 if not os.path.isfile(benchmark_path):
105 sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
106
107 basic_command_line_args = []
108
109 benchmark_basename = os.path.basename(benchmark_path)
110 benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
111 benchmark_dir = os.path.dirname(benchmark_path)
112 comment_char = '%'
113 status_regex = None
114 status_to_output = lambda s: s
115 if benchmark_ext == '.smt':
116 status_regex = r':status\s*(sat|unsat)'
117 comment_char = ';'
118 elif benchmark_ext == '.smt2':
119 status_regex = r'set-info\s*:status\s*(sat|unsat)'
120 comment_char = ';'
121 elif benchmark_ext == '.cvc':
122 pass
123 elif benchmark_ext == '.p':
124 basic_command_line_args.append('--finite-model-find')
125 status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
126 status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
127 elif benchmark_ext == '.sy':
128 comment_char = ';'
129 # Do not use proofs/unsat-cores with .sy files
130 proof = False
131 else:
132 sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
133 benchmark_basename))
134
135 # If there is an ".expect" file for the benchmark, read the metadata
136 # from there, otherwise from the benchmark file.
137 metadata_filename = benchmark_path + '.expect'
138 if os.path.isfile(metadata_filename):
139 comment_char = '%'
140 else:
141 metadata_filename = benchmark_path
142
143 metadata_lines = None
144 with open(metadata_filename, 'r') as metadata_file:
145 metadata_lines = metadata_file.readlines()
146 metadata_content = ''.join(metadata_lines)
147
148 # Extract the metadata for the benchmark.
149 scrubber = None
150 error_scrubber = None
151 expected_output = ''
152 expected_error = ''
153 expected_exit_status = None
154 command_line = ''
155 for line in metadata_lines:
156 # Skip lines that do not start with "%"
157 if line[0] != comment_char:
158 continue
159 line = line[2:]
160
161 if line.startswith(SCRUBBER):
162 scrubber = line[len(SCRUBBER):]
163 elif line.startswith(ERROR_SCRUBBER):
164 error_scrubber = line[len(ERROR_SCRUBBER):]
165 elif line.startswith(EXPECT):
166 expected_output += line[len(EXPECT):]
167 elif line.startswith(EXPECT_ERROR):
168 expected_error += line[len(EXPECT_ERROR):]
169 elif line.startswith(EXIT):
170 expected_exit_status = int(line[len(EXIT):])
171 elif line.startswith(COMMAND_LINE):
172 command_line += line[len(COMMAND_LINE):]
173 expected_output = expected_output.strip()
174 expected_error = expected_error.strip()
175
176 # Expected output/expected error has not been defined in the metadata for
177 # the benchmark. Try to extract the information from the benchmark itself.
178 if expected_output == '' and expected_error == '':
179 match = None
180 if status_regex:
181 match = re.search(status_regex, metadata_content)
182
183 if match:
184 expected_output = status_to_output(match.group(1))
185 elif expected_exit_status is None:
186 # If there is no expected output/error and the exit status has not
187 # been set explicitly, the benchmark is invalid.
188 sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
189
190 if not proof and ('(get-unsat-core)' in metadata_content
191 or '(get-unsat-assumptions)' in metadata_content):
192 print(
193 '1..0 # Skipped: unsat cores not supported without proof support')
194 return
195
196 if expected_exit_status is None:
197 expected_exit_status = 0
198
199 if 'CVC4_REGRESSION_ARGS' in os.environ:
200 basic_command_line_args += shlex.split(
201 os.environ['CVC4_REGRESSION_ARGS'])
202 basic_command_line_args += shlex.split(command_line)
203 command_line_args_configs = [basic_command_line_args]
204
205 extra_command_line_args = []
206 if benchmark_ext == '.sy' and \
207 '--no-check-synth-sol' not in basic_command_line_args and \
208 '--check-synth-sol' not in basic_command_line_args:
209 extra_command_line_args = ['--check-synth-sol']
210 if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
211 '--no-check-models' not in basic_command_line_args:
212 extra_command_line_args = ['--check-models']
213 if proof and re.search(r'^(unsat|valid)$', expected_output):
214 if '--no-check-proofs' not in basic_command_line_args and \
215 '--incremental' not in basic_command_line_args and \
216 '--unconstrained-simp' not in basic_command_line_args and \
217 not cvc4_binary.endswith('pcvc4'):
218 extra_command_line_args = [
219 '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
220 '--no-bv-algebraic'
221 ]
222 if '--no-check-unsat-cores' not in basic_command_line_args and \
223 '--incremental' not in basic_command_line_args and \
224 '--unconstrained-simp' not in basic_command_line_args and \
225 not cvc4_binary.endswith('pcvc4'):
226 extra_command_line_args += ['--check-unsat-cores']
227 if extra_command_line_args:
228 command_line_args_configs.append(
229 basic_command_line_args + extra_command_line_args)
230
231 # Run CVC4 on the benchmark with the different option sets and check
232 # whether the exit status, stdout output, stderr output are as expected.
233 print('1..{}'.format(len(command_line_args_configs)))
234 print('# Starting')
235 for command_line_args in command_line_args_configs:
236 output, error, exit_status = run_benchmark(
237 dump, wrapper, scrubber, error_scrubber, cvc4_binary,
238 command_line_args, benchmark_dir, benchmark_basename)
239 if output != expected_output:
240 print(
241 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
242 format(command_line_args))
243 for line in difflib.context_diff(output.splitlines(),
244 expected_output.splitlines()):
245 print(line)
246 elif error != expected_error:
247 print(
248 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
249 format(command_line_args))
250 for line in difflib.context_diff(error.splitlines(),
251 expected_error.splitlines()):
252 print(line)
253 elif expected_exit_status != exit_status:
254 print(
255 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
256 format(expected_exit_status, exit_status, command_line_args))
257 else:
258 print('ok - Flags: {}'.format(command_line_args))
259
260
261 def main():
262 """Parses the command line arguments and then calls the core of the
263 script."""
264
265 parser = argparse.ArgumentParser(
266 description=
267 'Runs benchmark and checks for correct exit status and output.')
268 parser.add_argument('--proof', action='store_true')
269 parser.add_argument('--dump', action='store_true')
270 parser.add_argument('wrapper', nargs='*')
271 parser.add_argument('cvc4_binary')
272 parser.add_argument('benchmark')
273 args = parser.parse_args()
274 cvc4_binary = os.path.abspath(args.cvc4_binary)
275
276 wrapper = args.wrapper
277 if os.environ.get('VALGRIND') == '1' and not wrapper:
278 wrapper = ['libtool', '--mode=execute', 'valgrind']
279
280 run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)
281
282
283 if __name__ == "__main__":
284 main()