Remove support for *.expect files in regressions (#2341)
[cvc5.git] / test / regress / run_regression.py
1 #!/usr/bin/env python3
2 """
3 Usage:
4
5 run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
6 [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
7
8 Runs benchmark and checks for correct exit status and output.
9 """
10
11 import argparse
12 import difflib
13 import os
14 import re
15 import shlex
16 import subprocess
17 import sys
18 import threading
19
20 SCRUBBER = 'SCRUBBER: '
21 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
22 EXPECT = 'EXPECT: '
23 EXPECT_ERROR = 'EXPECT-ERROR: '
24 EXIT = 'EXIT: '
25 COMMAND_LINE = 'COMMAND-LINE: '
26 REQUIRES = 'REQUIRES: '
27
28 EXIT_OK = 0
29 EXIT_FAILURE = 1
30
31 def run_process(args, cwd, timeout, s_input=None):
32 """Runs a process with a timeout `timeout` in seconds. `args` are the
33 arguments to execute, `cwd` is the working directory and `s_input` is the
34 input to be sent to the process over stdin. Returns the output, the error
35 output and the exit code of the process. If the process times out, the
36 output and the error output are empty and the exit code is 124."""
37
38 proc = subprocess.Popen(
39 args,
40 cwd=cwd,
41 stdin=subprocess.PIPE,
42 stdout=subprocess.PIPE,
43 stderr=subprocess.PIPE)
44
45 out = ''
46 err = ''
47 exit_status = 124
48 try:
49 if timeout:
50 timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
51 timer.start()
52 out, err = proc.communicate(input=s_input)
53 exit_status = proc.returncode
54 finally:
55 if timeout:
56 timer.cancel()
57
58 return out, err, exit_status
59
60
61 def get_cvc4_features(cvc4_binary):
62 """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
63
64 output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
65 if isinstance(output, bytes):
66 output = output.decode()
67
68 features = []
69 for line in output.split('\n'):
70 tokens = [t.strip() for t in line.split(':')]
71 if len(tokens) == 2:
72 key, value = tokens
73 if value == 'yes':
74 features.append(key)
75
76 return features
77
78
79 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
80 command_line, benchmark_dir, benchmark_filename, timeout):
81 """Runs CVC4 on the file `benchmark_filename` in the directory
82 `benchmark_dir` using the binary `cvc4_binary` with the command line
83 options `command_line`. The output is scrubbed using `scrubber` and
84 `error_scrubber` for stdout and stderr, respectively. If dump is true, the
85 function first uses CVC4 to read in and dump the benchmark file and then
86 uses that as input."""
87
88 bin_args = wrapper[:]
89 bin_args.append(cvc4_binary)
90
91 output = None
92 error = None
93 exit_status = None
94 if dump:
95 dump_args = [
96 '--preprocess-only', '--dump', 'raw-benchmark',
97 '--output-lang=smt2', '-qq'
98 ]
99 dump_output, _, _ = run_process(
100 bin_args + command_line + dump_args + [benchmark_filename],
101 benchmark_dir, timeout)
102 output, error, exit_status = run_process(
103 bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
104 timeout, dump_output)
105 else:
106 output, error, exit_status = run_process(
107 bin_args + command_line + [benchmark_filename], benchmark_dir,
108 timeout)
109
110 # If a scrubber command has been specified then apply it to the output.
111 if scrubber:
112 output, _, _ = run_process(
113 shlex.split(scrubber), benchmark_dir, timeout, output)
114 if error_scrubber:
115 error, _, _ = run_process(
116 shlex.split(error_scrubber), benchmark_dir, timeout, error)
117
118 # Popen in Python 3 returns a bytes object instead of a string for
119 # stdout/stderr.
120 if isinstance(output, bytes):
121 output = output.decode()
122 if isinstance(error, bytes):
123 error = error.decode()
124 return (output.strip(), error.strip(), exit_status)
125
126
127 def run_regression(unsat_cores, proofs, dump, wrapper, cvc4_binary,
128 benchmark_path, timeout):
129 """Determines the expected output for a benchmark, runs CVC4 on it and then
130 checks whether the output corresponds to the expected output. Optionally
131 uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
132 checks proofs (if proofs is true), or dumps a benchmark and uses that as
133 the input (if dump is true)."""
134
135 if not os.access(cvc4_binary, os.X_OK):
136 sys.exit(
137 '"{}" does not exist or is not executable'.format(cvc4_binary))
138 if not os.path.isfile(benchmark_path):
139 sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
140
141 cvc4_features = get_cvc4_features(cvc4_binary)
142
143 basic_command_line_args = []
144
145 benchmark_basename = os.path.basename(benchmark_path)
146 benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
147 benchmark_dir = os.path.dirname(benchmark_path)
148 comment_char = '%'
149 status_regex = None
150 status_to_output = lambda s: s
151 if benchmark_ext == '.smt':
152 status_regex = r':status\s*(sat|unsat)'
153 comment_char = ';'
154 elif benchmark_ext == '.smt2':
155 status_regex = r'set-info\s*:status\s*(sat|unsat)'
156 comment_char = ';'
157 elif benchmark_ext == '.cvc':
158 pass
159 elif benchmark_ext == '.p':
160 basic_command_line_args.append('--finite-model-find')
161 status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
162 status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
163 elif benchmark_ext == '.sy':
164 comment_char = ';'
165 # Do not use proofs/unsat-cores with .sy files
166 unsat_cores = False
167 proofs = False
168 else:
169 sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
170 benchmark_basename))
171
172 benchmark_lines = None
173 with open(benchmark_path, 'r') as benchmark_file:
174 benchmark_lines = benchmark_file.readlines()
175 benchmark_content = ''.join(benchmark_lines)
176
177 # Extract the metadata for the benchmark.
178 scrubber = None
179 error_scrubber = None
180 expected_output = ''
181 expected_error = ''
182 expected_exit_status = None
183 command_lines = []
184 requires = []
185 for line in benchmark_lines:
186 # Skip lines that do not start with a comment character.
187 if line[0] != comment_char:
188 continue
189 line = line[1:].lstrip()
190
191 if line.startswith(SCRUBBER):
192 scrubber = line[len(SCRUBBER):]
193 elif line.startswith(ERROR_SCRUBBER):
194 error_scrubber = line[len(ERROR_SCRUBBER):]
195 elif line.startswith(EXPECT):
196 expected_output += line[len(EXPECT):]
197 elif line.startswith(EXPECT_ERROR):
198 expected_error += line[len(EXPECT_ERROR):]
199 elif line.startswith(EXIT):
200 expected_exit_status = int(line[len(EXIT):])
201 elif line.startswith(COMMAND_LINE):
202 command_lines.append(line[len(COMMAND_LINE):])
203 elif line.startswith(REQUIRES):
204 requires.append(line[len(REQUIRES):].strip())
205 expected_output = expected_output.strip()
206 expected_error = expected_error.strip()
207
208 # Expected output/expected error has not been defined in the metadata for
209 # the benchmark. Try to extract the information from the benchmark itself.
210 if expected_output == '' and expected_error == '':
211 match = None
212 if status_regex:
213 match = re.search(status_regex, benchmark_content)
214
215 if match:
216 expected_output = status_to_output(match.group(1))
217 elif expected_exit_status is None:
218 # If there is no expected output/error and the exit status has not
219 # been set explicitly, the benchmark is invalid.
220 sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
221 if expected_exit_status is None:
222 expected_exit_status = 0
223
224 if 'CVC4_REGRESSION_ARGS' in os.environ:
225 basic_command_line_args += shlex.split(
226 os.environ['CVC4_REGRESSION_ARGS'])
227
228 if not unsat_cores and ('(get-unsat-core)' in benchmark_content
229 or '(get-unsat-assumptions)' in benchmark_content):
230 print(
231 '1..0 # Skipped regression: unsat cores not supported without proof support'
232 )
233 return
234
235 for req_feature in requires:
236 if req_feature.startswith("no-"):
237 inv_feature = req_feature[len("no-"):]
238 if inv_feature in cvc4_features:
239 print('1..0 # Skipped regression: not valid with {}'.format(
240 inv_feature))
241 return
242 elif req_feature not in cvc4_features:
243 print('1..0 # Skipped regression: {} not supported'.format(
244 req_feature))
245 return
246
247 if not command_lines:
248 command_lines.append('')
249
250 command_line_args_configs = []
251 for command_line in command_lines:
252 args = shlex.split(command_line)
253 all_args = basic_command_line_args + args
254
255 if not unsat_cores and ('--check-unsat-cores' in all_args):
256 print(
257 '# Skipped command line options ({}): unsat cores not supported without proof support'.
258 format(all_args))
259 continue
260 if not proofs and ('--check-proofs' in all_args
261 or '--dump-proofs' in all_args):
262 print(
263 '# Skipped command line options ({}): checking proofs not supported without LFSC support'.
264 format(all_args))
265 continue
266
267 command_line_args_configs.append(all_args)
268
269 extra_command_line_args = []
270 if benchmark_ext == '.sy' and \
271 '--no-check-synth-sol' not in all_args and \
272 '--check-synth-sol' not in all_args:
273 extra_command_line_args = ['--check-synth-sol']
274 if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
275 '--no-check-models' not in all_args and \
276 '--check-models' not in all_args:
277 extra_command_line_args = ['--check-models']
278 if proofs and re.search(r'^(unsat|valid)$', expected_output):
279 if '--no-check-proofs' not in all_args and \
280 '--check-proofs' not in all_args and \
281 '--incremental' not in all_args and \
282 '--unconstrained-simp' not in all_args and \
283 not cvc4_binary.endswith('pcvc4'):
284 extra_command_line_args = ['--check-proofs']
285 if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
286 if '--no-check-unsat-cores' not in all_args and \
287 '--check-unsat-cores' not in all_args and \
288 '--incremental' not in all_args and \
289 '--unconstrained-simp' not in all_args and \
290 not cvc4_binary.endswith('pcvc4'):
291 extra_command_line_args += ['--check-unsat-cores']
292 if extra_command_line_args:
293 command_line_args_configs.append(
294 all_args + extra_command_line_args)
295
296 # Run CVC4 on the benchmark with the different option sets and check
297 # whether the exit status, stdout output, stderr output are as expected.
298 print('1..{}'.format(len(command_line_args_configs)))
299 print('# Starting')
300 exit_code = EXIT_OK
301 for command_line_args in command_line_args_configs:
302 output, error, exit_status = run_benchmark(
303 dump, wrapper, scrubber, error_scrubber, cvc4_binary,
304 command_line_args, benchmark_dir, benchmark_basename, timeout)
305 if output != expected_output:
306 exit_code = EXIT_FAILURE
307 print(
308 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
309 format(command_line_args))
310 for line in difflib.context_diff(output.splitlines(),
311 expected_output.splitlines()):
312 print(line)
313 print()
314 print('Error output:')
315 print(error)
316 elif error != expected_error:
317 exit_code = EXIT_FAILURE
318 print(
319 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
320 format(command_line_args))
321 for line in difflib.context_diff(error.splitlines(),
322 expected_error.splitlines()):
323 print(line)
324 elif expected_exit_status != exit_status:
325 exit_code = EXIT_FAILURE
326 print(
327 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
328 format(expected_exit_status, exit_status, command_line_args))
329 else:
330 print('ok - Flags: {}'.format(command_line_args))
331
332 return exit_code
333
334
335 def main():
336 """Parses the command line arguments and then calls the core of the
337 script."""
338
339 parser = argparse.ArgumentParser(
340 description=
341 'Runs benchmark and checks for correct exit status and output.')
342 parser.add_argument('--enable-proof', action='store_true')
343 parser.add_argument('--with-lfsc', action='store_true')
344 parser.add_argument('--dump', action='store_true')
345 parser.add_argument('wrapper', nargs='*')
346 parser.add_argument('cvc4_binary')
347 parser.add_argument('benchmark')
348 args = parser.parse_args()
349 cvc4_binary = os.path.abspath(args.cvc4_binary)
350
351 wrapper = args.wrapper
352 if os.environ.get('VALGRIND') == '1' and not wrapper:
353 wrapper = ['libtool', '--mode=execute', 'valgrind']
354
355 timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
356
357 return run_regression(args.enable_proof, args.with_lfsc, args.dump, wrapper,
358 cvc4_binary, args.benchmark, timeout)
359
360
361 if __name__ == "__main__":
362 exit_code = main()
363 sys.exit(exit_code)