cmake: Display skipped tests as not run (#2567)
[cvc5.git] / test / regress / run_regression.py
1 #!/usr/bin/env python3
2 """
3 Usage:
4
5 run_regression.py [--enable-proof] [--with-lfsc] [--dump] [--cmake]
6 [wrapper] cvc4-binary
7 [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]
8
9 Runs benchmark and checks for correct exit status and output.
10 """
11
12 import argparse
13 import difflib
14 import os
15 import re
16 import shlex
17 import subprocess
18 import sys
19 import threading
20
21 SCRUBBER = 'SCRUBBER: '
22 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
23 EXPECT = 'EXPECT: '
24 EXPECT_ERROR = 'EXPECT-ERROR: '
25 EXIT = 'EXIT: '
26 COMMAND_LINE = 'COMMAND-LINE: '
27 REQUIRES = 'REQUIRES: '
28
29 EXIT_OK = 0
30 EXIT_FAILURE = 1
31 EXIT_SKIP = 77
32
33
34 def run_process(args, cwd, timeout, s_input=None):
35 """Runs a process with a timeout `timeout` in seconds. `args` are the
36 arguments to execute, `cwd` is the working directory and `s_input` is the
37 input to be sent to the process over stdin. Returns the output, the error
38 output and the exit code of the process. If the process times out, the
39 output and the error output are empty and the exit code is 124."""
40
41 proc = subprocess.Popen(
42 args,
43 cwd=cwd,
44 stdin=subprocess.PIPE,
45 stdout=subprocess.PIPE,
46 stderr=subprocess.PIPE)
47
48 out = ''
49 err = ''
50 exit_status = 124
51 try:
52 if timeout:
53 timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
54 timer.start()
55 out, err = proc.communicate(input=s_input)
56 exit_status = proc.returncode
57 finally:
58 if timeout:
59 timer.cancel()
60
61 return out, err, exit_status
62
63
64 def get_cvc4_features(cvc4_binary):
65 """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
66
67 output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
68 if isinstance(output, bytes):
69 output = output.decode()
70
71 features = []
72 for line in output.split('\n'):
73 tokens = [t.strip() for t in line.split(':')]
74 if len(tokens) == 2:
75 key, value = tokens
76 if value == 'yes':
77 features.append(key)
78
79 return features
80
81
82 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
83 command_line, benchmark_dir, benchmark_filename, timeout):
84 """Runs CVC4 on the file `benchmark_filename` in the directory
85 `benchmark_dir` using the binary `cvc4_binary` with the command line
86 options `command_line`. The output is scrubbed using `scrubber` and
87 `error_scrubber` for stdout and stderr, respectively. If dump is true, the
88 function first uses CVC4 to read in and dump the benchmark file and then
89 uses that as input."""
90
91 bin_args = wrapper[:]
92 bin_args.append(cvc4_binary)
93
94 output = None
95 error = None
96 exit_status = None
97 if dump:
98 dump_args = [
99 '--preprocess-only', '--dump', 'raw-benchmark',
100 '--output-lang=smt2', '-qq'
101 ]
102 dump_output, _, _ = run_process(
103 bin_args + command_line + dump_args + [benchmark_filename],
104 benchmark_dir, timeout)
105 output, error, exit_status = run_process(
106 bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
107 timeout, dump_output)
108 else:
109 output, error, exit_status = run_process(
110 bin_args + command_line + [benchmark_filename], benchmark_dir,
111 timeout)
112
113 # If a scrubber command has been specified then apply it to the output.
114 if scrubber:
115 output, _, _ = run_process(
116 shlex.split(scrubber), benchmark_dir, timeout, output)
117 if error_scrubber:
118 error, _, _ = run_process(
119 shlex.split(error_scrubber), benchmark_dir, timeout, error)
120
121 # Popen in Python 3 returns a bytes object instead of a string for
122 # stdout/stderr.
123 if isinstance(output, bytes):
124 output = output.decode()
125 if isinstance(error, bytes):
126 error = error.decode()
127 return (output.strip(), error.strip(), exit_status)
128
129
130 def run_regression(unsat_cores, proofs, dump, cmake, wrapper, cvc4_binary,
131 benchmark_path, timeout):
132 """Determines the expected output for a benchmark, runs CVC4 on it and then
133 checks whether the output corresponds to the expected output. Optionally
134 uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
135 checks proofs (if proofs is true), or dumps a benchmark and uses that as
136 the input (if dump is true). `cmake` enables/disables CMake-specific
137 behavior."""
138
139 if not os.access(cvc4_binary, os.X_OK):
140 sys.exit(
141 '"{}" does not exist or is not executable'.format(cvc4_binary))
142 if not os.path.isfile(benchmark_path):
143 sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
144
145 cvc4_features = get_cvc4_features(cvc4_binary)
146
147 basic_command_line_args = []
148
149 benchmark_basename = os.path.basename(benchmark_path)
150 benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
151 benchmark_dir = os.path.dirname(benchmark_path)
152 comment_char = '%'
153 status_regex = None
154 status_to_output = lambda s: s
155 if benchmark_ext == '.smt':
156 status_regex = r':status\s*(sat|unsat)'
157 comment_char = ';'
158 elif benchmark_ext == '.smt2':
159 status_regex = r'set-info\s*:status\s*(sat|unsat)'
160 comment_char = ';'
161 elif benchmark_ext == '.cvc':
162 pass
163 elif benchmark_ext == '.p':
164 basic_command_line_args.append('--finite-model-find')
165 status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
166 status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
167 elif benchmark_ext == '.sy':
168 comment_char = ';'
169 # Do not use proofs/unsat-cores with .sy files
170 unsat_cores = False
171 proofs = False
172 else:
173 sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
174 benchmark_basename))
175
176 benchmark_lines = None
177 with open(benchmark_path, 'r') as benchmark_file:
178 benchmark_lines = benchmark_file.readlines()
179 benchmark_content = ''.join(benchmark_lines)
180
181 # Extract the metadata for the benchmark.
182 scrubber = None
183 error_scrubber = None
184 expected_output = ''
185 expected_error = ''
186 expected_exit_status = None
187 command_lines = []
188 requires = []
189 for line in benchmark_lines:
190 # Skip lines that do not start with a comment character.
191 if line[0] != comment_char:
192 continue
193 line = line[1:].lstrip()
194
195 if line.startswith(SCRUBBER):
196 scrubber = line[len(SCRUBBER):]
197 elif line.startswith(ERROR_SCRUBBER):
198 error_scrubber = line[len(ERROR_SCRUBBER):]
199 elif line.startswith(EXPECT):
200 expected_output += line[len(EXPECT):]
201 elif line.startswith(EXPECT_ERROR):
202 expected_error += line[len(EXPECT_ERROR):]
203 elif line.startswith(EXIT):
204 expected_exit_status = int(line[len(EXIT):])
205 elif line.startswith(COMMAND_LINE):
206 command_lines.append(line[len(COMMAND_LINE):])
207 elif line.startswith(REQUIRES):
208 requires.append(line[len(REQUIRES):].strip())
209 expected_output = expected_output.strip()
210 expected_error = expected_error.strip()
211
212 # Expected output/expected error has not been defined in the metadata for
213 # the benchmark. Try to extract the information from the benchmark itself.
214 if expected_output == '' and expected_error == '':
215 match = None
216 if status_regex:
217 match = re.search(status_regex, benchmark_content)
218
219 if match:
220 expected_output = status_to_output(match.group(1))
221 elif expected_exit_status is None:
222 # If there is no expected output/error and the exit status has not
223 # been set explicitly, the benchmark is invalid.
224 sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
225 if expected_exit_status is None:
226 expected_exit_status = 0
227
228 if 'CVC4_REGRESSION_ARGS' in os.environ:
229 basic_command_line_args += shlex.split(
230 os.environ['CVC4_REGRESSION_ARGS'])
231
232 if not unsat_cores and ('(get-unsat-core)' in benchmark_content
233 or '(get-unsat-assumptions)' in benchmark_content):
234 print(
235 '1..0 # Skipped regression: unsat cores not supported without proof support'
236 )
237 return (EXIT_SKIP if cmake else EXIT_OK)
238
239 for req_feature in requires:
240 if req_feature.startswith("no-"):
241 inv_feature = req_feature[len("no-"):]
242 if inv_feature in cvc4_features:
243 print('1..0 # Skipped regression: not valid with {}'.format(
244 inv_feature))
245 return (EXIT_SKIP if cmake else EXIT_OK)
246 elif req_feature not in cvc4_features:
247 print('1..0 # Skipped regression: {} not supported'.format(
248 req_feature))
249 return (EXIT_SKIP if cmake else EXIT_OK)
250
251 if not command_lines:
252 command_lines.append('')
253
254 command_line_args_configs = []
255 for command_line in command_lines:
256 args = shlex.split(command_line)
257 all_args = basic_command_line_args + args
258
259 if not unsat_cores and ('--check-unsat-cores' in all_args):
260 print(
261 '# Skipped command line options ({}): unsat cores not supported without proof support'
262 .format(all_args))
263 continue
264 if not proofs and ('--check-proofs' in all_args
265 or '--dump-proofs' in all_args):
266 print(
267 '# Skipped command line options ({}): checking proofs not supported without LFSC support'
268 .format(all_args))
269 continue
270
271 command_line_args_configs.append(all_args)
272
273 extra_command_line_args = []
274 if benchmark_ext == '.sy' and \
275 '--no-check-synth-sol' not in all_args and \
276 '--sygus-rr' not in all_args and \
277 '--check-synth-sol' not in all_args:
278 extra_command_line_args = ['--check-synth-sol']
279 if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
280 '--no-check-models' not in all_args and \
281 '--check-models' not in all_args:
282 extra_command_line_args = ['--check-models']
283 if proofs and re.search(r'^(unsat|valid)$', expected_output):
284 if '--no-check-proofs' not in all_args and \
285 '--check-proofs' not in all_args and \
286 '--incremental' not in all_args and \
287 '--unconstrained-simp' not in all_args and \
288 not cvc4_binary.endswith('pcvc4'):
289 extra_command_line_args = ['--check-proofs']
290 if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
291 if '--no-check-unsat-cores' not in all_args and \
292 '--check-unsat-cores' not in all_args and \
293 '--incremental' not in all_args and \
294 '--unconstrained-simp' not in all_args and \
295 not cvc4_binary.endswith('pcvc4'):
296 extra_command_line_args += ['--check-unsat-cores']
297 if extra_command_line_args:
298 command_line_args_configs.append(all_args +
299 extra_command_line_args)
300
301 # Run CVC4 on the benchmark with the different option sets and check
302 # whether the exit status, stdout output, stderr output are as expected.
303 print('1..{}'.format(len(command_line_args_configs)))
304 print('# Starting')
305 exit_code = EXIT_OK
306 for command_line_args in command_line_args_configs:
307 output, error, exit_status = run_benchmark(
308 dump, wrapper, scrubber, error_scrubber, cvc4_binary,
309 command_line_args, benchmark_dir, benchmark_basename, timeout)
310 if output != expected_output:
311 exit_code = EXIT_FAILURE
312 print(
313 'not ok - Differences between expected and actual output on stdout - Flags: {}'
314 .format(command_line_args))
315 for line in difflib.context_diff(output.splitlines(),
316 expected_output.splitlines()):
317 print(line)
318 print()
319 print('Error output:')
320 print(error)
321 elif error != expected_error:
322 exit_code = EXIT_FAILURE
323 print(
324 'not ok - Differences between expected and actual output on stderr - Flags: {}'
325 .format(command_line_args))
326 for line in difflib.context_diff(error.splitlines(),
327 expected_error.splitlines()):
328 print(line)
329 elif expected_exit_status != exit_status:
330 exit_code = EXIT_FAILURE
331 print(
332 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
333 format(expected_exit_status, exit_status, command_line_args))
334 print()
335 print('Output:')
336 print(output)
337 print()
338 print('Error output:')
339 print(error)
340 else:
341 print('ok - Flags: {}'.format(command_line_args))
342
343 return exit_code
344
345
346 def main():
347 """Parses the command line arguments and then calls the core of the
348 script."""
349
350 parser = argparse.ArgumentParser(
351 description=
352 'Runs benchmark and checks for correct exit status and output.')
353 parser.add_argument('--enable-proof', action='store_true')
354 parser.add_argument('--with-lfsc', action='store_true')
355 parser.add_argument('--dump', action='store_true')
356 parser.add_argument('--cmake', action='store_true')
357 parser.add_argument('wrapper', nargs='*')
358 parser.add_argument('cvc4_binary')
359 parser.add_argument('benchmark')
360 args = parser.parse_args()
361 cvc4_binary = os.path.abspath(args.cvc4_binary)
362
363 wrapper = args.wrapper
364 if os.environ.get('VALGRIND') == '1' and not wrapper:
365 wrapper = ['libtool', '--mode=execute', 'valgrind']
366
367 timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
368
369 return run_regression(args.enable_proof, args.with_lfsc, args.dump,
370 args.cmake, wrapper, cvc4_binary, args.benchmark,
371 timeout)
372
373
374 if __name__ == "__main__":
375 exit_code = main()
376 sys.exit(exit_code)