test/regress/run_regression.py

   1 #!/usr/bin/env python3
   2 """
   3 Usage:
   4
   5     run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
   6         [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
   7
   8 Runs benchmark and checks for correct exit status and output.
   9 """
  10
  11 import argparse
  12 import difflib
  13 import os
  14 import re
  15 import shlex
  16 import subprocess
  17 import sys
  18 import threading
  19
  20 SCRUBBER = 'SCRUBBER: '
  21 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
  22 EXPECT = 'EXPECT: '
  23 EXPECT_ERROR = 'EXPECT-ERROR: '
  24 EXIT = 'EXIT: '
  25 COMMAND_LINE = 'COMMAND-LINE: '
  26 REQUIRES = 'REQUIRES: '
  27
  28 EXIT_OK = 0
  29 EXIT_FAILURE = 1
  30
  31 def run_process(args, cwd, timeout, s_input=None):
  32     """Runs a process with a timeout `timeout` in seconds. `args` are the
  33     arguments to execute, `cwd` is the working directory and `s_input` is the
  34     input to be sent to the process over stdin. Returns the output, the error
  35     output and the exit code of the process. If the process times out, the
  36     output and the error output are empty and the exit code is 124."""
  37
  38     proc = subprocess.Popen(
  39         args,
  40         cwd=cwd,
  41         stdin=subprocess.PIPE,
  42         stdout=subprocess.PIPE,
  43         stderr=subprocess.PIPE)
  44
  45     out = ''
  46     err = ''
  47     exit_status = 124
  48     try:
  49         if timeout:
  50             timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
  51             timer.start()
  52         out, err = proc.communicate(input=s_input)
  53         exit_status = proc.returncode
  54     finally:
  55         if timeout:
  56             timer.cancel()
  57
  58     return out, err, exit_status
  59
  60
  61 def get_cvc4_features(cvc4_binary):
  62     """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
  63
  64     output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
  65     if isinstance(output, bytes):
  66         output = output.decode()
  67
  68     features = []
  69     for line in output.split('\n'):
  70         tokens = [t.strip() for t in line.split(':')]
  71         if len(tokens) == 2:
  72             key, value = tokens
  73             if value == 'yes':
  74                 features.append(key)
  75
  76     return features
  77
  78
  79 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
  80                   command_line, benchmark_dir, benchmark_filename, timeout):
  81     """Runs CVC4 on the file `benchmark_filename` in the directory
  82     `benchmark_dir` using the binary `cvc4_binary` with the command line
  83     options `command_line`. The output is scrubbed using `scrubber` and
  84     `error_scrubber` for stdout and stderr, respectively. If dump is true, the
  85     function first uses CVC4 to read in and dump the benchmark file and then
  86     uses that as input."""
  87
  88     bin_args = wrapper[:]
  89     bin_args.append(cvc4_binary)
  90
  91     output = None
  92     error = None
  93     exit_status = None
  94     if dump:
  95         dump_args = [
  96             '--preprocess-only', '--dump', 'raw-benchmark',
  97             '--output-lang=smt2', '-qq'
  98         ]
  99         dump_output, _, _ = run_process(
 100             bin_args + command_line + dump_args + [benchmark_filename],
 101             benchmark_dir, timeout)
 102         output, error, exit_status = run_process(
 103             bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
 104             timeout, dump_output)
 105     else:
 106         output, error, exit_status = run_process(
 107             bin_args + command_line + [benchmark_filename], benchmark_dir,
 108             timeout)
 109
 110     # If a scrubber command has been specified then apply it to the output.
 111     if scrubber:
 112         output, _, _ = run_process(
 113             shlex.split(scrubber), benchmark_dir, timeout, output)
 114     if error_scrubber:
 115         error, _, _ = run_process(
 116             shlex.split(error_scrubber), benchmark_dir, timeout, error)
 117
 118     # Popen in Python 3 returns a bytes object instead of a string for
 119     # stdout/stderr.
 120     if isinstance(output, bytes):
 121         output = output.decode()
 122     if isinstance(error, bytes):
 123         error = error.decode()
 124     return (output.strip(), error.strip(), exit_status)
 125
 126
 127 def run_regression(unsat_cores, proofs, dump, wrapper, cvc4_binary,
 128                    benchmark_path, timeout):
 129     """Determines the expected output for a benchmark, runs CVC4 on it and then
 130     checks whether the output corresponds to the expected output. Optionally
 131     uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
 132     checks proofs (if proofs is true), or dumps a benchmark and uses that as
 133     the input (if dump is true)."""
 134
 135     if not os.access(cvc4_binary, os.X_OK):
 136         sys.exit(
 137             '"{}" does not exist or is not executable'.format(cvc4_binary))
 138     if not os.path.isfile(benchmark_path):
 139         sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
 140
 141     cvc4_features = get_cvc4_features(cvc4_binary)
 142
 143     basic_command_line_args = []
 144
 145     benchmark_basename = os.path.basename(benchmark_path)
 146     benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
 147     benchmark_dir = os.path.dirname(benchmark_path)
 148     comment_char = '%'
 149     status_regex = None
 150     status_to_output = lambda s: s
 151     if benchmark_ext == '.smt':
 152         status_regex = r':status\s*(sat|unsat)'
 153         comment_char = ';'
 154     elif benchmark_ext == '.smt2':
 155         status_regex = r'set-info\s*:status\s*(sat|unsat)'
 156         comment_char = ';'
 157     elif benchmark_ext == '.cvc':
 158         pass
 159     elif benchmark_ext == '.p':
 160         basic_command_line_args.append('--finite-model-find')
 161         status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
 162         status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
 163     elif benchmark_ext == '.sy':
 164         comment_char = ';'
 165         # Do not use proofs/unsat-cores with .sy files
 166         unsat_cores = False
 167         proofs = False
 168     else:
 169         sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
 170             benchmark_basename))
 171
 172     benchmark_lines = None
 173     with open(benchmark_path, 'r') as benchmark_file:
 174         benchmark_lines = benchmark_file.readlines()
 175     benchmark_content = ''.join(benchmark_lines)
 176
 177     # Extract the metadata for the benchmark.
 178     scrubber = None
 179     error_scrubber = None
 180     expected_output = ''
 181     expected_error = ''
 182     expected_exit_status = None
 183     command_lines = []
 184     requires = []
 185     for line in benchmark_lines:
 186         # Skip lines that do not start with a comment character.
 187         if line[0] != comment_char:
 188             continue
 189         line = line[1:].lstrip()
 190
 191         if line.startswith(SCRUBBER):
 192             scrubber = line[len(SCRUBBER):]
 193         elif line.startswith(ERROR_SCRUBBER):
 194             error_scrubber = line[len(ERROR_SCRUBBER):]
 195         elif line.startswith(EXPECT):
 196             expected_output += line[len(EXPECT):]
 197         elif line.startswith(EXPECT_ERROR):
 198             expected_error += line[len(EXPECT_ERROR):]
 199         elif line.startswith(EXIT):
 200             expected_exit_status = int(line[len(EXIT):])
 201         elif line.startswith(COMMAND_LINE):
 202             command_lines.append(line[len(COMMAND_LINE):])
 203         elif line.startswith(REQUIRES):
 204             requires.append(line[len(REQUIRES):].strip())
 205     expected_output = expected_output.strip()
 206     expected_error = expected_error.strip()
 207
 208     # Expected output/expected error has not been defined in the metadata for
 209     # the benchmark. Try to extract the information from the benchmark itself.
 210     if expected_output == '' and expected_error == '':
 211         match = None
 212         if status_regex:
 213             match = re.search(status_regex, benchmark_content)
 214
 215         if match:
 216             expected_output = status_to_output(match.group(1))
 217         elif expected_exit_status is None:
 218             # If there is no expected output/error and the exit status has not
 219             # been set explicitly, the benchmark is invalid.
 220             sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
 221     if expected_exit_status is None:
 222         expected_exit_status = 0
 223
 224     if 'CVC4_REGRESSION_ARGS' in os.environ:
 225         basic_command_line_args += shlex.split(
 226             os.environ['CVC4_REGRESSION_ARGS'])
 227
 228     if not unsat_cores and ('(get-unsat-core)' in benchmark_content
 229                             or '(get-unsat-assumptions)' in benchmark_content):
 230         print(
 231             '1..0 # Skipped regression: unsat cores not supported without proof support'
 232         )
 233         return
 234
 235     for req_feature in requires:
 236         if req_feature.startswith("no-"):
 237             inv_feature = req_feature[len("no-"):]
 238             if inv_feature in cvc4_features:
 239                 print('1..0 # Skipped regression: not valid with {}'.format(
 240                     inv_feature))
 241                 return
 242         elif req_feature not in cvc4_features:
 243             print('1..0 # Skipped regression: {} not supported'.format(
 244                 req_feature))
 245             return
 246
 247     if not command_lines:
 248         command_lines.append('')
 249
 250     command_line_args_configs = []
 251     for command_line in command_lines:
 252         args = shlex.split(command_line)
 253         all_args = basic_command_line_args + args
 254
 255         if not unsat_cores and ('--check-unsat-cores' in all_args):
 256             print(
 257                 '# Skipped command line options ({}): unsat cores not supported without proof support'.
 258                 format(all_args))
 259             continue
 260         if not proofs and ('--check-proofs' in all_args
 261                            or '--dump-proofs' in all_args):
 262             print(
 263                 '# Skipped command line options ({}): checking proofs not supported without LFSC support'.
 264                 format(all_args))
 265             continue
 266
 267         command_line_args_configs.append(all_args)
 268
 269         extra_command_line_args = []
 270         if benchmark_ext == '.sy' and \
 271             '--no-check-synth-sol' not in all_args and \
 272             '--check-synth-sol' not in all_args:
 273             extra_command_line_args = ['--check-synth-sol']
 274         if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
 275            '--no-check-models' not in all_args and \
 276            '--check-models' not in all_args:
 277             extra_command_line_args = ['--check-models']
 278         if proofs and re.search(r'^(unsat|valid)$', expected_output):
 279             if '--no-check-proofs' not in all_args and \
 280                '--check-proofs' not in all_args and \
 281                '--incremental' not in all_args and \
 282                '--unconstrained-simp' not in all_args and \
 283                not cvc4_binary.endswith('pcvc4'):
 284                 extra_command_line_args = ['--check-proofs']
 285         if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
 286             if '--no-check-unsat-cores' not in all_args and \
 287                '--check-unsat-cores' not in all_args and \
 288                '--incremental' not in all_args and \
 289                '--unconstrained-simp' not in all_args and \
 290                not cvc4_binary.endswith('pcvc4'):
 291                 extra_command_line_args += ['--check-unsat-cores']
 292         if extra_command_line_args:
 293             command_line_args_configs.append(
 294                 all_args + extra_command_line_args)
 295
 296     # Run CVC4 on the benchmark with the different option sets and check
 297     # whether the exit status, stdout output, stderr output are as expected.
 298     print('1..{}'.format(len(command_line_args_configs)))
 299     print('# Starting')
 300     exit_code = EXIT_OK
 301     for command_line_args in command_line_args_configs:
 302         output, error, exit_status = run_benchmark(
 303             dump, wrapper, scrubber, error_scrubber, cvc4_binary,
 304             command_line_args, benchmark_dir, benchmark_basename, timeout)
 305         if output != expected_output:
 306             exit_code = EXIT_FAILURE
 307             print(
 308                 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
 309                 format(command_line_args))
 310             for line in difflib.context_diff(output.splitlines(),
 311                                              expected_output.splitlines()):
 312                 print(line)
 313             print()
 314             print('Error output:')
 315             print(error)
 316         elif error != expected_error:
 317             exit_code = EXIT_FAILURE
 318             print(
 319                 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
 320                 format(command_line_args))
 321             for line in difflib.context_diff(error.splitlines(),
 322                                              expected_error.splitlines()):
 323                 print(line)
 324         elif expected_exit_status != exit_status:
 325             exit_code = EXIT_FAILURE
 326             print(
 327                 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
 328                 format(expected_exit_status, exit_status, command_line_args))
 329         else:
 330             print('ok - Flags: {}'.format(command_line_args))
 331
 332     return exit_code
 333
 334
 335 def main():
 336     """Parses the command line arguments and then calls the core of the
 337     script."""
 338
 339     parser = argparse.ArgumentParser(
 340         description=
 341         'Runs benchmark and checks for correct exit status and output.')
 342     parser.add_argument('--enable-proof', action='store_true')
 343     parser.add_argument('--with-lfsc', action='store_true')
 344     parser.add_argument('--dump', action='store_true')
 345     parser.add_argument('wrapper', nargs='*')
 346     parser.add_argument('cvc4_binary')
 347     parser.add_argument('benchmark')
 348     args = parser.parse_args()
 349     cvc4_binary = os.path.abspath(args.cvc4_binary)
 350
 351     wrapper = args.wrapper
 352     if os.environ.get('VALGRIND') == '1' and not wrapper:
 353         wrapper = ['libtool', '--mode=execute', 'valgrind']
 354
 355     timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
 356
 357     return run_regression(args.enable_proof, args.with_lfsc, args.dump, wrapper,
 358                           cvc4_binary, args.benchmark, timeout)
 359
 360
 361 if __name__ == "__main__":
 362     exit_code = main()
 363     sys.exit(exit_code)