Enforcing --no-bv-eq, --no-bv-algebraic and --no-bv-ineq when proofs are enabled...
cvc5.git: test/regress/run_regression.py
#!/usr/bin/env python3
"""
Usage:

run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
    [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy ]

Runs a benchmark and checks for correct exit status and output.
"""

import argparse
import difflib
import os
import re
import shlex
import subprocess
import sys
import threading

SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '

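# Illustration (hypothetical benchmark header, not part of the original
# script): these directives are read from comment lines of a benchmark or of
# its ".expect" file. In an .smt2 benchmark, where the comment character is
# ';', a header could look like:
#
#   ; COMMAND-LINE: --incremental
#   ; EXPECT: sat
#   ; EXIT: 0
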

def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(
        args,
        cwd=cwd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = 124
    timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
    try:
        timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        timer.cancel()

    return out, err, exit_status

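# Illustration (hypothetical call, not part of the original script): run a
# command with a 5-second timeout. The pipes are opened in binary mode, so
# `out` and `err` are bytes on Python 3:
#
#   out, err, status = run_process(['echo', 'hello'], '.', 5)
#   assert status == 0 and out == b'hello\n'
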

def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If `dump` is true,
    the function first uses CVC4 to read in and dump the benchmark file and
    then uses that as input."""

    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(
            shlex.split(scrubber), benchmark_dir, timeout, output)
    if error_scrubber:
        error, _, _ = run_process(
            shlex.split(error_scrubber), benchmark_dir, timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)

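# Illustration (hypothetical paths and flags, not part of the original
# script): run a benchmark directly (no dump, no wrapper) and scrub memory
# addresses from stdout before comparing it against the expected output:
#
#   out, err, status = run_benchmark(
#       False, [], 'sed s/0x[0-9a-f]*/0xADDR/g', None, './bin/cvc4',
#       ['--stats'], '.', 'test.smt2', 600)
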

def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true)."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))
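
    # Illustration (hypothetical benchmark, not part of the original
    # script): a TPTP file "foo.p" whose header contains the line
    #     % Status : Theorem
    # yields the expected output "% SZS status Theorem for foo".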

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()

    benchmark_content = None
    if metadata_filename == benchmark_path:
        benchmark_content = ''.join(metadata_lines)
    else:
        with open(benchmark_path, 'r') as benchmark_file:
            benchmark_content = benchmark_file.read()

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_line = ''
    for line in metadata_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_line += line[len(COMMAND_LINE):]
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # If the expected output/error has not been defined in the metadata for
    # the benchmark, try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])
    basic_command_line_args += shlex.split(command_line)
    command_line_args_configs = [basic_command_line_args]
    if not proof and ('(get-unsat-core)' in benchmark_content
                      or '(get-unsat-assumptions)' in benchmark_content
                      or '--check-proofs' in basic_command_line_args
                      or '--dump-proofs' in basic_command_line_args):
        print(
            '1..0 # Skipped: unsat cores not supported without proof support')
        return

    extra_command_line_args = []
    if benchmark_ext == '.sy' and \
        '--no-check-synth-sol' not in basic_command_line_args and \
        '--check-synth-sol' not in basic_command_line_args:
        extra_command_line_args = ['--check-synth-sol']
    if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
       '--no-check-models' not in basic_command_line_args:
        extra_command_line_args = ['--check-models']
    if proof and re.search(r'^(unsat|valid)$', expected_output):
        if '--no-check-proofs' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args = [
                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                '--no-bv-algebraic'
            ]
        if '--no-check-unsat-cores' not in basic_command_line_args and \
           '--incremental' not in basic_command_line_args and \
           '--unconstrained-simp' not in basic_command_line_args and \
           not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-unsat-cores']
    if extra_command_line_args:
        command_line_args_configs.append(
            basic_command_line_args + extra_command_line_args)
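
    # Illustration (hypothetical, not part of the original script): for a
    # benchmark whose expected output is "sat" and whose metadata specifies
    # no extra flags, the script tries two configurations:
    #   command_line_args_configs == [[], ['--check-models']]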

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, and stderr output are as
    # expected.
    print('1..{}'.format(len(command_line_args_configs)))
    print('# Starting')
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename, timeout)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))


def main():
    """Parses the command line arguments and then calls the core of the
    script."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--proof', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))

    run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark,
                   timeout)

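# Example invocation (hypothetical paths, not part of the original script);
# TEST_TIMEOUT and VALGRIND are read from the environment:
#
#   TEST_TIMEOUT=60 ./run_regression.py --proof ./bin/cvc4 regress0/simple.smt2
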

if __name__ == "__main__":
    main()