test/regress/run_regression.py

   1 #!/usr/bin/env python3
   2 """
   3 Usage:
   4
   5     run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
   6         [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
   7
   8 Runs benchmark and checks for correct exit status and output.
   9 """
  10
  11 import argparse
  12 import difflib
  13 import os
  14 import re
  15 import shlex
  16 import subprocess
  17 import sys
  18
  19 SCRUBBER = 'SCRUBBER: '
  20 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
  21 EXPECT = 'EXPECT: '
  22 EXPECT_ERROR = 'EXPECT-ERROR: '
  23 EXIT = 'EXIT: '
  24 COMMAND_LINE = 'COMMAND-LINE: '
  25
  26
  27 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
  28                   command_line, benchmark_dir, benchmark_filename):
  29     """Runs CVC4 on the file `benchmark_filename` in the directory
  30     `benchmark_dir` using the binary `cvc4_binary` with the command line
  31     options `command_line`. The output is scrubbed using `scrubber` and
  32     `error_scrubber` for stdout and stderr, respectively. If dump is true, the
  33     function first uses CVC4 to read in and dump the benchmark file and then
  34     uses that as input."""
  35
  36     bin_args = wrapper[:]
  37     bin_args.append(cvc4_binary)
  38
  39     output = None
  40     error = None
  41     exit_status = None
  42     if dump:
  43         dump_args = [
  44             '--preprocess-only', '--dump', 'raw-benchmark',
  45             '--output-lang=smt2', '-qq'
  46         ]
  47         dump_process = subprocess.Popen(
  48             bin_args + command_line + dump_args + [benchmark_filename],
  49             cwd=benchmark_dir,
  50             stdout=subprocess.PIPE,
  51             stderr=subprocess.PIPE)
  52         dump_output, _ = dump_process.communicate()
  53         process = subprocess.Popen(
  54             bin_args + command_line + ['--lang=smt2', '-'],
  55             cwd=benchmark_dir,
  56             stdin=subprocess.PIPE,
  57             stdout=subprocess.PIPE,
  58             stderr=subprocess.PIPE)
  59         output, error = process.communicate(input=dump_output)
  60         exit_status = process.returncode
  61     else:
  62         process = subprocess.Popen(
  63             bin_args + command_line + [benchmark_filename],
  64             cwd=benchmark_dir,
  65             stdout=subprocess.PIPE,
  66             stderr=subprocess.PIPE)
  67         output, error = process.communicate()
  68         exit_status = process.returncode
  69
  70     # If a scrubber command has been specified then apply it to the output.
  71     if scrubber:
  72         scrubber_process = subprocess.Popen(
  73             shlex.split(scrubber),
  74             stdin=subprocess.PIPE,
  75             stdout=subprocess.PIPE,
  76             stderr=subprocess.PIPE)
  77         output, _ = scrubber_process.communicate(input=output)
  78     if error_scrubber:
  79         error_scrubber_process = subprocess.Popen(
  80             shlex.split(error_scrubber),
  81             stdin=subprocess.PIPE,
  82             stdout=subprocess.PIPE,
  83             stderr=subprocess.PIPE)
  84         error, _ = error_scrubber_process.communicate(input=error)
  85
  86     # Popen in Python 3 returns a bytes object instead of a string for
  87     # stdout/stderr.
  88     if isinstance(output, bytes):
  89         output = output.decode()
  90     if isinstance(error, bytes):
  91         error = error.decode()
  92     return (output.strip(), error.strip(), exit_status)
  93
  94
  95 def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
  96     """Determines the expected output for a benchmark, runs CVC4 on it and then
  97     checks whether the output corresponds to the expected output. Optionally
  98     uses a wrapper `wrapper`, tests proof generation (if proof is true), or
  99     dumps a benchmark and uses that as the input (if dump is true)."""
 100
 101     if not os.access(cvc4_binary, os.X_OK):
 102         sys.exit(
 103             '"{}" does not exist or is not executable'.format(cvc4_binary))
 104     if not os.path.isfile(benchmark_path):
 105         sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
 106
 107     basic_command_line_args = []
 108
 109     benchmark_basename = os.path.basename(benchmark_path)
 110     benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
 111     benchmark_dir = os.path.dirname(benchmark_path)
 112     comment_char = '%'
 113     status_regex = None
 114     status_to_output = lambda s: s
 115     if benchmark_ext == '.smt':
 116         status_regex = r':status\s*(sat|unsat)'
 117         comment_char = ';'
 118     elif benchmark_ext == '.smt2':
 119         status_regex = r'set-info\s*:status\s*(sat|unsat)'
 120         comment_char = ';'
 121     elif benchmark_ext == '.cvc':
 122         pass
 123     elif benchmark_ext == '.p':
 124         basic_command_line_args.append('--finite-model-find')
 125         status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
 126         status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
 127     elif benchmark_ext == '.sy':
 128         comment_char = ';'
 129         # Do not use proofs/unsat-cores with .sy files
 130         proof = False
 131     else:
 132         sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
 133             benchmark_basename))
 134
 135     # If there is an ".expect" file for the benchmark, read the metadata
 136     # from there, otherwise from the benchmark file.
 137     metadata_filename = benchmark_path + '.expect'
 138     if os.path.isfile(metadata_filename):
 139         comment_char = '%'
 140     else:
 141         metadata_filename = benchmark_path
 142
 143     metadata_lines = None
 144     with open(metadata_filename, 'r') as metadata_file:
 145         metadata_lines = metadata_file.readlines()
 146     metadata_content = ''.join(metadata_lines)
 147
 148     # Extract the metadata for the benchmark.
 149     scrubber = None
 150     error_scrubber = None
 151     expected_output = ''
 152     expected_error = ''
 153     expected_exit_status = None
 154     command_line = ''
 155     for line in metadata_lines:
 156         # Skip lines that do not start with "%"
 157         if line[0] != comment_char:
 158             continue
 159         line = line[2:]
 160
 161         if line.startswith(SCRUBBER):
 162             scrubber = line[len(SCRUBBER):]
 163         elif line.startswith(ERROR_SCRUBBER):
 164             error_scrubber = line[len(ERROR_SCRUBBER):]
 165         elif line.startswith(EXPECT):
 166             expected_output += line[len(EXPECT):]
 167         elif line.startswith(EXPECT_ERROR):
 168             expected_error += line[len(EXPECT_ERROR):]
 169         elif line.startswith(EXIT):
 170             expected_exit_status = int(line[len(EXIT):])
 171         elif line.startswith(COMMAND_LINE):
 172             command_line += line[len(COMMAND_LINE):]
 173     expected_output = expected_output.strip()
 174     expected_error = expected_error.strip()
 175
 176     # Expected output/expected error has not been defined in the metadata for
 177     # the benchmark. Try to extract the information from the benchmark itself.
 178     if expected_output == '' and expected_error == '':
 179         match = None
 180         if status_regex:
 181             match = re.search(status_regex, metadata_content)
 182
 183         if match:
 184             expected_output = status_to_output(match.group(1))
 185         elif expected_exit_status is None:
 186             # If there is no expected output/error and the exit status has not
 187             # been set explicitly, the benchmark is invalid.
 188             sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
 189
 190     if not proof and ('(get-unsat-core)' in metadata_content
 191                       or '(get-unsat-assumptions)' in metadata_content):
 192         print(
 193             '1..0 # Skipped: unsat cores not supported without proof support')
 194         return
 195
 196     if expected_exit_status is None:
 197         expected_exit_status = 0
 198
 199     if 'CVC4_REGRESSION_ARGS' in os.environ:
 200         basic_command_line_args += shlex.split(
 201             os.environ['CVC4_REGRESSION_ARGS'])
 202     basic_command_line_args += shlex.split(command_line)
 203     command_line_args_configs = [basic_command_line_args]
 204
 205     extra_command_line_args = []
 206     if benchmark_ext == '.sy' and \
 207         '--no-check-synth-sol' not in basic_command_line_args and \
 208         '--check-synth-sol' not in basic_command_line_args:
 209         extra_command_line_args = ['--check-synth-sol']
 210     if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
 211        '--no-check-models' not in basic_command_line_args:
 212         extra_command_line_args = ['--check-models']
 213     if proof and re.search(r'^(unsat|valid)$', expected_output):
 214         if '--no-check-proofs' not in basic_command_line_args and \
 215            '--incremental' not in basic_command_line_args and \
 216            '--unconstrained-simp' not in basic_command_line_args and \
 217            not cvc4_binary.endswith('pcvc4'):
 218             extra_command_line_args = [
 219                 '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
 220                 '--no-bv-algebraic'
 221             ]
 222         if '--no-check-unsat-cores' not in basic_command_line_args and \
 223            '--incremental' not in basic_command_line_args and \
 224            '--unconstrained-simp' not in basic_command_line_args and \
 225            not cvc4_binary.endswith('pcvc4'):
 226             extra_command_line_args += ['--check-unsat-cores']
 227     if extra_command_line_args:
 228         command_line_args_configs.append(
 229             basic_command_line_args + extra_command_line_args)
 230
 231     # Run CVC4 on the benchmark with the different option sets and check
 232     # whether the exit status, stdout output, stderr output are as expected.
 233     print('1..{}'.format(len(command_line_args_configs)))
 234     print('# Starting')
 235     for command_line_args in command_line_args_configs:
 236         output, error, exit_status = run_benchmark(
 237             dump, wrapper, scrubber, error_scrubber, cvc4_binary,
 238             command_line_args, benchmark_dir, benchmark_basename)
 239         if output != expected_output:
 240             print(
 241                 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
 242                 format(command_line_args))
 243             for line in difflib.context_diff(output.splitlines(),
 244                                              expected_output.splitlines()):
 245                 print(line)
 246         elif error != expected_error:
 247             print(
 248                 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
 249                 format(command_line_args))
 250             for line in difflib.context_diff(error.splitlines(),
 251                                              expected_error.splitlines()):
 252                 print(line)
 253         elif expected_exit_status != exit_status:
 254             print(
 255                 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
 256                 format(expected_exit_status, exit_status, command_line_args))
 257         else:
 258             print('ok - Flags: {}'.format(command_line_args))
 259
 260
 261 def main():
 262     """Parses the command line arguments and then calls the core of the
 263     script."""
 264
 265     parser = argparse.ArgumentParser(
 266         description=
 267         'Runs benchmark and checks for correct exit status and output.')
 268     parser.add_argument('--proof', action='store_true')
 269     parser.add_argument('--dump', action='store_true')
 270     parser.add_argument('wrapper', nargs='*')
 271     parser.add_argument('cvc4_binary')
 272     parser.add_argument('benchmark')
 273     args = parser.parse_args()
 274     cvc4_binary = os.path.abspath(args.cvc4_binary)
 275
 276     wrapper = args.wrapper
 277     if os.environ.get('VALGRIND') == '1' and not wrapper:
 278         wrapper = ['libtool', '--mode=execute', 'valgrind']
 279
 280     run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)
 281
 282
 283 if __name__ == "__main__":
 284     main()