[Reg] Make status/unsat-core detection more robust (#1775)
[cvc5.git] / test/regress/run_regression.py
#!/usr/bin/env python3
"""
Usage:

    run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy ]

Runs a benchmark and checks for the correct exit status and output.
"""

import argparse
import difflib
import os
import re
import shlex
import subprocess
import sys

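# Directives that may appear in benchmark comments (or in a companion
# ".expect" file) to describe the expected output, exit status, scrubber
# commands, and extra command line options.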
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
EXPECT = 'EXPECT: '
EXPECT_ERROR = 'EXPECT-ERROR: '
EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '


def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
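    # In dump mode, CVC4 first parses and re-prints the benchmark as SMT-LIB2
    # (the "raw-benchmark" dump), and that dump is then fed back in on stdin.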
    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_process = subprocess.Popen(
            bin_args + command_line + dump_args + [benchmark_filename],
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        dump_output, _ = dump_process.communicate()
        process = subprocess.Popen(
            bin_args + command_line + ['--lang=smt2', '-'],
            cwd=benchmark_dir,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate(input=dump_output)
        exit_status = process.returncode
    else:
        process = subprocess.Popen(
            bin_args + command_line + [benchmark_filename],
            cwd=benchmark_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, error = process.communicate()
        exit_status = process.returncode

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        scrubber_process = subprocess.Popen(
            shlex.split(scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output, _ = scrubber_process.communicate(input=output)
    if error_scrubber:
        error_scrubber_process = subprocess.Popen(
            shlex.split(error_scrubber),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        error, _ = error_scrubber_process.communicate(input=error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)


def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
    """Determines the expected output for a benchmark, runs CVC4 on it and then
    checks whether the output corresponds to the expected output. Optionally
    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
    dumps a benchmark and uses that as the input (if dump is true)."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
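    # The comment character and the status annotation to search for depend on
    # the input language, which is derived from the file extension.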
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        basic_command_line_args.append('--finite-model-find')
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        proof = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    # If there is an ".expect" file for the benchmark, read the metadata
    # from there, otherwise from the benchmark file.
    metadata_filename = benchmark_path + '.expect'
    if os.path.isfile(metadata_filename):
        comment_char = '%'
    else:
        metadata_filename = benchmark_path

    metadata_lines = None
    with open(metadata_filename, 'r') as metadata_file:
        metadata_lines = metadata_file.readlines()

    benchmark_content = None
    if metadata_filename == benchmark_path:
        benchmark_content = ''.join(metadata_lines)
    else:
        with open(benchmark_path, 'r') as benchmark_file:
            benchmark_content = benchmark_file.read()

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_line = ''
    for line in metadata_lines:
        # Skip lines that do not start with the comment character.
        if line[0] != comment_char:
            continue
        # Strip the comment character and the space that follows it.
        line = line[2:]

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):]
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):]
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):]
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):]
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):])
        elif line.startswith(COMMAND_LINE):
            command_line += line[len(COMMAND_LINE):]
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # If the expected output/error has not been defined in the metadata for
    # the benchmark, try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.search(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output(match.group(1))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))

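    # Benchmarks that ask for unsat cores or unsat assumptions are only run
    # when proof support is enabled.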
    if not proof and ('(get-unsat-core)' in benchmark_content
                      or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped: unsat cores not supported without proof support')
        return

    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])
    basic_command_line_args += shlex.split(command_line)
    command_line_args_configs = [basic_command_line_args]

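    # Where applicable, add a second configuration that also cross-checks the
    # result (synthesized solutions, models, proofs, or unsat cores).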
    extra_command_line_args = []
    if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in basic_command_line_args and \
            '--check-synth-sol' not in basic_command_line_args:
        extra_command_line_args = ['--check-synth-sol']
    if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
            '--no-check-models' not in basic_command_line_args:
        extra_command_line_args = ['--check-models']
    if proof and re.search(r'^(unsat|valid)$', expected_output):
        if '--no-check-proofs' not in basic_command_line_args and \
                '--incremental' not in basic_command_line_args and \
                '--unconstrained-simp' not in basic_command_line_args and \
                not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args = [
                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
                '--no-bv-algebraic'
            ]
        if '--no-check-unsat-cores' not in basic_command_line_args and \
                '--incremental' not in basic_command_line_args and \
                '--unconstrained-simp' not in basic_command_line_args and \
                not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-unsat-cores']
    if extra_command_line_args:
        command_line_args_configs.append(
            basic_command_line_args + extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status and the output on stdout and stderr are as
    # expected.
    print('1..{}'.format(len(command_line_args_configs)))
    print('# Starting')
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(
            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
            command_line_args, benchmark_dir, benchmark_basename)
        if output != expected_output:
            print(
                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(output.splitlines(),
                                             expected_output.splitlines()):
                print(line)
        elif error != expected_error:
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
                format(command_line_args))
            for line in difflib.context_diff(error.splitlines(),
                                             expected_error.splitlines()):
                print(line)
        elif expected_exit_status != exit_status:
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
        else:
            print('ok - Flags: {}'.format(command_line_args))


def main():
    """Parses the command line arguments and then calls the core of the
    script."""

    parser = argparse.ArgumentParser(
        description=
        'Runs a benchmark and checks for the correct exit status and output.')
    parser.add_argument('--proof', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
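    # When VALGRIND=1 is set and no wrapper was given, run the binary under
    # valgrind through libtool.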
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)


if __name__ == "__main__":
    main()