test/regress/run_regression.py

   1 #!/usr/bin/env python3
   2 """
   3 Usage:
   4
   5     run_regression.py [--enable-proof] [--with-lfsc] [--dump] [--cmake]
   6         [wrapper] cvc4-binary
   7         [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p]
   8
   9 Runs benchmark and checks for correct exit status and output.
  10 """
  11
  12 import argparse
  13 import difflib
  14 import os
  15 import re
  16 import shlex
  17 import subprocess
  18 import sys
  19 import threading
  20
  21 SCRUBBER = 'SCRUBBER: '
  22 ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
  23 EXPECT = 'EXPECT: '
  24 EXPECT_ERROR = 'EXPECT-ERROR: '
  25 EXIT = 'EXIT: '
  26 COMMAND_LINE = 'COMMAND-LINE: '
  27 REQUIRES = 'REQUIRES: '
  28
  29 EXIT_OK = 0
  30 EXIT_FAILURE = 1
  31 EXIT_SKIP = 77
  32
  33
  34 def run_process(args, cwd, timeout, s_input=None):
  35     """Runs a process with a timeout `timeout` in seconds. `args` are the
  36     arguments to execute, `cwd` is the working directory and `s_input` is the
  37     input to be sent to the process over stdin. Returns the output, the error
  38     output and the exit code of the process. If the process times out, the
  39     output and the error output are empty and the exit code is 124."""
  40
  41     proc = subprocess.Popen(
  42         args,
  43         cwd=cwd,
  44         stdin=subprocess.PIPE,
  45         stdout=subprocess.PIPE,
  46         stderr=subprocess.PIPE)
  47
  48     out = ''
  49     err = ''
  50     exit_status = 124
  51     try:
  52         if timeout:
  53             timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
  54             timer.start()
  55         out, err = proc.communicate(input=s_input)
  56         exit_status = proc.returncode
  57     finally:
  58         if timeout:
  59             timer.cancel()
  60
  61     return out, err, exit_status
  62
  63
  64 def get_cvc4_features(cvc4_binary):
  65     """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
  66
  67     output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
  68     if isinstance(output, bytes):
  69         output = output.decode()
  70
  71     features = []
  72     for line in output.split('\n'):
  73         tokens = [t.strip() for t in line.split(':')]
  74         if len(tokens) == 2:
  75             key, value = tokens
  76             if value == 'yes':
  77                 features.append(key)
  78
  79     return features
  80
  81
  82 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
  83                   command_line, benchmark_dir, benchmark_filename, timeout):
  84     """Runs CVC4 on the file `benchmark_filename` in the directory
  85     `benchmark_dir` using the binary `cvc4_binary` with the command line
  86     options `command_line`. The output is scrubbed using `scrubber` and
  87     `error_scrubber` for stdout and stderr, respectively. If dump is true, the
  88     function first uses CVC4 to read in and dump the benchmark file and then
  89     uses that as input."""
  90
  91     bin_args = wrapper[:]
  92     bin_args.append(cvc4_binary)
  93
  94     output = None
  95     error = None
  96     exit_status = None
  97     if dump:
  98         dump_args = [
  99             '--preprocess-only', '--dump', 'raw-benchmark',
 100             '--output-lang=smt2', '-qq'
 101         ]
 102         dump_output, _, _ = run_process(
 103             bin_args + command_line + dump_args + [benchmark_filename],
 104             benchmark_dir, timeout)
 105         output, error, exit_status = run_process(
 106             bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
 107             timeout, dump_output)
 108     else:
 109         output, error, exit_status = run_process(
 110             bin_args + command_line + [benchmark_filename], benchmark_dir,
 111             timeout)
 112
 113     # If a scrubber command has been specified then apply it to the output.
 114     if scrubber:
 115         output, _, _ = run_process(
 116             shlex.split(scrubber), benchmark_dir, timeout, output)
 117     if error_scrubber:
 118         error, _, _ = run_process(
 119             shlex.split(error_scrubber), benchmark_dir, timeout, error)
 120
 121     # Popen in Python 3 returns a bytes object instead of a string for
 122     # stdout/stderr.
 123     if isinstance(output, bytes):
 124         output = output.decode()
 125     if isinstance(error, bytes):
 126         error = error.decode()
 127     return (output.strip(), error.strip(), exit_status)
 128
 129
 130 def run_regression(unsat_cores, proofs, dump, cmake, wrapper, cvc4_binary,
 131                    benchmark_path, timeout):
 132     """Determines the expected output for a benchmark, runs CVC4 on it and then
 133     checks whether the output corresponds to the expected output. Optionally
 134     uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
 135     checks proofs (if proofs is true), or dumps a benchmark and uses that as
 136     the input (if dump is true). `cmake` enables/disables CMake-specific
 137     behavior."""
 138
 139     if not os.access(cvc4_binary, os.X_OK):
 140         sys.exit(
 141             '"{}" does not exist or is not executable'.format(cvc4_binary))
 142     if not os.path.isfile(benchmark_path):
 143         sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
 144
 145     cvc4_features = get_cvc4_features(cvc4_binary)
 146
 147     basic_command_line_args = []
 148
 149     benchmark_basename = os.path.basename(benchmark_path)
 150     benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
 151     benchmark_dir = os.path.dirname(benchmark_path)
 152     comment_char = '%'
 153     status_regex = None
 154     status_to_output = lambda s: s
 155     if benchmark_ext == '.smt':
 156         status_regex = r':status\s*(sat|unsat)'
 157         comment_char = ';'
 158     elif benchmark_ext == '.smt2':
 159         status_regex = r'set-info\s*:status\s*(sat|unsat)'
 160         comment_char = ';'
 161     elif benchmark_ext == '.cvc':
 162         pass
 163     elif benchmark_ext == '.p':
 164         basic_command_line_args.append('--finite-model-find')
 165         status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
 166         status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
 167     elif benchmark_ext == '.sy':
 168         comment_char = ';'
 169         # Do not use proofs/unsat-cores with .sy files
 170         unsat_cores = False
 171         proofs = False
 172     else:
 173         sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
 174             benchmark_basename))
 175
 176     benchmark_lines = None
 177     with open(benchmark_path, 'r') as benchmark_file:
 178         benchmark_lines = benchmark_file.readlines()
 179     benchmark_content = ''.join(benchmark_lines)
 180
 181     # Extract the metadata for the benchmark.
 182     scrubber = None
 183     error_scrubber = None
 184     expected_output = ''
 185     expected_error = ''
 186     expected_exit_status = None
 187     command_lines = []
 188     requires = []
 189     for line in benchmark_lines:
 190         # Skip lines that do not start with a comment character.
 191         if line[0] != comment_char:
 192             continue
 193         line = line[1:].lstrip()
 194
 195         if line.startswith(SCRUBBER):
 196             scrubber = line[len(SCRUBBER):]
 197         elif line.startswith(ERROR_SCRUBBER):
 198             error_scrubber = line[len(ERROR_SCRUBBER):]
 199         elif line.startswith(EXPECT):
 200             expected_output += line[len(EXPECT):]
 201         elif line.startswith(EXPECT_ERROR):
 202             expected_error += line[len(EXPECT_ERROR):]
 203         elif line.startswith(EXIT):
 204             expected_exit_status = int(line[len(EXIT):])
 205         elif line.startswith(COMMAND_LINE):
 206             command_lines.append(line[len(COMMAND_LINE):])
 207         elif line.startswith(REQUIRES):
 208             requires.append(line[len(REQUIRES):].strip())
 209     expected_output = expected_output.strip()
 210     expected_error = expected_error.strip()
 211
 212     # Expected output/expected error has not been defined in the metadata for
 213     # the benchmark. Try to extract the information from the benchmark itself.
 214     if expected_output == '' and expected_error == '':
 215         match = None
 216         if status_regex:
 217             match = re.search(status_regex, benchmark_content)
 218
 219         if match:
 220             expected_output = status_to_output(match.group(1))
 221         elif expected_exit_status is None:
 222             # If there is no expected output/error and the exit status has not
 223             # been set explicitly, the benchmark is invalid.
 224             sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
 225     if expected_exit_status is None:
 226         expected_exit_status = 0
 227
 228     if 'CVC4_REGRESSION_ARGS' in os.environ:
 229         basic_command_line_args += shlex.split(
 230             os.environ['CVC4_REGRESSION_ARGS'])
 231
 232     if not unsat_cores and ('(get-unsat-core)' in benchmark_content
 233                             or '(get-unsat-assumptions)' in benchmark_content):
 234         print(
 235             '1..0 # Skipped regression: unsat cores not supported without proof support'
 236         )
 237         return (EXIT_SKIP if cmake else EXIT_OK)
 238
 239     for req_feature in requires:
 240         if req_feature.startswith("no-"):
 241             inv_feature = req_feature[len("no-"):]
 242             if inv_feature in cvc4_features:
 243                 print('1..0 # Skipped regression: not valid with {}'.format(
 244                     inv_feature))
 245                 return (EXIT_SKIP if cmake else EXIT_OK)
 246         elif req_feature not in cvc4_features:
 247             print('1..0 # Skipped regression: {} not supported'.format(
 248                 req_feature))
 249             return (EXIT_SKIP if cmake else EXIT_OK)
 250
 251     if not command_lines:
 252         command_lines.append('')
 253
 254     command_line_args_configs = []
 255     for command_line in command_lines:
 256         args = shlex.split(command_line)
 257         all_args = basic_command_line_args + args
 258
 259         if not unsat_cores and ('--check-unsat-cores' in all_args):
 260             print(
 261                 '# Skipped command line options ({}): unsat cores not supported without proof support'
 262                 .format(all_args))
 263             continue
 264         if not proofs and ('--check-proofs' in all_args
 265                            or '--dump-proofs' in all_args):
 266             print(
 267                 '# Skipped command line options ({}): checking proofs not supported without LFSC support'
 268                 .format(all_args))
 269             continue
 270
 271         command_line_args_configs.append(all_args)
 272
 273         extra_command_line_args = []
 274         if benchmark_ext == '.sy' and \
 275             '--no-check-synth-sol' not in all_args and \
 276             '--sygus-rr' not in all_args and \
 277             '--check-synth-sol' not in all_args:
 278             extra_command_line_args = ['--check-synth-sol']
 279         if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
 280            '--no-check-models' not in all_args and \
 281            '--check-models' not in all_args:
 282             extra_command_line_args = ['--check-models']
 283         if proofs and re.search(r'^(unsat|valid)$', expected_output):
 284             if '--no-check-proofs' not in all_args and \
 285                '--check-proofs' not in all_args and \
 286                '--incremental' not in all_args and \
 287                '--unconstrained-simp' not in all_args and \
 288                not cvc4_binary.endswith('pcvc4'):
 289                 extra_command_line_args = ['--check-proofs']
 290         if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
 291             if '--no-check-unsat-cores' not in all_args and \
 292                '--check-unsat-cores' not in all_args and \
 293                '--incremental' not in all_args and \
 294                '--unconstrained-simp' not in all_args and \
 295                not cvc4_binary.endswith('pcvc4'):
 296                 extra_command_line_args += ['--check-unsat-cores']
 297         if extra_command_line_args:
 298             command_line_args_configs.append(all_args +
 299                                              extra_command_line_args)
 300
 301     # Run CVC4 on the benchmark with the different option sets and check
 302     # whether the exit status, stdout output, stderr output are as expected.
 303     print('1..{}'.format(len(command_line_args_configs)))
 304     print('# Starting')
 305     exit_code = EXIT_OK
 306     for command_line_args in command_line_args_configs:
 307         output, error, exit_status = run_benchmark(
 308             dump, wrapper, scrubber, error_scrubber, cvc4_binary,
 309             command_line_args, benchmark_dir, benchmark_basename, timeout)
 310         if output != expected_output:
 311             exit_code = EXIT_FAILURE
 312             print(
 313                 'not ok - Differences between expected and actual output on stdout - Flags: {}'
 314                 .format(command_line_args))
 315             for line in difflib.context_diff(output.splitlines(),
 316                                              expected_output.splitlines()):
 317                 print(line)
 318             print()
 319             print('Error output:')
 320             print(error)
 321         elif error != expected_error:
 322             exit_code = EXIT_FAILURE
 323             print(
 324                 'not ok - Differences between expected and actual output on stderr - Flags: {}'
 325                 .format(command_line_args))
 326             for line in difflib.context_diff(error.splitlines(),
 327                                              expected_error.splitlines()):
 328                 print(line)
 329         elif expected_exit_status != exit_status:
 330             exit_code = EXIT_FAILURE
 331             print(
 332                 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
 333                 format(expected_exit_status, exit_status, command_line_args))
 334             print()
 335             print('Output:')
 336             print(output)
 337             print()
 338             print('Error output:')
 339             print(error)
 340         else:
 341             print('ok - Flags: {}'.format(command_line_args))
 342
 343     return exit_code
 344
 345
 346 def main():
 347     """Parses the command line arguments and then calls the core of the
 348     script."""
 349
 350     parser = argparse.ArgumentParser(
 351         description=
 352         'Runs benchmark and checks for correct exit status and output.')
 353     parser.add_argument('--enable-proof', action='store_true')
 354     parser.add_argument('--with-lfsc', action='store_true')
 355     parser.add_argument('--dump', action='store_true')
 356     parser.add_argument('--cmake', action='store_true')
 357     parser.add_argument('wrapper', nargs='*')
 358     parser.add_argument('cvc4_binary')
 359     parser.add_argument('benchmark')
 360     args = parser.parse_args()
 361     cvc4_binary = os.path.abspath(args.cvc4_binary)
 362
 363     wrapper = args.wrapper
 364     if os.environ.get('VALGRIND') == '1' and not wrapper:
 365         wrapper = ['libtool', '--mode=execute', 'valgrind']
 366
 367     timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
 368
 369     return run_regression(args.enable_proof, args.with_lfsc, args.dump,
 370                           args.cmake, wrapper, cvc4_binary, args.benchmark,
 371                           timeout)
 372
 373
 374 if __name__ == "__main__":
 375     exit_code = main()
 376     sys.exit(exit_code)