Enable and fix dump test (#7387)

[cvc5.git] / test / regress / run_regression.py
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py

index 9bd3de9f061fef915f901f5440ccdc27cfefc902..32ba6f6c2924174058542b00be0d84317bc11b60 100755 (executable)
--- a/test/regress/run_regression.py
+++ b/test/regress/run_regression.py
@@ -1,10 +1,18 @@
  #!/usr/bin/env python3
-"""
-Usage:
-
-    run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
-        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
+###############################################################################
+# Top contributors (to current version):
+#   Andres Noetzli, Mathias Preiner, Yoni Zohar
+#
+# This file is part of the cvc5 project.
+#
+# Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+# in the top-level source directory and their institutional affiliations.
+# All rights reserved.  See the file COPYING in the top-level source
+# directory for licensing information.
+# #############################################################################
+##
  
+"""
  Runs benchmark and checks for correct exit status and output.
  """
  
@@ -17,13 +25,50 @@ import subprocess
  import sys
  import threading
  
-SCRUBBER = 'SCRUBBER: '
-ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
-EXPECT = 'EXPECT: '
-EXPECT_ERROR = 'EXPECT-ERROR: '
-EXIT = 'EXIT: '
-COMMAND_LINE = 'COMMAND-LINE: '
-REQUIRES = 'REQUIRES: '
+
+class Color:
+    BLUE = '\033[94m'
+    GREEN = '\033[92m'
+    YELLOW = '\033[93m'
+    RED = '\033[91m'
+    ENDC = '\033[0m'
+
+
+SCRUBBER = 'SCRUBBER:'
+ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
+EXPECT = 'EXPECT:'
+EXPECT_ERROR = 'EXPECT-ERROR:'
+EXIT = 'EXIT:'
+COMMAND_LINE = 'COMMAND-LINE:'
+REQUIRES = 'REQUIRES:'
+
+EXIT_OK = 0
+EXIT_FAILURE = 1
+EXIT_SKIP = 77
+STATUS_TIMEOUT = 124
+
+
+def print_colored(color, text):
+    """Prints `text` in color `color`."""
+
+    for line in text.splitlines():
+        print(color + line + Color.ENDC)
+
+
+def print_diff(actual, expected):
+    """Prints the difference between `actual` and `expected`."""
+
+    for line in difflib.unified_diff(expected.splitlines(),
+                                     actual.splitlines(),
+                                     'expected',
+                                     'actual',
+                                     lineterm=''):
+        if line.startswith('+'):
+            print_colored(Color.GREEN, line)
+        elif line.startswith('-'):
+            print_colored(Color.RED, line)
+        else:
+            print(line)
  
  
  def run_process(args, cwd, timeout, s_input=None):
@@ -33,16 +78,15 @@ def run_process(args, cwd, timeout, s_input=None):
      output and the exit code of the process. If the process times out, the
      output and the error output are empty and the exit code is 124."""
  
-    proc = subprocess.Popen(
-        args,
-        cwd=cwd,
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
+    proc = subprocess.Popen(args,
+                            cwd=cwd,
+                            stdin=subprocess.PIPE,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
  
      out = ''
      err = ''
-    exit_status = 124
+    exit_status = STATUS_TIMEOUT
      try:
          if timeout:
              timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
@@ -51,48 +95,53 @@ def run_process(args, cwd, timeout, s_input=None):
          exit_status = proc.returncode
      finally:
          if timeout:
+            # The timer killed the process and is not active anymore.
+            if exit_status == -9 and not timer.is_alive():
+                exit_status = STATUS_TIMEOUT
              timer.cancel()
  
      return out, err, exit_status
  
  
-def get_cvc4_features(cvc4_binary):
-    """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
+def get_cvc5_features(cvc5_binary):
+    """Returns a list of features supported by the cvc5 binary `cvc5_binary`."""
  
-    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
+    output, _, _ = run_process([cvc5_binary, '--show-config'], None, None)
      if isinstance(output, bytes):
          output = output.decode()
  
      features = []
+    disabled_features = []
      for line in output.split('\n'):
          tokens = [t.strip() for t in line.split(':')]
          if len(tokens) == 2:
              key, value = tokens
              if value == 'yes':
                  features.append(key)
+            elif value == 'no':
+                disabled_features.append(key)
  
-    return features
+    return features, disabled_features
  
  
-def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc5_binary,
                    command_line, benchmark_dir, benchmark_filename, timeout):
-    """Runs CVC4 on the file `benchmark_filename` in the directory
-    `benchmark_dir` using the binary `cvc4_binary` with the command line
+    """Runs cvc5 on the file `benchmark_filename` in the directory
+    `benchmark_dir` using the binary `cvc5_binary` with the command line
      options `command_line`. The output is scrubbed using `scrubber` and
      `error_scrubber` for stdout and stderr, respectively. If dump is true, the
-    function first uses CVC4 to read in and dump the benchmark file and then
+    function first uses cvc5 to read in and dump the benchmark file and then
      uses that as input."""
  
      bin_args = wrapper[:]
-    bin_args.append(cvc4_binary)
+    bin_args.append(cvc5_binary)
  
      output = None
      error = None
      exit_status = None
      if dump:
          dump_args = [
-            '--preprocess-only', '--dump', 'raw-benchmark',
-            '--output-lang=smt2', '-qq'
+            '--parse-only', '-o', 'raw-benchmark', '--output-lang=smt2'
          ]
          dump_output, _, _ = run_process(
              bin_args + command_line + dump_args + [benchmark_filename],
@@ -107,11 +156,11 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
  
      # If a scrubber command has been specified then apply it to the output.
      if scrubber:
-        output, _, _ = run_process(
-            shlex.split(scrubber), benchmark_dir, timeout, output)
+        output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
+                                   timeout, output)
      if error_scrubber:
-        error, _, _ = run_process(
-            shlex.split(error_scrubber), benchmark_dir, timeout, error)
+        error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
+                                  timeout, error)
  
      # Popen in Python 3 returns a bytes object instead of a string for
      # stdout/stderr.
@@ -122,19 +171,23 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
      return (output.strip(), error.strip(), exit_status)
  
  
-def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
-    """Determines the expected output for a benchmark, runs CVC4 on it and then
+def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
+                   skip_timeout, wrapper, cvc5_binary, benchmark_path,
+                   timeout):
+    """Determines the expected output for a benchmark, runs cvc5 on it and then
      checks whether the output corresponds to the expected output. Optionally
-    uses a wrapper `wrapper`, tests proof generation (if proof is true), or
-    dumps a benchmark and uses that as the input (if dump is true)."""
+    uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
+    checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
+    the input (if dump is true). `use_skip_return_code` enables/disables
+    returning 77 when a test is skipped."""
  
-    if not os.access(cvc4_binary, os.X_OK):
+    if not os.access(cvc5_binary, os.X_OK):
          sys.exit(
-            '"{}" does not exist or is not executable'.format(cvc4_binary))
+            '"{}" does not exist or is not executable'.format(cvc5_binary))
      if not os.path.isfile(benchmark_path):
          sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
  
-    cvc4_features = get_cvc4_features(cvc4_binary)
+    cvc5_features, cvc5_disabled_features = get_cvc5_features(cvc5_binary)
  
      basic_command_line_args = []
  
@@ -150,38 +203,23 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
      elif benchmark_ext == '.smt2':
          status_regex = r'set-info\s*:status\s*(sat|unsat)'
          comment_char = ';'
-    elif benchmark_ext == '.cvc':
-        pass
      elif benchmark_ext == '.p':
-        basic_command_line_args.append('--finite-model-find')
          status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
-        status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
+        status_to_output = lambda s: '% SZS status {} for {}'.format(
+            s, benchmark_filename)
      elif benchmark_ext == '.sy':
          comment_char = ';'
-        # Do not use proofs/unsat-cores with .sy files
-        proof = False
+        # Do not check proofs/unsat-cores with .sy files
+        check_unsat_cores = False
+        check_proofs = False
      else:
-        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
+        sys.exit('"{}" must be *.smt2 or *.p or *.sy'.format(
              benchmark_basename))
  
-    # If there is an ".expect" file for the benchmark, read the metadata
-    # from there, otherwise from the benchmark file.
-    metadata_filename = benchmark_path + '.expect'
-    if os.path.isfile(metadata_filename):
-        comment_char = '%'
-    else:
-        metadata_filename = benchmark_path
-
-    metadata_lines = None
-    with open(metadata_filename, 'r') as metadata_file:
-        metadata_lines = metadata_file.readlines()
-
-    benchmark_content = None
-    if metadata_filename == benchmark_path:
-        benchmark_content = ''.join(metadata_lines)
-    else:
-        with open(benchmark_path, 'r') as benchmark_file:
-            benchmark_content = benchmark_file.read()
+    benchmark_lines = None
+    with open(benchmark_path, 'r') as benchmark_file:
+        benchmark_lines = benchmark_file.readlines()
+    benchmark_content = ''.join(benchmark_lines)
  
      # Extract the metadata for the benchmark.
      scrubber = None
@@ -191,24 +229,24 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
      expected_exit_status = None
      command_lines = []
      requires = []
-    for line in metadata_lines:
+    for line in benchmark_lines:
          # Skip lines that do not start with a comment character.
          if line[0] != comment_char:
              continue
          line = line[1:].lstrip()
  
          if line.startswith(SCRUBBER):
-            scrubber = line[len(SCRUBBER):]
+            scrubber = line[len(SCRUBBER):].strip()
          elif line.startswith(ERROR_SCRUBBER):
-            error_scrubber = line[len(ERROR_SCRUBBER):]
+            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
          elif line.startswith(EXPECT):
-            expected_output += line[len(EXPECT):]
+            expected_output += line[len(EXPECT):].strip() + '\n'
          elif line.startswith(EXPECT_ERROR):
-            expected_error += line[len(EXPECT_ERROR):]
+            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
          elif line.startswith(EXIT):
-            expected_exit_status = int(line[len(EXIT):])
+            expected_exit_status = int(line[len(EXIT):].strip())
          elif line.startswith(COMMAND_LINE):
-            command_lines.append(line[len(COMMAND_LINE):])
+            command_lines.append(line[len(COMMAND_LINE):].strip())
          elif line.startswith(REQUIRES):
              requires.append(line[len(REQUIRES):].strip())
      expected_output = expected_output.strip()
@@ -219,10 +257,10 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
      if expected_output == '' and expected_error == '':
          match = None
          if status_regex:
-            match = re.search(status_regex, benchmark_content)
+            match = re.findall(status_regex, benchmark_content)
  
          if match:
-            expected_output = status_to_output(match.group(1))
+            expected_output = status_to_output('\n'.join(match))
          elif expected_exit_status is None:
              # If there is no expected output/error and the exit status has not
              # been set explicitly, the benchmark is invalid.
@@ -230,24 +268,37 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
      if expected_exit_status is None:
          expected_exit_status = 0
  
-    if 'CVC4_REGRESSION_ARGS' in os.environ:
+    if 'CVC5_REGRESSION_ARGS' in os.environ:
          basic_command_line_args += shlex.split(
-            os.environ['CVC4_REGRESSION_ARGS'])
+            os.environ['CVC5_REGRESSION_ARGS'])
  
-    if not proof and ('(get-unsat-core)' in benchmark_content
-                      or '(get-unsat-assumptions)' in benchmark_content
-                      or '--check-proofs' in basic_command_line_args
-                      or '--dump-proofs' in basic_command_line_args):
+    if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
+                            or '(get-unsat-assumptions)' in benchmark_content):
          print(
              '1..0 # Skipped regression: unsat cores not supported without proof support'
          )
-        return
+        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
  
      for req_feature in requires:
-        if req_feature not in cvc4_features:
+        is_negative = False
+        if req_feature.startswith("no-"):
+            req_feature = req_feature[len("no-"):]
+            is_negative = True
+        if req_feature not in (cvc5_features + cvc5_disabled_features):
+            print(
+                'Illegal requirement in regression: {}\nAllowed requirements: {}'
+                .format(req_feature,
+                        ' '.join(cvc5_features + cvc5_disabled_features)))
+            return EXIT_FAILURE
+        if is_negative:
+            if req_feature in cvc5_features:
+                print('1..0 # Skipped regression: not valid with {}'.format(
+                    req_feature))
+                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
+        elif req_feature not in cvc5_features:
              print('1..0 # Skipped regression: {} not supported'.format(
                  req_feature))
-            return
+            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
  
      if not command_lines:
          command_lines.append('')
@@ -255,66 +306,121 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
      command_line_args_configs = []
      for command_line in command_lines:
          args = shlex.split(command_line)
-        if proof or ('--check-proofs' not in args
-                     and '--dump-proofs' not in args):
-            all_args = basic_command_line_args + args
-            command_line_args_configs.append(all_args)
-
-            extra_command_line_args = []
-            if benchmark_ext == '.sy' and \
-                '--no-check-synth-sol' not in all_args and \
-                '--check-synth-sol' not in all_args:
-                extra_command_line_args = ['--check-synth-sol']
-            if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
-               '--no-check-models' not in all_args:
-                extra_command_line_args = ['--check-models']
-            if proof and re.search(r'^(unsat|valid)$', expected_output):
-                if '--no-check-proofs' not in all_args and \
-                   '--incremental' not in all_args and \
-                   '--unconstrained-simp' not in all_args and \
-                   not cvc4_binary.endswith('pcvc4'):
-                    extra_command_line_args = [
-                        '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
-                        '--no-bv-algebraic'
-                    ]
-                if '--no-check-unsat-cores' not in all_args and \
-                   '--incremental' not in all_args and \
-                   '--unconstrained-simp' not in all_args and \
-                   not cvc4_binary.endswith('pcvc4'):
-                    extra_command_line_args += ['--check-unsat-cores']
-            if extra_command_line_args:
-                command_line_args_configs.append(all_args +
-                                                 extra_command_line_args)
-
-    # Run CVC4 on the benchmark with the different option sets and check
+        all_args = basic_command_line_args + args
+
+        if not check_unsat_cores and ('--check-unsat-cores' in all_args):
+            print(
+                '# Skipped command line options ({}): unsat cores not supported without proof support'
+                .format(all_args))
+            continue
+        if not check_proofs and '--dump-proofs' in all_args:
+            print(
+                '# Skipped command line options ({}): proof production not supported'
+                .format(all_args))
+            continue
+
+        command_line_args_configs.append(all_args)
+
+        expected_output_lines = expected_output.split()
+        extra_command_line_args = []
+        if benchmark_ext == '.sy' and \
+            '--no-check-synth-sol' not in all_args and \
+            '--sygus-rr' not in all_args and \
+            '--check-synth-sol' not in all_args:
+            all_args += ['--check-synth-sol']
+        if ('sat' in expected_output_lines or \
+            'unknown' in expected_output_lines) and \
+           '--no-debug-check-models' not in all_args and \
+           '--no-check-models' not in all_args and \
+           '--debug-check-models' not in all_args:
+            extra_command_line_args += ['--debug-check-models']
+        if 'unsat' in expected_output_lines in expected_output_lines:
+            if check_unsat_cores and \
+               '--no-produce-unsat-cores' not in all_args and \
+               '--no-check-unsat-cores' not in all_args and \
+               '--check-unsat-cores' not in all_args and \
+               'sygus-inference' not in benchmark_content and \
+               '--unconstrained-simp' not in all_args:
+                extra_command_line_args += ['--check-unsat-cores']
+            if check_proofs and \
+               '--no-produce-proofs' not in all_args and \
+               '--no-check-proofs' not in all_args and \
+               '--check-proofs' not in all_args:
+                extra_command_line_args += ['--check-proofs']
+        if '--no-check-abducts' not in all_args and \
+            '--check-abducts' not in all_args and \
+            'get-abduct' in benchmark_content:
+            all_args += ['--check-abducts']
+
+        # Create a test case for each extra argument
+        for extra_arg in extra_command_line_args:
+            command_line_args_configs.append(all_args + [extra_arg])
+
+    # Run cvc5 on the benchmark with the different option sets and check
      # whether the exit status, stdout output, stderr output are as expected.
      print('1..{}'.format(len(command_line_args_configs)))
      print('# Starting')
+    exit_code = EXIT_OK
      for command_line_args in command_line_args_configs:
-        output, error, exit_status = run_benchmark(
-            dump, wrapper, scrubber, error_scrubber, cvc4_binary,
-            command_line_args, benchmark_dir, benchmark_basename, timeout)
-        if output != expected_output:
-            print(
-                'not ok - Differences between expected and actual output on stdout - Flags: {}'.
-                format(command_line_args))
-            for line in difflib.context_diff(output.splitlines(),
-                                             expected_output.splitlines()):
-                print(line)
+        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
+                                                   error_scrubber, cvc5_binary,
+                                                   command_line_args,
+                                                   benchmark_dir,
+                                                   benchmark_basename, timeout)
+        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
+        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
+        if exit_status == STATUS_TIMEOUT:
+            exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
+            print('Timeout - Flags: {}'.format(command_line_args))
+        elif output != expected_output:
+            exit_code = EXIT_FAILURE
+            print('not ok - Flags: {}'.format(command_line_args))
+            print()
+            print('Standard output difference')
+            print('=' * 80)
+            print_diff(output, expected_output)
+            print('=' * 80)
+            print()
+            print()
+            if error:
+                print('Error output')
+                print('=' * 80)
+                print_colored(Color.YELLOW, error)
+                print('=' * 80)
+                print()
          elif error != expected_error:
+            exit_code = EXIT_FAILURE
              print(
-                'not ok - Differences between expected and actual output on stderr - Flags: {}'.
-                format(command_line_args))
-            for line in difflib.context_diff(error.splitlines(),
-                                             expected_error.splitlines()):
-                print(line)
+                'not ok - Differences between expected and actual output on stderr - Flags: {}'
+                .format(command_line_args))
+            print()
+            print('Error output difference')
+            print('=' * 80)
+            print_diff(error, expected_error)
+            print('=' * 80)
+            print()
          elif expected_exit_status != exit_status:
+            exit_code = EXIT_FAILURE
              print(
                  'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                  format(expected_exit_status, exit_status, command_line_args))
+            print()
+            print('Output:')
+            print('=' * 80)
+            print_colored(Color.BLUE, output)
+            print('=' * 80)
+            print()
+            print()
+            print('Error output:')
+            print('=' * 80)
+            print_colored(Color.YELLOW, error)
+            print('=' * 80)
+            print()
          else:
              print('ok - Flags: {}'.format(command_line_args))
  
+    return exit_code
+
  
  def main():
      """Parses the command line arguments and then calls the core of the
@@ -323,23 +429,40 @@ def main():
      parser = argparse.ArgumentParser(
          description=
          'Runs benchmark and checks for correct exit status and output.')
-    parser.add_argument('--proof', action='store_true')
      parser.add_argument('--dump', action='store_true')
+    parser.add_argument('--use-skip-return-code', action='store_true')
+    parser.add_argument('--skip-timeout', action='store_true')
+    parser.add_argument('--check-unsat-cores', action='store_true',
+                        default=True)
+    parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores',
+                        action='store_false')
+    parser.add_argument('--check-proofs', action='store_true', default=True)
+    parser.add_argument('--no-check-proofs', dest='check_proofs',
+                        action='store_false')
      parser.add_argument('wrapper', nargs='*')
-    parser.add_argument('cvc4_binary')
+    parser.add_argument('cvc5_binary')
      parser.add_argument('benchmark')
-    args = parser.parse_args()
-    cvc4_binary = os.path.abspath(args.cvc4_binary)
+
+    argv = sys.argv[1:]
+    # Append options passed via RUN_REGRESSION_ARGS to argv
+    if os.environ.get('RUN_REGRESSION_ARGS'):
+        argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))
+
+    args = parser.parse_args(argv)
+
+    cvc5_binary = os.path.abspath(args.cvc5_binary)
  
      wrapper = args.wrapper
      if os.environ.get('VALGRIND') == '1' and not wrapper:
          wrapper = ['libtool', '--mode=execute', 'valgrind']
  
-    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
+    timeout = float(os.getenv('TEST_TIMEOUT', '600'))
  
-    run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark,
-                   timeout)
+    return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
+                          args.use_skip_return_code, args.skip_timeout,
+                          wrapper, cvc5_binary, args.benchmark, timeout)
  
  
  if __name__ == "__main__":
-    main()
+    exit_code = main()
+    sys.exit(exit_code)