#!/usr/bin/env python3
-"""
-Usage:
-
- run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
- [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
+###############################################################################
+# Top contributors (to current version):
+# Andres Noetzli, Mathias Preiner, Yoni Zohar
+#
+# This file is part of the cvc5 project.
+#
+# Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+# in the top-level source directory and their institutional affiliations.
+# All rights reserved. See the file COPYING in the top-level source
+# directory for licensing information.
+# #############################################################################
+##
+"""
Runs benchmark and checks for correct exit status and output.
"""
import sys
import threading
-SCRUBBER = 'SCRUBBER: '
-ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
-EXPECT = 'EXPECT: '
-EXPECT_ERROR = 'EXPECT-ERROR: '
-EXIT = 'EXIT: '
-COMMAND_LINE = 'COMMAND-LINE: '
-REQUIRES = 'REQUIRES: '
+
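+# ANSI terminal escape sequences used to colorize diff and error output.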
+class Color:
+ BLUE = '\033[94m'
+ GREEN = '\033[92m'
+ YELLOW = '\033[93m'
+ RED = '\033[91m'
+ ENDC = '\033[0m'
+
+
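+# Directives that benchmarks embed in comment lines to configure a test,
+# e.g. "; EXPECT: sat" in an SMT-LIB benchmark.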
+SCRUBBER = 'SCRUBBER:'
+ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
+EXPECT = 'EXPECT:'
+EXPECT_ERROR = 'EXPECT-ERROR:'
+EXIT = 'EXIT:'
+COMMAND_LINE = 'COMMAND-LINE:'
+REQUIRES = 'REQUIRES:'
+
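+# 77 is the exit code that Automake's test harness treats as "test skipped";
+# 124 matches the exit status GNU coreutils' `timeout` uses for timeouts.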
+EXIT_OK = 0
+EXIT_FAILURE = 1
+EXIT_SKIP = 77
+STATUS_TIMEOUT = 124
+
+
+def print_colored(color, text):
+ """Prints `text` in color `color`."""
+
+ for line in text.splitlines():
+ print(color + line + Color.ENDC)
+
+
+def print_diff(actual, expected):
+ """Prints the difference between `actual` and `expected`."""
+
+ for line in difflib.unified_diff(expected.splitlines(),
+ actual.splitlines(),
+ 'expected',
+ 'actual',
+ lineterm=''):
+ if line.startswith('+'):
+ print_colored(Color.GREEN, line)
+ elif line.startswith('-'):
+ print_colored(Color.RED, line)
+ else:
+ print(line)
def run_process(args, cwd, timeout, s_input=None):
output and the exit code of the process. If the process times out, the
output and the error output are empty and the exit code is 124."""
- proc = subprocess.Popen(
- args,
- cwd=cwd,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ proc = subprocess.Popen(args,
+ cwd=cwd,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
out = ''
err = ''
- exit_status = 124
+ exit_status = STATUS_TIMEOUT
try:
if timeout:
timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
exit_status = proc.returncode
finally:
if timeout:
+ # A return code of -9 means the process was killed (SIGKILL); if the
+ # timer is no longer alive, it fired and killed the process, so treat
+ # this as a timeout.
+ if exit_status == -9 and not timer.is_alive():
+ exit_status = STATUS_TIMEOUT
timer.cancel()
return out, err, exit_status
-def get_cvc4_features(cvc4_binary):
- """Returns a list of features supported by the CVC4 binary `cvc4_binary`."""
+def get_cvc5_features(cvc5_binary):
+ """Returns a list of features supported by the cvc5 binary `cvc5_binary`."""
- output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
+ output, _, _ = run_process([cvc5_binary, '--show-config'], None, None)
if isinstance(output, bytes):
output = output.decode()
features = []
+ disabled_features = []
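+ # `--show-config` prints one '<key> : <value>' line per feature; 'yes'
+ # marks a feature as enabled in this build and 'no' as disabled.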
for line in output.split('\n'):
tokens = [t.strip() for t in line.split(':')]
if len(tokens) == 2:
key, value = tokens
if value == 'yes':
features.append(key)
+ elif value == 'no':
+ disabled_features.append(key)
- return features
+ return features, disabled_features
-def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc5_binary,
command_line, benchmark_dir, benchmark_filename, timeout):
- """Runs CVC4 on the file `benchmark_filename` in the directory
- `benchmark_dir` using the binary `cvc4_binary` with the command line
+ """Runs cvc5 on the file `benchmark_filename` in the directory
+ `benchmark_dir` using the binary `cvc5_binary` with the command line
options `command_line`. The output is scrubbed using `scrubber` and
`error_scrubber` for stdout and stderr, respectively. If dump is true, the
- function first uses CVC4 to read in and dump the benchmark file and then
+ function first uses cvc5 to read in and dump the benchmark file and then
uses that as input."""
bin_args = wrapper[:]
- bin_args.append(cvc4_binary)
+ bin_args.append(cvc5_binary)
output = None
error = None
exit_status = None
if dump:
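+ # Have cvc5 parse the benchmark and echo it back in SMT-LIB syntax; the
+ # echoed output is then fed back in as the actual test input.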
dump_args = [
- '--preprocess-only', '--dump', 'raw-benchmark',
- '--output-lang=smt2', '-qq'
+ '--parse-only', '-o', 'raw-benchmark', '--output-lang=smt2'
]
dump_output, _, _ = run_process(
bin_args + command_line + dump_args + [benchmark_filename],
# If a scrubber command has been specified then apply it to the output.
if scrubber:
- output, _, _ = run_process(
- shlex.split(scrubber), benchmark_dir, timeout, output)
+ output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
+ timeout, output)
if error_scrubber:
- error, _, _ = run_process(
- shlex.split(error_scrubber), benchmark_dir, timeout, error)
+ error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
+ timeout, error)
# Popen in Python 3 returns a bytes object instead of a string for
# stdout/stderr.
return (output.strip(), error.strip(), exit_status)
-def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
- """Determines the expected output for a benchmark, runs CVC4 on it and then
+def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
+ skip_timeout, wrapper, cvc5_binary, benchmark_path,
+ timeout):
+ """Determines the expected output for a benchmark, runs cvc5 on it and then
checks whether the output corresponds to the expected output. Optionally
- uses a wrapper `wrapper`, tests proof generation (if proof is true), or
- dumps a benchmark and uses that as the input (if dump is true)."""
+ uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
+ checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
+ the input (if dump is true). `use_skip_return_code` enables/disables
+ returning 77 when a test is skipped."""
- if not os.access(cvc4_binary, os.X_OK):
+ if not os.access(cvc5_binary, os.X_OK):
sys.exit(
- '"{}" does not exist or is not executable'.format(cvc4_binary))
+ '"{}" does not exist or is not executable'.format(cvc5_binary))
if not os.path.isfile(benchmark_path):
sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
- cvc4_features = get_cvc4_features(cvc4_binary)
+ cvc5_features, cvc5_disabled_features = get_cvc5_features(cvc5_binary)
basic_command_line_args = []
elif benchmark_ext == '.smt2':
status_regex = r'set-info\s*:status\s*(sat|unsat)'
comment_char = ';'
- elif benchmark_ext == '.cvc':
- pass
elif benchmark_ext == '.p':
- basic_command_line_args.append('--finite-model-find')
status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
- status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
+ status_to_output = lambda s: '% SZS status {} for {}'.format(
+ s, benchmark_filename)
elif benchmark_ext == '.sy':
comment_char = ';'
- # Do not use proofs/unsat-cores with .sy files
- proof = False
+ # Do not check proofs/unsat-cores with .sy files
+ check_unsat_cores = False
+ check_proofs = False
else:
- sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
+ sys.exit('"{}" must be *.smt2 or *.p or *.sy'.format(
benchmark_basename))
- # If there is an ".expect" file for the benchmark, read the metadata
- # from there, otherwise from the benchmark file.
- metadata_filename = benchmark_path + '.expect'
- if os.path.isfile(metadata_filename):
- comment_char = '%'
- else:
- metadata_filename = benchmark_path
-
- metadata_lines = None
- with open(metadata_filename, 'r') as metadata_file:
- metadata_lines = metadata_file.readlines()
-
- benchmark_content = None
- if metadata_filename == benchmark_path:
- benchmark_content = ''.join(metadata_lines)
- else:
- with open(benchmark_path, 'r') as benchmark_file:
- benchmark_content = benchmark_file.read()
+ benchmark_lines = None
+ with open(benchmark_path, 'r') as benchmark_file:
+ benchmark_lines = benchmark_file.readlines()
+ benchmark_content = ''.join(benchmark_lines)
# Extract the metadata for the benchmark.
scrubber = None
expected_exit_status = None
command_lines = []
requires = []
- for line in metadata_lines:
+ for line in benchmark_lines:
# Skip lines that do not start with a comment character.
if line[0] != comment_char:
continue
line = line[1:].lstrip()
if line.startswith(SCRUBBER):
- scrubber = line[len(SCRUBBER):]
+ scrubber = line[len(SCRUBBER):].strip()
elif line.startswith(ERROR_SCRUBBER):
- error_scrubber = line[len(ERROR_SCRUBBER):]
+ error_scrubber = line[len(ERROR_SCRUBBER):].strip()
elif line.startswith(EXPECT):
- expected_output += line[len(EXPECT):]
+ expected_output += line[len(EXPECT):].strip() + '\n'
elif line.startswith(EXPECT_ERROR):
- expected_error += line[len(EXPECT_ERROR):]
+ expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
elif line.startswith(EXIT):
- expected_exit_status = int(line[len(EXIT):])
+ expected_exit_status = int(line[len(EXIT):].strip())
elif line.startswith(COMMAND_LINE):
- command_lines.append(line[len(COMMAND_LINE):])
+ command_lines.append(line[len(COMMAND_LINE):].strip())
elif line.startswith(REQUIRES):
requires.append(line[len(REQUIRES):].strip())
expected_output = expected_output.strip()
if expected_output == '' and expected_error == '':
match = None
if status_regex:
- match = re.search(status_regex, benchmark_content)
+ match = re.findall(status_regex, benchmark_content)
if match:
- expected_output = status_to_output(match.group(1))
+ expected_output = status_to_output('\n'.join(match))
elif expected_exit_status is None:
# If there is no expected output/error and the exit status has not
# been set explicitly, the benchmark is invalid.
if expected_exit_status is None:
expected_exit_status = 0
- if 'CVC4_REGRESSION_ARGS' in os.environ:
+ if 'CVC5_REGRESSION_ARGS' in os.environ:
basic_command_line_args += shlex.split(
- os.environ['CVC4_REGRESSION_ARGS'])
+ os.environ['CVC5_REGRESSION_ARGS'])
- if not proof and ('(get-unsat-core)' in benchmark_content
- or '(get-unsat-assumptions)' in benchmark_content
- or '--check-proofs' in basic_command_line_args
- or '--dump-proofs' in basic_command_line_args):
+ if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
+ or '(get-unsat-assumptions)' in benchmark_content):
print(
'1..0 # Skipped regression: unsat cores not supported without proof support'
)
- return
+ return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
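+ # Requirements may be prefixed with "no-", meaning the test must only be
+ # run when the named feature is disabled in this build.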
for req_feature in requires:
- if req_feature not in cvc4_features:
+ is_negative = False
+ if req_feature.startswith("no-"):
+ req_feature = req_feature[len("no-"):]
+ is_negative = True
+ if req_feature not in (cvc5_features + cvc5_disabled_features):
+ print(
+ 'Illegal requirement in regression: {}\nAllowed requirements: {}'
+ .format(req_feature,
+ ' '.join(cvc5_features + cvc5_disabled_features)))
+ return EXIT_FAILURE
+ if is_negative:
+ if req_feature in cvc5_features:
+ print('1..0 # Skipped regression: not valid with {}'.format(
+ req_feature))
+ return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
+ elif req_feature not in cvc5_features:
print('1..0 # Skipped regression: {} not supported'.format(
req_feature))
- return
+ return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
if not command_lines:
command_lines.append('')
command_line_args_configs = []
for command_line in command_lines:
args = shlex.split(command_line)
- if proof or ('--check-proofs' not in args
- and '--dump-proofs' not in args):
- all_args = basic_command_line_args + args
- command_line_args_configs.append(all_args)
-
- extra_command_line_args = []
- if benchmark_ext == '.sy' and \
- '--no-check-synth-sol' not in all_args and \
- '--check-synth-sol' not in all_args:
- extra_command_line_args = ['--check-synth-sol']
- if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
- '--no-check-models' not in all_args:
- extra_command_line_args = ['--check-models']
- if proof and re.search(r'^(unsat|valid)$', expected_output):
- if '--no-check-proofs' not in all_args and \
- '--incremental' not in all_args and \
- '--unconstrained-simp' not in all_args and \
- not cvc4_binary.endswith('pcvc4'):
- extra_command_line_args = [
- '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
- '--no-bv-algebraic'
- ]
- if '--no-check-unsat-cores' not in all_args and \
- '--incremental' not in all_args and \
- '--unconstrained-simp' not in all_args and \
- not cvc4_binary.endswith('pcvc4'):
- extra_command_line_args += ['--check-unsat-cores']
- if extra_command_line_args:
- command_line_args_configs.append(all_args +
- extra_command_line_args)
-
- # Run CVC4 on the benchmark with the different option sets and check
+ all_args = basic_command_line_args + args
+
+ if not check_unsat_cores and ('--check-unsat-cores' in all_args):
+ print(
+ '# Skipped command line options ({}): unsat cores not supported without proof support'
+ .format(all_args))
+ continue
+ if not check_proofs and '--dump-proofs' in all_args:
+ print(
+ '# Skipped command line options ({}): proof production not supported'
+ .format(all_args))
+ continue
+
+ command_line_args_configs.append(all_args)
+
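+ # Derive extra cross-checking runs from the expected answer: model
+ # checking for sat/unknown results and unsat-core/proof checking for
+ # unsat results, unless the benchmark already requests or disables them.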
+ expected_output_lines = expected_output.split()
+ extra_command_line_args = []
+ if benchmark_ext == '.sy' and \
+ '--no-check-synth-sol' not in all_args and \
+ '--sygus-rr' not in all_args and \
+ '--check-synth-sol' not in all_args:
+ all_args += ['--check-synth-sol']
+ if ('sat' in expected_output_lines or \
+ 'unknown' in expected_output_lines) and \
+ '--no-debug-check-models' not in all_args and \
+ '--no-check-models' not in all_args and \
+ '--debug-check-models' not in all_args:
+ extra_command_line_args += ['--debug-check-models']
+ if 'unsat' in expected_output_lines:
+ if check_unsat_cores and \
+ '--no-produce-unsat-cores' not in all_args and \
+ '--no-check-unsat-cores' not in all_args and \
+ '--check-unsat-cores' not in all_args and \
+ 'sygus-inference' not in benchmark_content and \
+ '--unconstrained-simp' not in all_args:
+ extra_command_line_args += ['--check-unsat-cores']
+ if check_proofs and \
+ '--no-produce-proofs' not in all_args and \
+ '--no-check-proofs' not in all_args and \
+ '--check-proofs' not in all_args:
+ extra_command_line_args += ['--check-proofs']
+ if '--no-check-abducts' not in all_args and \
+ '--check-abducts' not in all_args and \
+ 'get-abduct' in benchmark_content:
+ all_args += ['--check-abducts']
+
+ # Create a test case for each extra argument
+ for extra_arg in extra_command_line_args:
+ command_line_args_configs.append(all_args + [extra_arg])
+
+ # Run cvc5 on the benchmark with the different option sets and check
# whether the exit status, stdout output, stderr output are as expected.
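+ # The "1..N" plan and the "ok"/"not ok" result lines follow the Test
+ # Anything Protocol (TAP) so TAP-aware harnesses can consume the output.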
print('1..{}'.format(len(command_line_args_configs)))
print('# Starting')
+ exit_code = EXIT_OK
for command_line_args in command_line_args_configs:
- output, error, exit_status = run_benchmark(
- dump, wrapper, scrubber, error_scrubber, cvc4_binary,
- command_line_args, benchmark_dir, benchmark_basename, timeout)
- if output != expected_output:
- print(
- 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
- format(command_line_args))
- for line in difflib.context_diff(output.splitlines(),
- expected_output.splitlines()):
- print(line)
+ output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
+ error_scrubber, cvc5_binary,
+ command_line_args,
+ benchmark_dir,
+ benchmark_basename, timeout)
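+ # Normalize output by stripping leading whitespace from every line before
+ # comparing it to the expected output.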
+ output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
+ error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
+ if exit_status == STATUS_TIMEOUT:
+ exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
+ print('Timeout - Flags: {}'.format(command_line_args))
+ elif output != expected_output:
+ exit_code = EXIT_FAILURE
+ print('not ok - Flags: {}'.format(command_line_args))
+ print()
+ print('Standard output difference')
+ print('=' * 80)
+ print_diff(output, expected_output)
+ print('=' * 80)
+ print()
+ print()
+ if error:
+ print('Error output')
+ print('=' * 80)
+ print_colored(Color.YELLOW, error)
+ print('=' * 80)
+ print()
elif error != expected_error:
+ exit_code = EXIT_FAILURE
print(
- 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
- format(command_line_args))
- for line in difflib.context_diff(error.splitlines(),
- expected_error.splitlines()):
- print(line)
+ 'not ok - Differences between expected and actual output on stderr - Flags: {}'
+ .format(command_line_args))
+ print()
+ print('Error output difference')
+ print('=' * 80)
+ print_diff(error, expected_error)
+ print('=' * 80)
+ print()
elif expected_exit_status != exit_status:
+ exit_code = EXIT_FAILURE
print(
'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
format(expected_exit_status, exit_status, command_line_args))
+ print()
+ print('Output:')
+ print('=' * 80)
+ print_colored(Color.BLUE, output)
+ print('=' * 80)
+ print()
+ print()
+ print('Error output:')
+ print('=' * 80)
+ print_colored(Color.YELLOW, error)
+ print('=' * 80)
+ print()
else:
print('ok - Flags: {}'.format(command_line_args))
+ return exit_code
+
def main():
"""Parses the command line arguments and then calls the core of the
parser = argparse.ArgumentParser(
description=
'Runs benchmark and checks for correct exit status and output.')
- parser.add_argument('--proof', action='store_true')
parser.add_argument('--dump', action='store_true')
+ parser.add_argument('--use-skip-return-code', action='store_true')
+ parser.add_argument('--skip-timeout', action='store_true')
+ parser.add_argument('--check-unsat-cores', action='store_true',
+ default=True)
+ parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores',
+ action='store_false')
+ parser.add_argument('--check-proofs', action='store_true', default=True)
+ parser.add_argument('--no-check-proofs', dest='check_proofs',
+ action='store_false')
parser.add_argument('wrapper', nargs='*')
- parser.add_argument('cvc4_binary')
+ parser.add_argument('cvc5_binary')
parser.add_argument('benchmark')
- args = parser.parse_args()
- cvc4_binary = os.path.abspath(args.cvc4_binary)
+
+ argv = sys.argv[1:]
+ # Append options passed via RUN_REGRESSION_ARGS to argv
+ if os.environ.get('RUN_REGRESSION_ARGS'):
+ argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))
+
+ args = parser.parse_args(argv)
+
+ cvc5_binary = os.path.abspath(args.cvc5_binary)
wrapper = args.wrapper
if os.environ.get('VALGRIND') == '1' and not wrapper:
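+ # `libtool --mode=execute` ensures valgrind is applied to the real binary
+ # rather than to the libtool wrapper script.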
wrapper = ['libtool', '--mode=execute', 'valgrind']
- timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
+ timeout = float(os.getenv('TEST_TIMEOUT', '600'))
- run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark,
- timeout)
+ return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
+ args.use_skip_return_code, args.skip_timeout,
+ wrapper, cvc5_binary, args.benchmark, timeout)
if __name__ == "__main__":
- main()
+ exit_code = main()
+ sys.exit(exit_code)