From 4587d4000e2544765ce940c4bd3bcaec42ca6507 Mon Sep 17 00:00:00 2001 From: Mathias Preiner Date: Tue, 16 Mar 2021 13:30:21 -0700 Subject: [PATCH] ci: Enable checking of proofs + unsat cores. (#6088) This commit refactors the run_regression.py script and adds options for enabling/disabling checking of proofs and unsat cores. Both options are enabled by default and disabled for each corresponding CI build. --- .github/workflows/ci.yml | 13 ++- CMakeLists.txt | 2 - src/theory/builtin/proof_checker.cpp | 1 - test/regress/regress0/nl/sqrt.smt2 | 1 + test/regress/regress0/nl/sqrt2-value.smt2 | 1 + .../regress/regress0/seq/seq-expand-defs.smt2 | 2 +- .../regress1/quantifiers/qid-debug-inst.smt2 | 2 +- .../issue3657-unexpectedUnsatCVC4.smt2 | 2 +- test/regress/run_regression.py | 109 +++++++++--------- 9 files changed, 67 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e029e3c7..e1ea39011 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,7 @@ jobs: python-bindings: true check-examples: true exclude_regress: 3-4 + run_regression_args: --no-check-unsat-cores --no-check-proofs - name: production-clang config: production @@ -35,20 +36,23 @@ jobs: check-examples: true env: CC=clang CXX=clang++ os: ubuntu-latest - exclude_regress: 1-4 + exclude_regress: 3-4 + run_regression_args: --no-check-unsat-cores --no-check-proofs - name: production-dbg config: production --assertions --tracing --unit-testing --symfpu --lfsc --editline cache-key: dbg os: ubuntu-latest - exclude_regress: 1-4 + exclude_regress: 3-4 + run_regression_args: --no-check-unsat-cores - name: production-dbg-clang - config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --no-proofs --poly + config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --poly cache-key: dbgclang env: CC=clang CXX=clang++ os: ubuntu-latest - exclude_regress: 1-4 + exclude_regress: 3-4 + run_regression_args: --no-check-proofs name: ${{ matrix.os }}:${{ matrix.name }} runs-on: ${{ matrix.os }} @@ -168,6 +172,7 @@ jobs: env: ARGS: --output-on-failure -LE regress[${{ matrix.exclude_regress }}] CVC4_REGRESSION_ARGS: --no-early-exit + RUN_REGRESSION_ARGS: ${{ matrix.run_regression_args }} working-directory: build - name: Install Check diff --git a/CMakeLists.txt b/CMakeLists.txt index c06c360ac..843fc16c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -430,7 +430,6 @@ if(ENABLE_PROFILING) endif() if(ENABLE_PROOFS) - set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --enable-proof) add_definitions(-DCVC4_PROOF) endif() @@ -495,7 +494,6 @@ if(USE_KISSAT) endif() if(USE_LFSC) - set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --with-lfsc) find_package(LFSC REQUIRED) add_definitions(-DCVC4_USE_LFSC) endif() diff --git a/src/theory/builtin/proof_checker.cpp b/src/theory/builtin/proof_checker.cpp index 5d05e5383..3e0eca128 100644 --- a/src/theory/builtin/proof_checker.cpp +++ b/src/theory/builtin/proof_checker.cpp @@ -426,7 +426,6 @@ Node BuiltinProofRuleChecker::checkInternal(PfRule id, || id == PfRule::TRUST_SUBS_MAP) { // "trusted" rules - Assert(children.empty()); Assert(!args.empty()); Assert(args[0].getType().isBoolean()); return args[0]; diff --git a/test/regress/regress0/nl/sqrt.smt2 b/test/regress/regress0/nl/sqrt.smt2 index fdcec3d62..92c4dabba 100644 --- a/test/regress/regress0/nl/sqrt.smt2 +++ b/test/regress/regress0/nl/sqrt.smt2 @@ -1,3 +1,4 @@ +; COMMAND-LINE: -q ; EXPECT: sat ; EXPECT: sat ; EXPECT: unsat diff --git a/test/regress/regress0/nl/sqrt2-value.smt2 b/test/regress/regress0/nl/sqrt2-value.smt2 index 6c3cd378a..078d8fcc7 100644 --- a/test/regress/regress0/nl/sqrt2-value.smt2 +++ b/test/regress/regress0/nl/sqrt2-value.smt2 @@ -1,4 +1,5 @@ ; SCRUBBER: sed -e 's/witness.*/witness/' +; COMMAND-LINE: --no-check-models ; EXPECT: sat ; EXPECT: ((x (witness (set-option :produce-models true) diff --git a/test/regress/regress0/seq/seq-expand-defs.smt2 b/test/regress/regress0/seq/seq-expand-defs.smt2 index 3e51627c0..065dd6bd5 100644 --- a/test/regress/regress0/seq/seq-expand-defs.smt2 +++ b/test/regress/regress0/seq/seq-expand-defs.smt2 @@ -1,4 +1,4 @@ -; COMMAND-LINE: --strings-exp +; COMMAND-LINE: --strings-exp -q ; EXPECT: sat ; EXPECT: (((seq.nth y 7) 404)) ; EXPECT: (((str.from_code x) "?")) diff --git a/test/regress/regress1/quantifiers/qid-debug-inst.smt2 b/test/regress/regress1/quantifiers/qid-debug-inst.smt2 index d7ce3771b..b43c9697a 100644 --- a/test/regress/regress1/quantifiers/qid-debug-inst.smt2 +++ b/test/regress/regress1/quantifiers/qid-debug-inst.smt2 @@ -1,4 +1,4 @@ -; COMMAND-LINE: --debug-inst +; COMMAND-LINE: --debug-inst --no-check-unsat-cores ; EXPECT: (num-instantiations myQuant1 1) ; EXPECT: (num-instantiations myQuant2 1) ; EXPECT: unsat diff --git a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 index 4879cb3fb..648d436bb 100644 --- a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 +++ b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 @@ -1,4 +1,4 @@ -; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i +; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i -q ; EXPECT: sat ; EXPECT: sat ; EXPECT: sat diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py index 4a56aed9f..fb4786331 100755 --- a/test/regress/run_regression.py +++ b/test/regress/run_regression.py @@ -127,31 +127,6 @@ def get_cvc4_features(cvc4_binary): return features, disabled_features -def logic_supported_with_proofs(logic): - assert logic is None or isinstance(logic, str) - return logic in [ - #single theories - "QF_BV", - "QF_UF", - "QF_A", - "QF_LRA", - #two theories - "QF_UFBV", - "QF_UFLRA", - "QF_AUF", - "QF_ALRA", - "QF_ABV", - "QF_BVLRA" - #three theories - "QF_AUFBV", - "QF_ABVLRA", - "QF_UFBVLRA", - "QF_AUFLRA", - #four theories - "QF_AUFBVLRA" - ] - - def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, command_line, benchmark_dir, benchmark_filename, timeout): """Runs CVC4 on the file `benchmark_filename` in the directory @@ -200,13 +175,13 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, return (output.strip(), error.strip(), exit_status) -def run_regression(unsat_cores, proofs, dump, use_skip_return_code, +def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code, skip_timeout, wrapper, cvc4_binary, benchmark_path, timeout): """Determines the expected output for a benchmark, runs CVC4 on it and then checks whether the output corresponds to the expected output. Optionally - uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true), - checks proofs (if proofs is true), or dumps a benchmark and uses that as + uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true), + checks proofs (if check_proofs is true), or dumps a benchmark and uses that as the input (if dump is true). `use_skip_return_code` enables/disables returning 77 when a test is skipped.""" @@ -218,6 +193,11 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary) + # Disable proof and unsat core checks if CVC4 was not compiled with proofs. + if 'proof' not in cvc4_features: + check_unsat_cores = False + check_proofs = False + basic_command_line_args = [] benchmark_basename = os.path.basename(benchmark_path) @@ -225,14 +205,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, benchmark_dir = os.path.dirname(benchmark_path) comment_char = '%' status_regex = None - logic_regex = None status_to_output = lambda s: s if benchmark_ext == '.smt': status_regex = r':status\s*(sat|unsat)' comment_char = ';' elif benchmark_ext == '.smt2': status_regex = r'set-info\s*:status\s*(sat|unsat)' - logic_regex = r'\(\s*set-logic\s*(.*)\)' comment_char = ';' elif benchmark_ext == '.cvc': pass @@ -242,9 +220,9 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, s, benchmark_filename) elif benchmark_ext == '.sy': comment_char = ';' - # Do not use proofs/unsat-cores with .sy files - unsat_cores = False - proofs = False + # Do not check proofs/unsat-cores with .sy files + check_unsat_cores = False + check_proofs = False else: sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format( benchmark_basename)) @@ -262,7 +240,6 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, expected_exit_status = None command_lines = [] requires = [] - logic = None for line in benchmark_lines: # Skip lines that do not start with a comment character. if line[0] != comment_char: @@ -301,16 +278,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, sys.exit('Cannot determine status of "{}"'.format(benchmark_path)) if expected_exit_status is None: expected_exit_status = 0 - if logic_regex: - logic_match = re.findall(logic_regex, benchmark_content) - if logic_match and len(logic_match) == 1: - logic = logic_match[0] if 'CVC4_REGRESSION_ARGS' in os.environ: basic_command_line_args += shlex.split( os.environ['CVC4_REGRESSION_ARGS']) - if not unsat_cores and ('(get-unsat-core)' in benchmark_content + if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content or '(get-unsat-assumptions)' in benchmark_content): print( '1..0 # Skipped regression: unsat cores not supported without proof support' @@ -346,42 +319,54 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code, args = shlex.split(command_line) all_args = basic_command_line_args + args - if not unsat_cores and ('--check-unsat-cores' in all_args): + if not check_unsat_cores and ('--check-unsat-cores' in all_args): print( '# Skipped command line options ({}): unsat cores not supported without proof support' .format(all_args)) continue - if not proofs and '--dump-proofs' in all_args: + if not check_proofs and '--dump-proofs' in all_args: print( - '# Skipped command line options ({}): proof production not supported without LFSC support' + '# Skipped command line options ({}): proof production not supported' .format(all_args)) continue command_line_args_configs.append(all_args) + expected_output_lines = expected_output.split() extra_command_line_args = [] if benchmark_ext == '.sy' and \ '--no-check-synth-sol' not in all_args and \ '--sygus-rr' not in all_args and \ '--check-synth-sol' not in all_args: - extra_command_line_args = ['--check-synth-sol'] - if re.search(r'^(sat|invalid|unknown)$', expected_output) and \ + extra_command_line_args += ['--check-synth-sol'] + if ('sat' in expected_output_lines or \ + 'invalid' in expected_output_lines or \ + 'unknown' in expected_output_lines) and \ '--no-debug-check-models' not in all_args and \ '--no-check-models' not in all_args and \ '--debug-check-models' not in all_args: - extra_command_line_args = ['--debug-check-models'] - if unsat_cores and re.search(r'^(unsat|valid)$', expected_output): - if '--no-check-unsat-cores' not in all_args and \ + extra_command_line_args += ['--debug-check-models'] + if 'unsat' in expected_output_lines or 'valid' in expected_output_lines: + if check_unsat_cores and \ + '--no-produce-unsat-cores' not in all_args and \ + '--no-check-unsat-cores' not in all_args and \ '--check-unsat-cores' not in all_args and \ '--incremental' not in all_args and \ '--unconstrained-simp' not in all_args: extra_command_line_args += ['--check-unsat-cores'] + if check_proofs and \ + '--no-produce-proofs' not in all_args and \ + '--no-check-proofs' not in all_args and \ + '--check-proofs' not in all_args: + extra_command_line_args += ['--check-proofs'] if '--no-check-abducts' not in all_args and \ - '--check-abducts' not in all_args: + '--check-abducts' not in all_args and \ + 'get-abduct' in benchmark_content: extra_command_line_args += ['--check-abducts'] - if extra_command_line_args: - command_line_args_configs.append(all_args + - extra_command_line_args) + + # Create a test case for each extra argument + for extra_arg in extra_command_line_args: + command_line_args_configs.append(all_args + [extra_arg]) # Run CVC4 on the benchmark with the different option sets and check # whether the exit status, stdout output, stderr output are as expected. @@ -456,24 +441,36 @@ def main(): parser = argparse.ArgumentParser( description= 'Runs benchmark and checks for correct exit status and output.') - parser.add_argument('--enable-proof', action='store_true') - parser.add_argument('--with-lfsc', action='store_true') parser.add_argument('--dump', action='store_true') parser.add_argument('--use-skip-return-code', action='store_true') parser.add_argument('--skip-timeout', action='store_true') + parser.add_argument('--check-unsat-cores', action='store_true', + default=True) + parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores', + action='store_false') + parser.add_argument('--check-proofs', action='store_true', default=True) + parser.add_argument('--no-check-proofs', dest='check_proofs', + action='store_false') parser.add_argument('wrapper', nargs='*') parser.add_argument('cvc4_binary') parser.add_argument('benchmark') - args = parser.parse_args() + + argv = sys.argv[1:] + # Append options passed via RUN_REGRESSION_ARGS to argv + if os.environ.get('RUN_REGRESSION_ARGS'): + argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS'))) + + args = parser.parse_args(argv) + cvc4_binary = os.path.abspath(args.cvc4_binary) wrapper = args.wrapper if os.environ.get('VALGRIND') == '1' and not wrapper: wrapper = ['libtool', '--mode=execute', 'valgrind'] - timeout = float(os.getenv('TEST_TIMEOUT', 600.0)) + timeout = float(os.getenv('TEST_TIMEOUT', '600')) - return run_regression(args.enable_proof, args.with_lfsc, args.dump, + return run_regression(args.check_unsat_cores, args.check_proofs, args.dump, args.use_skip_return_code, args.skip_timeout, wrapper, cvc4_binary, args.benchmark, timeout) -- 2.30.2