From 4587d4000e2544765ce940c4bd3bcaec42ca6507 Mon Sep 17 00:00:00 2001
From: Mathias Preiner <mathias.preiner@gmail.com>
Date: Tue, 16 Mar 2021 13:30:21 -0700
Subject: [PATCH] ci: Enable checking of proofs + unsat cores. (#6088)

This commit refactors the run_regression.py script and adds options for enabling/disabling the checking of proofs and unsat cores. Both checks are enabled by default; the CI builds selectively disable them through the RUN_REGRESSION_ARGS environment variable.
---
 .github/workflows/ci.yml                      |  13 ++-
 CMakeLists.txt                                |   2 -
 src/theory/builtin/proof_checker.cpp          |   1 -
 test/regress/regress0/nl/sqrt.smt2            |   1 +
 test/regress/regress0/nl/sqrt2-value.smt2     |   1 +
 .../regress/regress0/seq/seq-expand-defs.smt2 |   2 +-
 .../regress1/quantifiers/qid-debug-inst.smt2  |   2 +-
 .../issue3657-unexpectedUnsatCVC4.smt2        |   2 +-
 test/regress/run_regression.py                | 109 +++++++++---------
 9 files changed, 67 insertions(+), 66 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7e029e3c7..e1ea39011 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,6 +28,7 @@ jobs:
             python-bindings: true
             check-examples: true
             exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores --no-check-proofs
 
           - name: production-clang
             config: production
@@ -35,20 +36,23 @@ jobs:
             check-examples: true
             env: CC=clang CXX=clang++
             os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores --no-check-proofs
 
           - name: production-dbg
             config: production --assertions --tracing --unit-testing --symfpu --lfsc --editline
             cache-key: dbg
             os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores
 
           - name: production-dbg-clang
-            config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --no-proofs --poly
+            config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --poly
             cache-key: dbgclang
             env: CC=clang CXX=clang++
             os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-proofs
 
     name: ${{ matrix.os }}:${{ matrix.name }}
     runs-on: ${{ matrix.os }}
@@ -168,6 +172,7 @@ jobs:
       env:
         ARGS: --output-on-failure -LE regress[${{ matrix.exclude_regress }}]
         CVC4_REGRESSION_ARGS: --no-early-exit
+        RUN_REGRESSION_ARGS: ${{ matrix.run_regression_args }}
       working-directory: build
 
     - name: Install Check
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c06c360ac..843fc16c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -430,7 +430,6 @@ if(ENABLE_PROFILING)
 endif()
 
 if(ENABLE_PROOFS)
-  set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --enable-proof)
   add_definitions(-DCVC4_PROOF)
 endif()
 
@@ -495,7 +494,6 @@ if(USE_KISSAT)
 endif()
 
 if(USE_LFSC)
-  set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --with-lfsc)
   find_package(LFSC REQUIRED)
   add_definitions(-DCVC4_USE_LFSC)
 endif()
diff --git a/src/theory/builtin/proof_checker.cpp b/src/theory/builtin/proof_checker.cpp
index 5d05e5383..3e0eca128 100644
--- a/src/theory/builtin/proof_checker.cpp
+++ b/src/theory/builtin/proof_checker.cpp
@@ -426,7 +426,6 @@ Node BuiltinProofRuleChecker::checkInternal(PfRule id,
            || id == PfRule::TRUST_SUBS_MAP)
   {
     // "trusted" rules
-    Assert(children.empty());
     Assert(!args.empty());
     Assert(args[0].getType().isBoolean());
     return args[0];
diff --git a/test/regress/regress0/nl/sqrt.smt2 b/test/regress/regress0/nl/sqrt.smt2
index fdcec3d62..92c4dabba 100644
--- a/test/regress/regress0/nl/sqrt.smt2
+++ b/test/regress/regress0/nl/sqrt.smt2
@@ -1,3 +1,4 @@
+; COMMAND-LINE: -q
 ; EXPECT: sat
 ; EXPECT: sat
 ; EXPECT: unsat
diff --git a/test/regress/regress0/nl/sqrt2-value.smt2 b/test/regress/regress0/nl/sqrt2-value.smt2
index 6c3cd378a..078d8fcc7 100644
--- a/test/regress/regress0/nl/sqrt2-value.smt2
+++ b/test/regress/regress0/nl/sqrt2-value.smt2
@@ -1,4 +1,5 @@
 ; SCRUBBER: sed -e 's/witness.*/witness/'
+; COMMAND-LINE: --no-check-models
 ; EXPECT: sat
 ; EXPECT: ((x (witness
 (set-option :produce-models true)
diff --git a/test/regress/regress0/seq/seq-expand-defs.smt2 b/test/regress/regress0/seq/seq-expand-defs.smt2
index 3e51627c0..065dd6bd5 100644
--- a/test/regress/regress0/seq/seq-expand-defs.smt2
+++ b/test/regress/regress0/seq/seq-expand-defs.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --strings-exp
+; COMMAND-LINE: --strings-exp -q
 ; EXPECT: sat
 ; EXPECT: (((seq.nth y 7) 404))
 ; EXPECT: (((str.from_code x) "?"))
diff --git a/test/regress/regress1/quantifiers/qid-debug-inst.smt2 b/test/regress/regress1/quantifiers/qid-debug-inst.smt2
index d7ce3771b..b43c9697a 100644
--- a/test/regress/regress1/quantifiers/qid-debug-inst.smt2
+++ b/test/regress/regress1/quantifiers/qid-debug-inst.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --debug-inst
+; COMMAND-LINE: --debug-inst --no-check-unsat-cores
 ; EXPECT: (num-instantiations myQuant1 1)
 ; EXPECT: (num-instantiations myQuant2 1)
 ; EXPECT: unsat
diff --git a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2
index 4879cb3fb..648d436bb 100644
--- a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2
+++ b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i
+; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i -q
 ; EXPECT: sat
 ; EXPECT: sat
 ; EXPECT: sat
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py
index 4a56aed9f..fb4786331 100755
--- a/test/regress/run_regression.py
+++ b/test/regress/run_regression.py
@@ -127,31 +127,6 @@ def get_cvc4_features(cvc4_binary):
     return features, disabled_features
 
 
-def logic_supported_with_proofs(logic):
-    assert logic is None or isinstance(logic, str)
-    return logic in [
-        #single theories
-        "QF_BV",
-        "QF_UF",
-        "QF_A",
-        "QF_LRA",
-        #two theories
-        "QF_UFBV",
-        "QF_UFLRA",
-        "QF_AUF",
-        "QF_ALRA",
-        "QF_ABV",
-        "QF_BVLRA"
-        #three theories
-        "QF_AUFBV",
-        "QF_ABVLRA",
-        "QF_UFBVLRA",
-        "QF_AUFLRA",
-        #four theories
-        "QF_AUFBVLRA"
-    ]
-
-
 def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                   command_line, benchmark_dir, benchmark_filename, timeout):
     """Runs CVC4 on the file `benchmark_filename` in the directory
@@ -200,13 +175,13 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
     return (output.strip(), error.strip(), exit_status)
 
 
-def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
+def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
                    skip_timeout, wrapper, cvc4_binary, benchmark_path,
                    timeout):
     """Determines the expected output for a benchmark, runs CVC4 on it and then
     checks whether the output corresponds to the expected output. Optionally
-    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
-    checks proofs (if proofs is true), or dumps a benchmark and uses that as
+    uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
+    checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
     the input (if dump is true). `use_skip_return_code` enables/disables
     returning 77 when a test is skipped."""
 
@@ -218,6 +193,11 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
 
     cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary)
 
+    # Disable proof and unsat core checks if CVC4 was not compiled with proofs.
+    if 'proof' not in cvc4_features:
+        check_unsat_cores = False
+        check_proofs = False
+
     basic_command_line_args = []
 
     benchmark_basename = os.path.basename(benchmark_path)
@@ -225,14 +205,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
     benchmark_dir = os.path.dirname(benchmark_path)
     comment_char = '%'
     status_regex = None
-    logic_regex = None
     status_to_output = lambda s: s
     if benchmark_ext == '.smt':
         status_regex = r':status\s*(sat|unsat)'
         comment_char = ';'
     elif benchmark_ext == '.smt2':
         status_regex = r'set-info\s*:status\s*(sat|unsat)'
-        logic_regex = r'\(\s*set-logic\s*(.*)\)'
         comment_char = ';'
     elif benchmark_ext == '.cvc':
         pass
@@ -242,9 +220,9 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
             s, benchmark_filename)
     elif benchmark_ext == '.sy':
         comment_char = ';'
-        # Do not use proofs/unsat-cores with .sy files
-        unsat_cores = False
-        proofs = False
+        # Do not check proofs/unsat-cores with .sy files
+        check_unsat_cores = False
+        check_proofs = False
     else:
         sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
             benchmark_basename))
@@ -262,7 +240,6 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
     expected_exit_status = None
     command_lines = []
     requires = []
-    logic = None
     for line in benchmark_lines:
         # Skip lines that do not start with a comment character.
         if line[0] != comment_char:
@@ -301,16 +278,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
             sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
     if expected_exit_status is None:
         expected_exit_status = 0
-    if logic_regex:
-        logic_match = re.findall(logic_regex, benchmark_content)
-        if logic_match and len(logic_match) == 1:
-            logic = logic_match[0]
 
     if 'CVC4_REGRESSION_ARGS' in os.environ:
         basic_command_line_args += shlex.split(
             os.environ['CVC4_REGRESSION_ARGS'])
 
-    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
+    if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
                             or '(get-unsat-assumptions)' in benchmark_content):
         print(
             '1..0 # Skipped regression: unsat cores not supported without proof support'
@@ -346,42 +319,54 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
         args = shlex.split(command_line)
         all_args = basic_command_line_args + args
 
-        if not unsat_cores and ('--check-unsat-cores' in all_args):
+        if not check_unsat_cores and ('--check-unsat-cores' in all_args):
             print(
                 '# Skipped command line options ({}): unsat cores not supported without proof support'
                 .format(all_args))
             continue
-        if not proofs and '--dump-proofs' in all_args:
+        if not check_proofs and '--dump-proofs' in all_args:
             print(
-                '# Skipped command line options ({}): proof production not supported without LFSC support'
+                '# Skipped command line options ({}): proof production not supported'
                 .format(all_args))
             continue
 
         command_line_args_configs.append(all_args)
 
+        expected_output_lines = expected_output.split()
         extra_command_line_args = []
         if benchmark_ext == '.sy' and \
             '--no-check-synth-sol' not in all_args and \
             '--sygus-rr' not in all_args and \
             '--check-synth-sol' not in all_args:
-            extra_command_line_args = ['--check-synth-sol']
-        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
+            extra_command_line_args += ['--check-synth-sol']
+        if ('sat' in expected_output_lines or \
+            'invalid' in expected_output_lines or \
+            'unknown' in expected_output_lines) and \
            '--no-debug-check-models' not in all_args and \
            '--no-check-models' not in all_args and \
            '--debug-check-models' not in all_args:
-            extra_command_line_args = ['--debug-check-models']
-        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
-            if '--no-check-unsat-cores' not in all_args and \
+            extra_command_line_args += ['--debug-check-models']
+        if 'unsat' in expected_output_lines or 'valid' in expected_output_lines:
+            if check_unsat_cores and \
+               '--no-produce-unsat-cores' not in all_args and \
+               '--no-check-unsat-cores' not in all_args and \
                '--check-unsat-cores' not in all_args and \
                '--incremental' not in all_args and \
                '--unconstrained-simp' not in all_args:
                 extra_command_line_args += ['--check-unsat-cores']
+            if check_proofs and \
+               '--no-produce-proofs' not in all_args and \
+               '--no-check-proofs' not in all_args and \
+               '--check-proofs' not in all_args:
+                extra_command_line_args += ['--check-proofs']
         if '--no-check-abducts' not in all_args and \
-            '--check-abducts' not in all_args:
+            '--check-abducts' not in all_args and \
+            'get-abduct' in benchmark_content:
             extra_command_line_args += ['--check-abducts']
-        if extra_command_line_args:
-            command_line_args_configs.append(all_args +
-                                             extra_command_line_args)
+
+        # Create a test case for each extra argument
+        for extra_arg in extra_command_line_args:
+            command_line_args_configs.append(all_args + [extra_arg])
 
     # Run CVC4 on the benchmark with the different option sets and check
     # whether the exit status, stdout output, stderr output are as expected.
@@ -456,24 +441,36 @@ def main():
     parser = argparse.ArgumentParser(
         description=
         'Runs benchmark and checks for correct exit status and output.')
-    parser.add_argument('--enable-proof', action='store_true')
-    parser.add_argument('--with-lfsc', action='store_true')
     parser.add_argument('--dump', action='store_true')
     parser.add_argument('--use-skip-return-code', action='store_true')
     parser.add_argument('--skip-timeout', action='store_true')
+    parser.add_argument('--check-unsat-cores', action='store_true',
+                        default=True)
+    parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores',
+                        action='store_false')
+    parser.add_argument('--check-proofs', action='store_true', default=True)
+    parser.add_argument('--no-check-proofs', dest='check_proofs',
+                        action='store_false')
     parser.add_argument('wrapper', nargs='*')
     parser.add_argument('cvc4_binary')
     parser.add_argument('benchmark')
-    args = parser.parse_args()
+
+    argv = sys.argv[1:]
+    # Append options passed via RUN_REGRESSION_ARGS to argv
+    if os.environ.get('RUN_REGRESSION_ARGS'):
+        argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))
+
+    args = parser.parse_args(argv)
+
     cvc4_binary = os.path.abspath(args.cvc4_binary)
 
     wrapper = args.wrapper
     if os.environ.get('VALGRIND') == '1' and not wrapper:
         wrapper = ['libtool', '--mode=execute', 'valgrind']
 
-    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
+    timeout = float(os.getenv('TEST_TIMEOUT', '600'))
 
-    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
+    return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
                           args.use_skip_return_code, args.skip_timeout,
                           wrapper, cvc4_binary, args.benchmark, timeout)
 
-- 
2.30.2