ci: Enable checking of proofs + unsat cores. (#6088)

author Mathias Preiner <mathias.preiner@gmail.com>

Tue, 16 Mar 2021 20:30:21 +0000 (13:30 -0700)

committer GitHub <noreply@github.com>

Tue, 16 Mar 2021 20:30:21 +0000 (20:30 +0000)
author Mathias Preiner <mathias.preiner@gmail.com>
Tue, 16 Mar 2021 20:30:21 +0000 (13:30 -0700)
committer GitHub <noreply@github.com>
Tue, 16 Mar 2021 20:30:21 +0000 (20:30 +0000)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml

index 7e029e3c701e177d7a202cf463e07b132339b29b..e1ea39011eaf8b2c65eb7ec6f6bf38039f5be562 100644 (file)
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,6 +28,7 @@ jobs:
              python-bindings: true
              check-examples: true
              exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores --no-check-proofs
  
            - name: production-clang
              config: production
@@ -35,20 +36,23 @@ jobs:
              check-examples: true
              env: CC=clang CXX=clang++
              os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores --no-check-proofs
  
            - name: production-dbg
              config: production --assertions --tracing --unit-testing --symfpu --lfsc --editline
              cache-key: dbg
              os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-unsat-cores
  
            - name: production-dbg-clang
-            config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --no-proofs --poly
+            config: production --assertions --tracing --unit-testing --symfpu --cln --gpl --poly
              cache-key: dbgclang
              env: CC=clang CXX=clang++
              os: ubuntu-latest
-            exclude_regress: 1-4
+            exclude_regress: 3-4
+            run_regression_args: --no-check-proofs
  
      name: ${{ matrix.os }}:${{ matrix.name }}
      runs-on: ${{ matrix.os }}
@@ -168,6 +172,7 @@ jobs:
        env:
          ARGS: --output-on-failure -LE regress[${{ matrix.exclude_regress }}]
          CVC4_REGRESSION_ARGS: --no-early-exit
+        RUN_REGRESSION_ARGS: ${{ matrix.run_regression_args }}
        working-directory: build
  
      - name: Install Check
diff --git a/CMakeLists.txt b/CMakeLists.txt

index c06c360acf45769ed6945ff54654751842056079..843fc16c17d6ef89a34006af3e184f7f70e8c141 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -430,7 +430,6 @@ if(ENABLE_PROFILING)
  endif()
  
  if(ENABLE_PROOFS)
-  set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --enable-proof)
    add_definitions(-DCVC4_PROOF)
  endif()
  
@@ -495,7 +494,6 @@ if(USE_KISSAT)
  endif()
  
  if(USE_LFSC)
-  set(RUN_REGRESSION_ARGS ${RUN_REGRESSION_ARGS} --with-lfsc)
    find_package(LFSC REQUIRED)
    add_definitions(-DCVC4_USE_LFSC)
  endif()
diff --git a/src/theory/builtin/proof_checker.cpp b/src/theory/builtin/proof_checker.cpp

index 5d05e53834f8ead131214dda9f155603153af35e..3e0eca1288a4985bba5776a2ce9d689e079ad703 100644 (file)
--- a/src/theory/builtin/proof_checker.cpp
+++ b/src/theory/builtin/proof_checker.cpp
@@ -426,7 +426,6 @@ Node BuiltinProofRuleChecker::checkInternal(PfRule id,
             || id == PfRule::TRUST_SUBS_MAP)
    {
      // "trusted" rules
-    Assert(children.empty());
      Assert(!args.empty());
      Assert(args[0].getType().isBoolean());
      return args[0];
diff --git a/test/regress/regress0/nl/sqrt.smt2 b/test/regress/regress0/nl/sqrt.smt2

index fdcec3d62eef40c259cc143675d9be4d6d327544..92c4dabba0c33ddd0a49c24d0c46861e5d3dd763 100644 (file)
--- a/test/regress/regress0/nl/sqrt.smt2
+++ b/test/regress/regress0/nl/sqrt.smt2
@@ -1,3 +1,4 @@
+; COMMAND-LINE: -q
  ; EXPECT: sat
  ; EXPECT: sat
  ; EXPECT: unsat
diff --git a/test/regress/regress0/nl/sqrt2-value.smt2 b/test/regress/regress0/nl/sqrt2-value.smt2

index 6c3cd378a2d2d01bad3d0e8a19fd89f0da4483ad..078d8fcc72ff3c9d35fd343b74c9d308095bffff 100644 (file)
--- a/test/regress/regress0/nl/sqrt2-value.smt2
+++ b/test/regress/regress0/nl/sqrt2-value.smt2
@@ -1,4 +1,5 @@
  ; SCRUBBER: sed -e 's/witness.*/witness/'
+; COMMAND-LINE: --no-check-models
  ; EXPECT: sat
  ; EXPECT: ((x (witness
  (set-option :produce-models true)
diff --git a/test/regress/regress0/seq/seq-expand-defs.smt2 b/test/regress/regress0/seq/seq-expand-defs.smt2

index 3e51627c0055bb8a5e007d9a090854fcb97c5094..065dd6bd57e0c98911b2ce4064662915c7fbda06 100644 (file)
--- a/test/regress/regress0/seq/seq-expand-defs.smt2
+++ b/test/regress/regress0/seq/seq-expand-defs.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --strings-exp
+; COMMAND-LINE: --strings-exp -q
  ; EXPECT: sat
  ; EXPECT: (((seq.nth y 7) 404))
  ; EXPECT: (((str.from_code x) "?"))
diff --git a/test/regress/regress1/quantifiers/qid-debug-inst.smt2 b/test/regress/regress1/quantifiers/qid-debug-inst.smt2

index d7ce3771be687b518d74cabb7a5af4c62e2ace9c..b43c9697a58628f967d8b1ec5fc07f97fce43d58 100644 (file)
--- a/test/regress/regress1/quantifiers/qid-debug-inst.smt2
+++ b/test/regress/regress1/quantifiers/qid-debug-inst.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --debug-inst
+; COMMAND-LINE: --debug-inst --no-check-unsat-cores
  ; EXPECT: (num-instantiations myQuant1 1)
  ; EXPECT: (num-instantiations myQuant2 1)
  ; EXPECT: unsat
diff --git a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2 b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2

index 4879cb3fb6a485ca2924e6f4299afda5130ecfda..648d436bb512ca052d50b9d61ac2dde9bdd071f1 100644 (file)
--- a/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2
+++ b/test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2
@@ -1,4 +1,4 @@
-; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i\r
+; COMMAND-LINE: --strings-exp --fmf-fun-rlv -i -q\r
  ; EXPECT: sat\r
  ; EXPECT: sat\r
  ; EXPECT: sat\r
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py

index 4a56aed9f53ac8c86164557386384f117eacb953..fb47863315611a80f5fd67a5682f31be12d4fcd9 100755 (executable)
--- a/test/regress/run_regression.py
+++ b/test/regress/run_regression.py
@@ -127,31 +127,6 @@ def get_cvc4_features(cvc4_binary):
      return features, disabled_features
  
  
-def logic_supported_with_proofs(logic):
-    assert logic is None or isinstance(logic, str)
-    return logic in [
-        #single theories
-        "QF_BV",
-        "QF_UF",
-        "QF_A",
-        "QF_LRA",
-        #two theories
-        "QF_UFBV",
-        "QF_UFLRA",
-        "QF_AUF",
-        "QF_ALRA",
-        "QF_ABV",
-        "QF_BVLRA"
-        #three theories
-        "QF_AUFBV",
-        "QF_ABVLRA",
-        "QF_UFBVLRA",
-        "QF_AUFLRA",
-        #four theories
-        "QF_AUFBVLRA"
-    ]
-
-
  def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                    command_line, benchmark_dir, benchmark_filename, timeout):
      """Runs CVC4 on the file `benchmark_filename` in the directory
@@ -200,13 +175,13 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
      return (output.strip(), error.strip(), exit_status)
  
  
-def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
+def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
                     skip_timeout, wrapper, cvc4_binary, benchmark_path,
                     timeout):
      """Determines the expected output for a benchmark, runs CVC4 on it and then
      checks whether the output corresponds to the expected output. Optionally
-    uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is true),
-    checks proofs (if proofs is true), or dumps a benchmark and uses that as
+    uses a wrapper `wrapper`, tests unsat cores (if check_unsat_cores is true),
+    checks proofs (if check_proofs is true), or dumps a benchmark and uses that as
      the input (if dump is true). `use_skip_return_code` enables/disables
      returning 77 when a test is skipped."""
  
@@ -218,6 +193,11 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
  
      cvc4_features, cvc4_disabled_features = get_cvc4_features(cvc4_binary)
  
+    # Disable proof and unsat core checks if CVC4 was not compiled with proofs.
+    if 'proof' not in cvc4_features:
+        check_unsat_cores = False
+        check_proofs = False
+
      basic_command_line_args = []
  
      benchmark_basename = os.path.basename(benchmark_path)
@@ -225,14 +205,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
      benchmark_dir = os.path.dirname(benchmark_path)
      comment_char = '%'
      status_regex = None
-    logic_regex = None
      status_to_output = lambda s: s
      if benchmark_ext == '.smt':
          status_regex = r':status\s*(sat|unsat)'
          comment_char = ';'
      elif benchmark_ext == '.smt2':
          status_regex = r'set-info\s*:status\s*(sat|unsat)'
-        logic_regex = r'\(\s*set-logic\s*(.*)\)'
          comment_char = ';'
      elif benchmark_ext == '.cvc':
          pass
@@ -242,9 +220,9 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
              s, benchmark_filename)
      elif benchmark_ext == '.sy':
          comment_char = ';'
-        # Do not use proofs/unsat-cores with .sy files
-        unsat_cores = False
-        proofs = False
+        # Do not check proofs/unsat-cores with .sy files
+        check_unsat_cores = False
+        check_proofs = False
      else:
          sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
              benchmark_basename))
@@ -262,7 +240,6 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
      expected_exit_status = None
      command_lines = []
      requires = []
-    logic = None
      for line in benchmark_lines:
          # Skip lines that do not start with a comment character.
          if line[0] != comment_char:
@@ -301,16 +278,12 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
              sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
      if expected_exit_status is None:
          expected_exit_status = 0
-    if logic_regex:
-        logic_match = re.findall(logic_regex, benchmark_content)
-        if logic_match and len(logic_match) == 1:
-            logic = logic_match[0]
  
      if 'CVC4_REGRESSION_ARGS' in os.environ:
          basic_command_line_args += shlex.split(
              os.environ['CVC4_REGRESSION_ARGS'])
  
-    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
+    if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
                              or '(get-unsat-assumptions)' in benchmark_content):
          print(
              '1..0 # Skipped regression: unsat cores not supported without proof support'
@@ -346,42 +319,54 @@ def run_regression(unsat_cores, proofs, dump, use_skip_return_code,
          args = shlex.split(command_line)
          all_args = basic_command_line_args + args
  
-        if not unsat_cores and ('--check-unsat-cores' in all_args):
+        if not check_unsat_cores and ('--check-unsat-cores' in all_args):
              print(
                  '# Skipped command line options ({}): unsat cores not supported without proof support'
                  .format(all_args))
              continue
-        if not proofs and '--dump-proofs' in all_args:
+        if not check_proofs and '--dump-proofs' in all_args:
              print(
-                '# Skipped command line options ({}): proof production not supported without LFSC support'
+                '# Skipped command line options ({}): proof production not supported'
                  .format(all_args))
              continue
  
          command_line_args_configs.append(all_args)
  
+        expected_output_lines = expected_output.split()
          extra_command_line_args = []
          if benchmark_ext == '.sy' and \
              '--no-check-synth-sol' not in all_args and \
              '--sygus-rr' not in all_args and \
              '--check-synth-sol' not in all_args:
-            extra_command_line_args = ['--check-synth-sol']
-        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
+            extra_command_line_args += ['--check-synth-sol']
+        if ('sat' in expected_output_lines or \
+            'invalid' in expected_output_lines or \
+            'unknown' in expected_output_lines) and \
             '--no-debug-check-models' not in all_args and \
             '--no-check-models' not in all_args and \
             '--debug-check-models' not in all_args:
-            extra_command_line_args = ['--debug-check-models']
-        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
-            if '--no-check-unsat-cores' not in all_args and \
+            extra_command_line_args += ['--debug-check-models']
+        if 'unsat' in expected_output_lines or 'valid' in expected_output_lines:
+            if check_unsat_cores and \
+               '--no-produce-unsat-cores' not in all_args and \
+               '--no-check-unsat-cores' not in all_args and \
                 '--check-unsat-cores' not in all_args and \
                 '--incremental' not in all_args and \
                 '--unconstrained-simp' not in all_args:
                  extra_command_line_args += ['--check-unsat-cores']
+            if check_proofs and \
+               '--no-produce-proofs' not in all_args and \
+               '--no-check-proofs' not in all_args and \
+               '--check-proofs' not in all_args:
+                extra_command_line_args += ['--check-proofs']
          if '--no-check-abducts' not in all_args and \
-            '--check-abducts' not in all_args:
+            '--check-abducts' not in all_args and \
+            'get-abduct' in benchmark_content:
              extra_command_line_args += ['--check-abducts']
-        if extra_command_line_args:
-            command_line_args_configs.append(all_args +
-                                             extra_command_line_args)
+
+        # Create a test case for each extra argument
+        for extra_arg in extra_command_line_args:
+            command_line_args_configs.append(all_args + [extra_arg])
  
      # Run CVC4 on the benchmark with the different option sets and check
      # whether the exit status, stdout output, stderr output are as expected.
@@ -456,24 +441,36 @@ def main():
      parser = argparse.ArgumentParser(
          description=
          'Runs benchmark and checks for correct exit status and output.')
-    parser.add_argument('--enable-proof', action='store_true')
-    parser.add_argument('--with-lfsc', action='store_true')
      parser.add_argument('--dump', action='store_true')
      parser.add_argument('--use-skip-return-code', action='store_true')
      parser.add_argument('--skip-timeout', action='store_true')
+    parser.add_argument('--check-unsat-cores', action='store_true',
+                        default=True)
+    parser.add_argument('--no-check-unsat-cores', dest='check_unsat_cores',
+                        action='store_false')
+    parser.add_argument('--check-proofs', action='store_true', default=True)
+    parser.add_argument('--no-check-proofs', dest='check_proofs',
+                        action='store_false')
      parser.add_argument('wrapper', nargs='*')
      parser.add_argument('cvc4_binary')
      parser.add_argument('benchmark')
-    args = parser.parse_args()
+
+    argv = sys.argv[1:]
+    # Append options passed via RUN_REGRESSION_ARGS to argv
+    if os.environ.get('RUN_REGRESSION_ARGS'):
+        argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))
+
+    args = parser.parse_args(argv)
+
      cvc4_binary = os.path.abspath(args.cvc4_binary)
  
      wrapper = args.wrapper
      if os.environ.get('VALGRIND') == '1' and not wrapper:
          wrapper = ['libtool', '--mode=execute', 'valgrind']
  
-    timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
+    timeout = float(os.getenv('TEST_TIMEOUT', '600'))
  
-    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
+    return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
                            args.use_skip_return_code, args.skip_timeout,
                            wrapper, cvc4_binary, args.benchmark, timeout)
author	Mathias Preiner <mathias.preiner@gmail.com>
	Tue, 16 Mar 2021 20:30:21 +0000 (13:30 -0700)
committer	GitHub <noreply@github.com>
	Tue, 16 Mar 2021 20:30:21 +0000 (20:30 +0000)
.github/workflows/ci.yml		patch \| blob \| history
CMakeLists.txt		patch \| blob \| history
src/theory/builtin/proof_checker.cpp		patch \| blob \| history
test/regress/regress0/nl/sqrt.smt2		patch \| blob \| history
test/regress/regress0/nl/sqrt2-value.smt2		patch \| blob \| history
test/regress/regress0/seq/seq-expand-defs.smt2		patch \| blob \| history
test/regress/regress1/quantifiers/qid-debug-inst.smt2		patch \| blob \| history
test/regress/regress1/strings/issue3657-unexpectedUnsatCVC4.smt2		patch \| blob \| history
test/regress/run_regression.py		patch \| blob \| history