From 29959bec6e023f64cad0a5d43b18052f08ac94c5 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Tue, 4 Jun 2019 14:48:21 -0700 Subject: [PATCH] Add check that result matches benchmark status (#3028) This commit adds a check to make sure that the result of a `(check-sat)` call matches the expected result set via `(set-info :status ...)`. In doing so, it also fixes an issue where CVC4 would crash if asked for the unsat core after setting the status to `unsat` but before calling `(check-sat)` (see regression for concrete example). This happened because CVC4 was storing the expected result and the computed result both in the same variable (the expected result wasn't really being used though). This commit keeps track of the expected result and the computed result in separate variables to fix that issue. --- src/smt/smt_engine.cpp | 11 +++++++++- src/smt/smt_engine.h | 5 +++++ test/regress/CMakeLists.txt | 1 + test/regress/regress0/bv/ackermann1.smt2 | 1 - test/regress/regress0/bv/ackermann4.smt2 | 1 - .../fmf/Arrow_Order-smtlib.778341.smt | 1 - .../quantifiers/qbv-test-invert-bvxor.smt2 | 1 - .../sets/mar2014/sharing-preregister.smt2 | 1 - .../regress0/smtlib/set-info-status.smt2 | 22 +++++++++++++++++++ .../quantifiers/qbv-test-invert-bvcomp.smt2 | 1 - test/regress/regress1/quantifiers/qe.smt2 | 1 - 11 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 test/regress/regress0/smtlib/set-info-status.smt2 diff --git a/src/smt/smt_engine.cpp b/src/smt/smt_engine.cpp index 66198946f..8f2d95a0f 100644 --- a/src/smt/smt_engine.cpp +++ b/src/smt/smt_engine.cpp @@ -31,6 +31,7 @@ #include "base/configuration.h" #include "base/configuration_private.h" +#include "base/cvc4_check.h" #include "base/exception.h" #include "base/listener.h" #include "base/modal_exception.h" @@ -888,6 +889,7 @@ SmtEngine::SmtEngine(ExprManager* em) d_earlyTheoryPP(true), d_globalNegation(false), d_status(), + d_expectedStatus(), d_replayStream(NULL), d_private(NULL), d_statisticsRegistry(NULL), @@ -2420,7 +2422,7 @@ void SmtEngine::setInfo(const std::string& key, const CVC4::SExpr& value) throw OptionException("argument to (set-info :status ..) must be " "`sat' or `unsat' or `unknown'"); } - d_status = Result(s, d_filename); + d_expectedStatus = Result(s, d_filename); return; } throw UnrecognizedOptionException(); @@ -3747,6 +3749,13 @@ Result SmtEngine::checkSatisfiability(const vector& assumptions, // Remember the status d_status = r; + if (!d_expectedStatus.isUnknown() && !d_status.isUnknown() + && d_status != d_expectedStatus) + { + CVC4_FATAL() << "Expected result " << d_expectedStatus << " but got " + << d_status; + } + d_expectedStatus = Result(); setProblemExtended(false); diff --git a/src/smt/smt_engine.h b/src/smt/smt_engine.h index 165e93997..5913716e6 100644 --- a/src/smt/smt_engine.h +++ b/src/smt/smt_engine.h @@ -262,6 +262,11 @@ class CVC4_PUBLIC SmtEngine { */ Result d_status; + /** + * The expected status of the next satisfiability check. + */ + Result d_expectedStatus; + /** * The input file name (if any) or the name set through setInfo (if any) */ diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index e1b182f3d..b63930569 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -826,6 +826,7 @@ set(regress_0_tests regress0/smt2output.smt2 regress0/smtlib/get-unsat-assumptions.smt2 regress0/smtlib/reason-unknown.smt2 + regress0/smtlib/set-info-status.smt2 regress0/strings/bug001.smt2 regress0/strings/bug002.smt2 regress0/strings/bug612.smt2 diff --git a/test/regress/regress0/bv/ackermann1.smt2 b/test/regress/regress0/bv/ackermann1.smt2 index 218fd746b..7fd76c8cc 100644 --- a/test/regress/regress0/bv/ackermann1.smt2 +++ b/test/regress/regress0/bv/ackermann1.smt2 @@ -3,7 +3,6 @@ (set-logic QF_UFBV) (set-info :smt-lib-version 2.0) (set-info :category "crafted") -(set-info :status unsat) (declare-fun v0 () (_ BitVec 4)) (declare-fun f ((_ BitVec 4)) (_ BitVec 4)) (declare-fun g ((_ BitVec 4)) (_ BitVec 4)) diff --git a/test/regress/regress0/bv/ackermann4.smt2 b/test/regress/regress0/bv/ackermann4.smt2 index cb8ad2e55..0c1e323d5 100644 --- a/test/regress/regress0/bv/ackermann4.smt2 +++ b/test/regress/regress0/bv/ackermann4.smt2 @@ -3,7 +3,6 @@ (set-logic QF_UFBV) (set-info :smt-lib-version 2.0) (set-info :category "crafted") -(set-info :status unsat) (declare-fun v0 () (_ BitVec 4)) (declare-fun f ((_ BitVec 4)) (_ BitVec 4)) (declare-fun g ((_ BitVec 4)) (_ BitVec 4)) diff --git a/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt b/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt index bb2630b93..ab1e41360 100644 --- a/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt +++ b/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt @@ -1,7 +1,6 @@ ; COMMAND-LINE: --finite-model-find ; EXPECT: unsat (benchmark Isabelle -:status sat :logic AUFLIA :extrasorts ( S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20 S21 S22 S23 S24 S25 S26 S27 S28 S29 S30 S31 S32 S33 S34 S35 S36 S37) :extrafuns ( diff --git a/test/regress/regress0/quantifiers/qbv-test-invert-bvxor.smt2 b/test/regress/regress0/quantifiers/qbv-test-invert-bvxor.smt2 index ef41eecdd..69c5def65 100644 --- a/test/regress/regress0/quantifiers/qbv-test-invert-bvxor.smt2 +++ b/test/regress/regress0/quantifiers/qbv-test-invert-bvxor.smt2 @@ -1,7 +1,6 @@ ; COMMAND-LINE: --cbqi-bv --cbqi-bv-ineq=keep --no-cbqi-full ; EXPECT: unsat (set-logic BV) -(set-info :status sat) (declare-fun a () (_ BitVec 8)) (assert (forall ((x (_ BitVec 8))) (not (= (bvxor x a) (bvmul a a))))) diff --git a/test/regress/regress0/sets/mar2014/sharing-preregister.smt2 b/test/regress/regress0/sets/mar2014/sharing-preregister.smt2 index d851ca35e..e3e88c65f 100644 --- a/test/regress/regress0/sets/mar2014/sharing-preregister.smt2 +++ b/test/regress/regress0/sets/mar2014/sharing-preregister.smt2 @@ -1,6 +1,5 @@ ; EXPECT: unsat (set-logic QF_UFLIAFS) -(set-info :status sat) (declare-fun a () Int) (declare-fun b () Int) (declare-fun x () (Set Int)) diff --git a/test/regress/regress0/smtlib/set-info-status.smt2 b/test/regress/regress0/smtlib/set-info-status.smt2 new file mode 100644 index 000000000..489d686b3 --- /dev/null +++ b/test/regress/regress0/smtlib/set-info-status.smt2 @@ -0,0 +1,22 @@ +; EXPECT: (error "Cannot get an unsat core unless immediately preceded by UNSAT/VALID response.") +; EXPECT: sat +; EXPECT: sat +; EXPECT: unsat +; EXPECT-ERROR: Expected result unsat but got sat +; ERROR-SCRUBBER: sed -e '/Fatal failure within.*/d' +; EXIT: -6 +(set-option :incremental true) +(set-option :produce-unsat-cores true) +(set-logic QF_BV) +(set-info :status unsat) +(get-unsat-core) +(set-info :status sat) +(check-sat) +(set-info :status sat) +(check-sat) +(push) +(assert false) +(check-sat) +(pop) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress1/quantifiers/qbv-test-invert-bvcomp.smt2 b/test/regress/regress1/quantifiers/qbv-test-invert-bvcomp.smt2 index 3b55c0b9a..6bcc6501b 100644 --- a/test/regress/regress1/quantifiers/qbv-test-invert-bvcomp.smt2 +++ b/test/regress/regress1/quantifiers/qbv-test-invert-bvcomp.smt2 @@ -1,7 +1,6 @@ ; COMMAND-LINE: --cbqi-bv --cbqi-bv-ineq=keep --no-cbqi-full ; EXPECT: unsat (set-logic BV) -(set-info :status sat) (declare-fun a () (_ BitVec 8)) (declare-fun b () (_ BitVec 8)) (declare-fun c () (_ BitVec 1)) diff --git a/test/regress/regress1/quantifiers/qe.smt2 b/test/regress/regress1/quantifiers/qe.smt2 index 673ece05b..96cdd2497 100644 --- a/test/regress/regress1/quantifiers/qe.smt2 +++ b/test/regress/regress1/quantifiers/qe.smt2 @@ -1,7 +1,6 @@ ; COMMAND-LINE: ; EXPECT: (not (>= (+ a (* (- 1) b)) 1)) (set-logic LIA) -(set-info :status unsat) (declare-fun a () Int) (declare-fun b () Int) (get-qe (exists ((x Int)) (and (<= a x) (<= x b)))) -- 2.30.2