From: Andrew Reynolds Date: Sat, 15 Aug 2020 11:06:28 +0000 (-0500) Subject: (proof-new) Add the strings proof checker (#4858) X-Git-Tag: cvc5-1.0.0~3000 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3b230077d51c8445328f0b5d5ff94bbd988d1c83;p=cvc5.git (proof-new) Add the strings proof checker (#4858) It also adds enumeration for two new rules that have been recently added. --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1f7f68289..1d54573e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -748,6 +748,8 @@ libcvc4_add_sources( theory/strings/inference_manager.h theory/strings/normal_form.cpp theory/strings/normal_form.h + theory/strings/proof_checker.cpp + theory/strings/proof_checker.h theory/strings/regexp_elim.cpp theory/strings/regexp_elim.h theory/strings/regexp_entail.cpp diff --git a/src/expr/proof_rule.cpp b/src/expr/proof_rule.cpp index c4b72208e..be8aaea9b 100644 --- a/src/expr/proof_rule.cpp +++ b/src/expr/proof_rule.cpp @@ -115,6 +115,8 @@ const char* toString(PfRule id) return "RE_UNFOLD_NEG_CONCAT_FIXED"; case PfRule::RE_ELIM: return "RE_ELIM"; case PfRule::STRING_CODE_INJ: return "STRING_CODE_INJ"; + case PfRule::STRING_SEQ_UNIT_INJ: return "STRING_SEQ_UNIT_INJ"; + case PfRule::STRING_TRUST: return "STRING_TRUST"; //================================================= Arith rules case PfRule::ARITH_SCALE_SUM_UPPER_BOUNDS: return "ARITH_SCALE_SUM_UPPER_BOUNDS"; case PfRule::ARITH_TRICHOTOMY: return "ARITH_TRICHOTOMY"; diff --git a/src/expr/proof_rule.h b/src/expr/proof_rule.h index 364598cf4..a83e043bf 100644 --- a/src/expr/proof_rule.h +++ b/src/expr/proof_rule.h @@ -766,6 +766,22 @@ enum class PfRule : uint32_t // (not (= (str.code t) (str.code s))) // (not (= t s))) STRING_CODE_INJ, + //======================== Sequence unit + // Children: (P:(= (seq.unit x) (seq.unit y))) + // Arguments: none + // --------------------- + // Conclusion:(= x y) + // Also applies to the case where (seq.unit y) is a constant sequence + // of length one. + STRING_SEQ_UNIT_INJ, + // ======== String Trust + // Children: none + // Arguments: (Q) + // --------------------- + // Conclusion: (Q) + STRING_TRUST, + + //================================================= Arithmetic rules // ======== Adding Inequalities // Note: an ArithLiteral is a term of the form (>< poly const) // where diff --git a/src/theory/strings/proof_checker.cpp b/src/theory/strings/proof_checker.cpp new file mode 100644 index 000000000..c68490ae6 --- /dev/null +++ b/src/theory/strings/proof_checker.cpp @@ -0,0 +1,508 @@ +/********************* */ +/*! \file proof_checker.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of strings proof checker + **/ + +#include "theory/strings/proof_checker.h" + +#include "expr/sequence.h" +#include "options/strings_options.h" +#include "theory/rewriter.h" +#include "theory/strings/core_solver.h" +#include "theory/strings/regexp_elim.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/term_registry.h" +#include "theory/strings/theory_strings_preprocess.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +void StringProofRuleChecker::registerTo(ProofChecker* pc) +{ + pc->registerChecker(PfRule::CONCAT_EQ, this); + pc->registerChecker(PfRule::CONCAT_UNIFY, this); + pc->registerChecker(PfRule::CONCAT_CONFLICT, this); + pc->registerChecker(PfRule::CONCAT_SPLIT, this); + pc->registerChecker(PfRule::CONCAT_CSPLIT, this); + pc->registerChecker(PfRule::CONCAT_LPROP, this); + pc->registerChecker(PfRule::CONCAT_CPROP, this); + pc->registerChecker(PfRule::STRING_DECOMPOSE, this); + pc->registerChecker(PfRule::STRING_LENGTH_POS, this); + pc->registerChecker(PfRule::STRING_LENGTH_NON_EMPTY, this); + pc->registerChecker(PfRule::STRING_REDUCTION, this); + pc->registerChecker(PfRule::STRING_EAGER_REDUCTION, this); + pc->registerChecker(PfRule::RE_INTER, this); + pc->registerChecker(PfRule::RE_UNFOLD_POS, this); + pc->registerChecker(PfRule::RE_UNFOLD_NEG, this); + pc->registerChecker(PfRule::RE_UNFOLD_NEG_CONCAT_FIXED, this); + pc->registerChecker(PfRule::RE_ELIM, this); + pc->registerChecker(PfRule::STRING_CODE_INJ, this); + pc->registerChecker(PfRule::STRING_SEQ_UNIT_INJ, this); + // trusted rules + pc->registerChecker(PfRule::STRING_TRUST, this); +} + +Node StringProofRuleChecker::checkInternal(PfRule id, + const std::vector& children, + const std::vector& args) +{ + NodeManager* nm = NodeManager::currentNM(); + // core rules for word equations + if (id == PfRule::CONCAT_EQ || id == PfRule::CONCAT_UNIFY + || id == PfRule::CONCAT_CONFLICT || id == PfRule::CONCAT_SPLIT + || id == PfRule::CONCAT_CSPLIT || id == PfRule::CONCAT_LPROP + || id == PfRule::CONCAT_CPROP) + { + Trace("strings-pfcheck") << "Checking id " << id << std::endl; + Assert(children.size() >= 1); + Assert(args.size() == 1); + // all rules have an equality + if (children[0].getKind() != EQUAL) + { + return Node::null(); + } + // convert to concatenation form + std::vector tvec; + std::vector svec; + utils::getConcat(children[0][0], tvec); + utils::getConcat(children[0][1], svec); + size_t nchildt = tvec.size(); + size_t nchilds = svec.size(); + TypeNode stringType = children[0][0].getType(); + // extract the Boolean corresponding to whether the rule is reversed + bool isRev; + if (!getBool(args[0], isRev)) + { + return Node::null(); + } + if (id == PfRule::CONCAT_EQ) + { + Assert(children.size() == 1); + size_t index = 0; + std::vector tremVec; + std::vector sremVec; + // scan the concatenation until we exhaust child proofs + while (index < nchilds && index < nchildt) + { + Node currT = tvec[isRev ? (nchildt - 1 - index) : index]; + Node currS = svec[isRev ? (nchilds - 1 - index) : index]; + if (currT != currS) + { + if (currT.isConst() && currS.isConst()) + { + size_t sindex; + // get the equal prefix/suffix, strip and add the remainders + Node currR = Word::splitConstant(currT, currS, sindex, isRev); + if (!currR.isNull()) + { + // add the constant to remainder vec + std::vector& rem = sindex == 0 ? tremVec : sremVec; + rem.push_back(currR); + // ignore the current component + index++; + // In other words, if we have (currS,currT) = ("ab","abc"), then + // we proceed to the next component and add currR = "c" to + // tremVec. + } + // otherwise if we are not the same prefix, then both will be added + // Notice that we do not add maximal prefixes, in other words, + // ("abc", "abd") may be added to the remainder vectors, and not + // ("c", "d"). + } + break; + } + index++; + } + Assert(index <= nchildt); + Assert(index <= nchilds); + // the remainders are equal + tremVec.insert(isRev ? tremVec.begin() : tremVec.end(), + tvec.begin() + (isRev ? 0 : index), + tvec.begin() + nchildt - (isRev ? index : 0)); + sremVec.insert(isRev ? sremVec.begin() : sremVec.end(), + svec.begin() + (isRev ? 0 : index), + svec.begin() + nchilds - (isRev ? index : 0)); + // convert back to node + Node trem = utils::mkConcat(tremVec, stringType); + Node srem = utils::mkConcat(sremVec, stringType); + return trem.eqNode(srem); + } + // all remaining rules do something with the first child of each side + Node t0 = tvec[isRev ? nchildt - 1 : 0]; + Node s0 = svec[isRev ? nchilds - 1 : 0]; + if (id == PfRule::CONCAT_UNIFY) + { + Assert(children.size() == 2); + if (children[1].getKind() != EQUAL) + { + return Node::null(); + } + for (size_t i = 0; i < 2; i++) + { + Node l = children[1][i]; + if (l.getKind() != STRING_LENGTH) + { + return Node::null(); + } + Node term = i == 0 ? t0 : s0; + if (l[0] == term) + { + continue; + } + // could be a spliced constant + bool success = false; + if (term.isConst() && l[0].isConst()) + { + size_t lenL = Word::getLength(l[0]); + success = (isRev && l[0] == Word::suffix(term, lenL)) + || (!isRev && l[0] == Word::prefix(term, lenL)); + } + if (!success) + { + return Node::null(); + } + } + return children[1][0][0].eqNode(children[1][1][0]); + } + else if (id == PfRule::CONCAT_CONFLICT) + { + Assert(children.size() == 1); + if (!t0.isConst() || !s0.isConst()) + { + // not constants + return Node::null(); + } + size_t sindex; + Node r0 = Word::splitConstant(t0, s0, sindex, isRev); + if (!r0.isNull()) + { + // Not a conflict due to constants, i.e. s0 is a prefix of t0 or vice + // versa. + return Node::null(); + } + return nm->mkConst(false); + } + else if (id == PfRule::CONCAT_SPLIT) + { + Assert(children.size() == 2); + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 + || children[1][0][1].getKind() != STRING_LENGTH + || children[1][0][1][0] != s0) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_CSPLIT) + { + Assert(children.size() == 2); + Node zero = nm->mkConst(Rational(0)); + Node one = nm->mkConst(Rational(1)); + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 || children[1][0][1] != zero) + { + return Node::null(); + } + if (!s0.isConst() || !s0.getType().isStringLike() || Word::isEmpty(s0)) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_LPROP) + { + Assert(children.size() == 2); + if (children[1].getKind() != GT + || children[1][0].getKind() != STRING_LENGTH + || children[1][0][0] != t0 + || children[1][1].getKind() != STRING_LENGTH + || children[1][1][0] != s0) + { + return Node::null(); + } + } + else if (id == PfRule::CONCAT_CPROP) + { + Assert(children.size() == 2); + Node zero = nm->mkConst(Rational(0)); + + Trace("pfcheck-strings-cprop") + << "CONCAT_PROP, isRev=" << isRev << std::endl; + if (children[1].getKind() != NOT || children[1][0].getKind() != EQUAL + || children[1][0][0].getKind() != STRING_LENGTH + || children[1][0][0][0] != t0 || children[1][0][1] != zero) + { + Trace("pfcheck-strings-cprop") + << "...failed pattern match" << std::endl; + return Node::null(); + } + if (tvec.size() <= 1) + { + Trace("pfcheck-strings-cprop") + << "...failed adjacent constant" << std::endl; + return Node::null(); + } + Node w1 = tvec[isRev ? nchildt - 2 : 1]; + if (!w1.isConst() || !w1.getType().isStringLike() || Word::isEmpty(w1)) + { + Trace("pfcheck-strings-cprop") + << "...failed adjacent constant content" << std::endl; + return Node::null(); + } + Node w2 = s0; + if (!w2.isConst() || !w2.getType().isStringLike() || Word::isEmpty(w2)) + { + Trace("pfcheck-strings-cprop") << "...failed constant" << std::endl; + return Node::null(); + } + // getConclusion expects the adjacent constant to be included + t0 = nm->mkNode(STRING_CONCAT, isRev ? w1 : t0, isRev ? t0 : w1); + } + // use skolem cache + SkolemCache skc(false); + std::vector newSkolems; + Node conc = CoreSolver::getConclusion(t0, s0, id, isRev, &skc, newSkolems); + return conc; + } + else if (id == PfRule::STRING_DECOMPOSE) + { + Assert(children.size() == 1); + Assert(args.size() == 1); + bool isRev; + if (!getBool(args[0], isRev)) + { + return Node::null(); + } + Node atom = children[0]; + if (atom.getKind() != GEQ || atom[0].getKind() != STRING_LENGTH) + { + return Node::null(); + } + SkolemCache sc(false); + std::vector newSkolems; + Node conc = CoreSolver::getConclusion( + atom[0][0], atom[1], id, isRev, &sc, newSkolems); + return conc; + } + else if (id == PfRule::STRING_REDUCTION + || id == PfRule::STRING_EAGER_REDUCTION + || id == PfRule::STRING_LENGTH_POS) + { + Assert(children.empty()); + Assert(args.size() >= 1); + // These rules are based on calling a C++ method for returning a valid + // lemma involving a single argument term. + // Must convert to skolem form. + Node t = args[0]; + Node ret; + if (id == PfRule::STRING_REDUCTION) + { + Assert(args.size() == 1); + // we do not use optimizations + SkolemCache skc(false); + std::vector conj; + ret = StringsPreprocess::reduce(t, conj, &skc); + conj.push_back(t.eqNode(ret)); + ret = mkAnd(conj); + } + else if (id == PfRule::STRING_EAGER_REDUCTION) + { + Assert(args.size() == 1); + SkolemCache skc(false); + ret = TermRegistry::eagerReduce(t, &skc); + } + else if (id == PfRule::STRING_LENGTH_POS) + { + Assert(args.size() == 1); + ret = TermRegistry::lengthPositive(t); + } + if (ret.isNull()) + { + return Node::null(); + } + return ret; + } + else if (id == PfRule::STRING_LENGTH_NON_EMPTY) + { + Assert(children.size() == 1); + Assert(args.empty()); + Node nemp = children[0]; + if (nemp.getKind() != NOT || nemp[0].getKind() != EQUAL + || !nemp[0][1].isConst() || !nemp[0][1].getType().isStringLike()) + { + return Node::null(); + } + if (!Word::isEmpty(nemp[0][1])) + { + return Node::null(); + } + Node zero = nm->mkConst(Rational(0)); + Node clen = nm->mkNode(STRING_LENGTH, nemp[0][0]); + return clen.eqNode(zero).notNode(); + } + else if (id == PfRule::RE_INTER) + { + Assert(children.size() >= 1); + Assert(args.empty()); + std::vector reis; + Node x; + // make the regular expression intersection that summarizes all + // memberships in the explanation + for (const Node& c : children) + { + bool polarity = c.getKind() != NOT; + Node catom = polarity ? c : c[0]; + if (catom.getKind() != STRING_IN_REGEXP) + { + return Node::null(); + } + if (x.isNull()) + { + x = catom[0]; + } + else if (x != catom[0]) + { + // different LHS + return Node::null(); + } + Node xcurr = catom[0]; + Node rcurr = + polarity ? catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]); + reis.push_back(rcurr); + } + Node rei = reis.size() == 1 ? reis[0] : nm->mkNode(REGEXP_INTER, reis); + return nm->mkNode(STRING_IN_REGEXP, x, rei); + } + else if (id == PfRule::RE_UNFOLD_POS || id == PfRule::RE_UNFOLD_NEG + || id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + Assert(children.size() == 1); + Assert(args.empty()); + Node skChild = children[0]; + if (id == PfRule::RE_UNFOLD_NEG || id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + if (skChild.getKind() != NOT || skChild[0].getKind() != STRING_IN_REGEXP) + { + Trace("strings-pfcheck") << "...fail, non-neg member" << std::endl; + return Node::null(); + } + } + else if (skChild.getKind() != STRING_IN_REGEXP) + { + Trace("strings-pfcheck") << "...fail, non-pos member" << std::endl; + return Node::null(); + } + Node conc; + if (id == PfRule::RE_UNFOLD_POS) + { + SkolemCache sc; + conc = RegExpOpr::reduceRegExpPos(skChild, &sc); + } + else if (id == PfRule::RE_UNFOLD_NEG) + { + conc = RegExpOpr::reduceRegExpNeg(skChild); + } + else if (id == PfRule::RE_UNFOLD_NEG_CONCAT_FIXED) + { + if (skChild[0][1].getKind() != REGEXP_CONCAT) + { + Trace("strings-pfcheck") << "...fail, no concat regexp" << std::endl; + return Node::null(); + } + size_t index; + Node reLen = RegExpOpr::getRegExpConcatFixed(skChild[0][1], index); + if (reLen.isNull()) + { + Trace("strings-pfcheck") << "...fail, non-fixed lengths" << std::endl; + return Node::null(); + } + conc = RegExpOpr::reduceRegExpNegConcatFixed(skChild, reLen, index); + } + return conc; + } + else if (id == PfRule::RE_ELIM) + { + Assert(children.size() == 1); + Assert(args.empty()); + return RegExpElimination::eliminate(children[0]); + } + else if (id == PfRule::STRING_CODE_INJ) + { + Assert(children.empty()); + Assert(args.size() == 2); + Assert(args[0].getType().isStringLike() + && args[1].getType().isStringLike()); + Node c1 = nm->mkNode(STRING_TO_CODE, args[0]); + Node c2 = nm->mkNode(STRING_TO_CODE, args[1]); + Node eqNegOne = c1.eqNode(nm->mkConst(Rational(-1))); + Node deq = c1.eqNode(c2).negate(); + Node eqn = args[0].eqNode(args[1]); + return nm->mkNode(kind::OR, eqNegOne, deq, eqn); + } + else if (id == PfRule::STRING_SEQ_UNIT_INJ) + { + Assert(children.size() == 1); + Assert(args.empty()); + if (children[0].getKind() != EQUAL) + { + return Node::null(); + } + Node t[2]; + for (size_t i = 0; i < 2; i++) + { + if (children[0][i].getKind() == SEQ_UNIT) + { + t[i] = children[0][i][0]; + } + else if (children[0][i].isConst()) + { + // notice that Word::getChars is not the right call here, since it + // gets a vector of sequences of length one. We actually need to + // extract the character, which is a sequence-specific operation. + const Sequence& sx = children[0][i].getConst(); + const std::vector& vec = sx.getVec(); + if (vec.size() == 1) + { + // the character of the single character sequence + t[i] = vec[0]; + } + } + if (t[i].isNull()) + { + return Node::null(); + } + } + Trace("strings-pfcheck-debug") + << "STRING_SEQ_UNIT_INJ: " << children[0] << " => " << t[0] + << " == " << t[1] << std::endl; + AlwaysAssert(t[0].getType() == t[1].getType()); + return t[0].eqNode(t[1]); + } + else if (id == PfRule::STRING_TRUST) + { + // "trusted" rules + Assert(!args.empty()); + Assert(args[0].getType().isBoolean()); + return args[0]; + } + return Node::null(); +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/proof_checker.h b/src/theory/strings/proof_checker.h new file mode 100644 index 000000000..09716ffb0 --- /dev/null +++ b/src/theory/strings/proof_checker.h @@ -0,0 +1,49 @@ +/********************* */ +/*! \file proof_checker.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Strings proof checker utility + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__PROOF_CHECKER_H +#define CVC4__THEORY__STRINGS__PROOF_CHECKER_H + +#include "expr/node.h" +#include "expr/proof_checker.h" +#include "expr/proof_node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** A checker for strings proofs */ +class StringProofRuleChecker : public ProofRuleChecker +{ + public: + StringProofRuleChecker() {} + ~StringProofRuleChecker() {} + + /** Register all rules owned by this rule checker in pc. */ + void registerTo(ProofChecker* pc) override; + + protected: + /** Return the conclusion of the given proof step, or null if it is invalid */ + Node checkInternal(PfRule id, + const std::vector& children, + const std::vector& args) override; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__PROOF_CHECKER_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 54c4759a8..142b0006b 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -97,6 +97,13 @@ TheoryStrings::TheoryStrings(context::Context* c, d_false = NodeManager::currentNM()->mkConst( false ); d_cardSize = utils::getAlphabetCardinality(); + + ProofChecker* pc = pnm != nullptr ? pnm->getChecker() : nullptr; + if (pc != nullptr) + { + // add checkers + d_sProofChecker.registerTo(pc); + } } TheoryStrings::~TheoryStrings() { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 1d3261115..2fb827429 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -32,6 +32,7 @@ #include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" +#include "theory/strings/proof_checker.h" #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/regexp_solver.h" @@ -279,6 +280,8 @@ class TheoryStrings : public Theory { InferenceManager d_im; /** The theory rewriter for this theory. */ StringsRewriter d_rewriter; + /** The proof rule checker */ + StringProofRuleChecker d_sProofChecker; /** * The base solver, responsible for reasoning about congruent terms and * inferring constants for equivalence classes.