From: Andrew Reynolds Date: Sun, 18 Aug 2019 12:32:40 +0000 (-0500) Subject: Context-independent regular expression unfolding (#3168) X-Git-Tag: cvc5-1.0.0~4005 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b1e9c6b7f5e4beb0183e48f5a1cbbf679f52d7d7;p=cvc5.git Context-independent regular expression unfolding (#3168) --- diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index 463891839..ea38c4dd9 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -80,7 +80,7 @@ void RegExpSolver::check(const std::map >& mems) if (!addedLemma) { // get all memberships - std::vector allMems; + std::map allMems; for (const std::pair >& mr : mems) { for (const Node& m : mr.second) @@ -88,7 +88,7 @@ void RegExpSolver::check(const std::map >& mems) bool polarity = m.getKind() != NOT; if (polarity || !options::stringIgnNegMembership()) { - allMems.push_back(m); + allMems[m] = mr.first; } } } @@ -100,8 +100,10 @@ void RegExpSolver::check(const std::map >& mems) // check positive (e=0), then negative (e=1) memberships for (unsigned e = 0; e < 2; e++) { - for (const Node& assertion : allMems) + for (const std::pair& mp : allMems) { + Node assertion = mp.first; + Node rep = mp.second; // check regular expression membership Trace("regexp-debug") << "Check : " << assertion << " " @@ -126,43 +128,67 @@ void RegExpSolver::check(const std::map >& mems) bool flag = true; Node x = atom[0]; Node r = atom[1]; + Assert(rep == d_parent.getRepresentative(x)); + // The following code takes normal forms into account for the purposes + // of simplifying a regular expression membership x in R. For example, + // if x = "A" in the current context, then we may be interested in + // reasoning about ( x in R ) * { x -> "A" }. Say we update the + // membership to nx in R', then: + // - nfexp => ( x in R ) <=> nx in R' + // - rnfexp => R = R' + // We use these explanations below as assumptions on inferences when + // appropriate. Notice that for inferring conflicts and tautologies, + // we use the normal form of x always. This is because we always want to + // discover conflicts/tautologies whenever possible. + // For inferences based on regular expression unfolding, we do not use + // the normal form of x. The reason is that it is better to unfold + // regular expression memberships in a context-indepedent manner, + // that is, not taking into account the current normal form of x, since + // this ensures these lemmas are still relevant after backtracking. + std::vector nfexp; std::vector rnfexp; - + // The normal form of x is stored in nx, while x is left unchanged. + Node nx = x; if (!x.isConst()) { - x = d_parent.getNormalString(x, rnfexp); - changed = true; + nx = d_parent.getNormalString(x, nfexp); } + // If r is not a constant regular expression, we update it based on + // normal forms, which may concretize its variables. if (!d_regexp_opr.checkConstRegExp(r)) { r = getNormalSymRegExp(r, rnfexp); + nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); changed = true; } Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " - << x << " IN " << r << std::endl; - if (changed) + << nx << " IN " << r << std::endl; + if (nx != x || changed) { - Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, x, r)); - if (!polarity) - { - tmp = tmp.negate(); - } - if (tmp == d_true) - { - d_regexp_ccached.insert(assertion); - continue; - } - else if (tmp == d_false) + // We rewrite the membership nx IN r. + Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); + Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; + if (tmp.isConst()) { - std::vector exp_n; - exp_n.push_back(assertion); - Node conc = Node::null(); - d_im.sendInference(rnfexp, exp_n, conc, "REGEXP NF Conflict"); - addedLemma = true; - break; + if (tmp.getConst() == polarity) + { + // it is satisfied in this SAT context + d_regexp_ccached.insert(assertion); + continue; + } + else + { + // we have a conflict + std::vector exp_n; + exp_n.push_back(assertion); + Node conc = Node::null(); + d_im.sendInference(nfexp, exp_n, conc, "REGEXP NF Conflict"); + addedLemma = true; + break; + } } } - if (e == 1 && repUnfold.find(x) != repUnfold.end()) + if (e == 1 && repUnfold.find(rep) != repUnfold.end()) { // do not unfold negative memberships of strings that have new // positive unfoldings. For example: @@ -188,9 +214,8 @@ void RegExpSolver::check(const std::map >& mems) if (flag) { // check if the term is atomic - Node xr = d_parent.getRepresentative(x); Trace("strings-regexp") - << "Unroll/simplify membership of atomic term " << xr + << "Unroll/simplify membership of atomic term " << rep << std::endl; // if so, do simple unrolling std::vector nvec; @@ -216,7 +241,7 @@ void RegExpSolver::check(const std::map >& mems) // Remember that we have unfolded a membership for x // notice that we only do this here, after we have definitely // added a lemma. - repUnfold.insert(x); + repUnfold.insert(rep); } } if (d_im.hasConflict()) diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 4bceb1b74..8c2950c3e 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1591,6 +1591,8 @@ set(regress_1_tests regress1/strings/nterm-re-inter-sigma.smt2 regress1/strings/pierre150331.smt2 regress1/strings/policy_variable.smt2 + regress1/strings/query4674.smt2 + regress1/strings/query8485.smt2 regress1/strings/re-all-char-hard.smt2 regress1/strings/re-agg-total1.smt2 regress1/strings/re-agg-total2.smt2 diff --git a/test/regress/regress1/strings/query4674.smt2 b/test/regress/regress1/strings/query4674.smt2 new file mode 100644 index 000000000..7132fa6a8 --- /dev/null +++ b/test/regress/regress1/strings/query4674.smt2 @@ -0,0 +1,8 @@ +(set-info :smt-lib-version 2.6) +(set-logic ALL) +(set-info :status sat) +(set-option :strings-exp true) +(set-option :re-elim false) +(declare-fun x () String) +(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "A")))) _let_0 re.allchar _let_0)) (str.in.re x (re.++ _let_0 re.allchar _let_0 re.allchar _let_0 (str.to.re (str.++ "B" (str.++ "C" (str.++ "B" "B")))) _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "C" (str.++ "B" "C"))) _let_0 (str.to.re "B") _let_0)) (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re "C") _let_0 (str.to.re (str.++ "B" (str.++ "C" "B"))) _let_0))))))) +(check-sat) diff --git a/test/regress/regress1/strings/query8485.smt2 b/test/regress/regress1/strings/query8485.smt2 new file mode 100644 index 000000000..ccd3b7f9a --- /dev/null +++ b/test/regress/regress1/strings/query8485.smt2 @@ -0,0 +1,8 @@ +(set-info :smt-lib-version 2.6) +(set-logic ALL) +(set-info :status sat) +(set-option :strings-exp true) +(set-option :re-elim false) +(declare-fun x () String) +(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "B" (str.++ "A" (str.++ "C" "B")))) _let_0 re.allchar _let_0)) (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "C")))) _let_0 re.allchar _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "C" (str.++ "B" (str.++ "A" "B")))) _let_0 re.allchar _let_0)) (str.in.re x (re.++ _let_0 re.allchar _let_0 (str.to.re (str.++ "C" (str.++ "B" "A"))) _let_0 (str.to.re "B") _let_0))))))) +(check-sat)