From: Andrew Reynolds Date: Mon, 10 Jun 2019 21:19:35 +0000 (-0500) Subject: Optimization for negative concatenation membership. (#3048) X-Git-Tag: cvc5-1.0.0~4120 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b38fc2ed13342524cefe7480c06c9afbcc73aedd;p=cvc5.git Optimization for negative concatenation membership. (#3048) --- diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 91beb1ab5..0360228c6 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -20,6 +20,9 @@ #include "options/strings_options.h" #include "theory/strings/theory_strings_rewriter.h" +using namespace CVC4; +using namespace CVC4::kind; + namespace CVC4 { namespace theory { namespace strings { @@ -808,39 +811,72 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes break; } case kind::REGEXP_CONCAT: { - //TODO: rewrite empty - Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); - Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType()); - Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); - Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero), - NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) ); - Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1)); - Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1))); - Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate(); - if(r[0].getKind() == kind::STRING_TO_REGEXP) { - s1r1 = s1.eqNode(r[0][0]).negate(); - } else if(r[0].getKind() == kind::REGEXP_EMPTY) { - s1r1 = d_true; + // The following simplification states that + // ~( s in R1 ++ R2 ) + // is equivalent to + // forall x. + // 0 <= x <= len(s) => + // ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2) + Node lens = nm->mkNode(STRING_LENGTH, s); + // the index we are removing from the RE concatenation + unsigned indexRm = 0; + Node b1; + Node b1v; + // As an optimization to the above reduction, if we can determine that + // all strings in the language of R1 have the same length, say n, + // then the conclusion of the reduction is quantifier-free: + // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) + Node reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[0]); + if (reLength.isNull()) + { + // try from the opposite end + unsigned indexE = r.getNumChildren() - 1; + reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[indexE]); + if (!reLength.isNull()) + { + indexRm = indexE; + } + } + Node guard; + if (reLength.isNull()) + { + b1 = nm->mkBoundVar(nm->integerType()); + b1v = nm->mkNode(BOUND_VAR_LIST, b1); + guard = nm->mkNode(AND, + nm->mkNode(GEQ, b1, d_zero), + nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); + } + else + { + b1 = reLength; + } + Node s1 = nm->mkNode(STRING_SUBSTR, s, d_zero, b1); + Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); + if (indexRm != 0) + { + // swap if we are removing from the end + Node sswap = s1; + s1 = s2; + s2 = sswap; } - Node r2 = r[1]; - if(r.getNumChildren() > 2) { - std::vector< Node > nvec; - for(unsigned i=1; imkNode(STRING_IN_REGEXP, s1, r[indexRm]).negate(); + std::vector nvec; + for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) + { + if (i != indexRm) + { nvec.push_back( r[i] ); } - r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec); } + Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); r2 = Rewriter::rewrite(r2); - Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate(); - if(r2.getKind() == kind::STRING_TO_REGEXP) { - s2r2 = s2.eqNode(r2[0]).negate(); - } else if(r2.getKind() == kind::REGEXP_EMPTY) { - s2r2 = d_true; + Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); + conc = nm->mkNode(OR, s1r1, s2r2); + if (!b1v.isNull()) + { + conc = nm->mkNode(OR, guard.negate(), conc); + conc = nm->mkNode(FORALL, b1v, conc); } - - conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2); - conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc); - conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc); break; } case kind::REGEXP_UNION: { diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 0a5f0a972..f5571349f 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1573,6 +1573,7 @@ set(regress_1_tests regress1/strings/re-agg-total1.smt2 regress1/strings/re-agg-total2.smt2 regress1/strings/re-elim-exact.smt2 + regress1/strings/re-neg-concat-reduct.smt2 regress1/strings/re-unsound-080718.smt2 regress1/strings/regexp001.smt2 regress1/strings/regexp002.smt2 diff --git a/test/regress/regress1/strings/re-neg-concat-reduct.smt2 b/test/regress/regress1/strings/re-neg-concat-reduct.smt2 new file mode 100644 index 000000000..cbe8c4a8b --- /dev/null +++ b/test/regress/regress1/strings/re-neg-concat-reduct.smt2 @@ -0,0 +1,12 @@ +(set-info :smt-lib-version 2.5) +(set-logic QF_S) +(set-info :status sat) +(set-option :strings-exp true) + +(declare-fun x () String) + +(assert (not (= x ""))) +(assert (not (str.in.re x (re.++ (str.to.re "AB") (re.* (str.to.re "A")))))) +(assert (not (str.in.re x (re.++ (re.* (str.to.re "A")) (str.to.re "B"))))) + +(check-sat) diff --git a/test/regress/regress2/strings/norn-dis-0707-3.smt2 b/test/regress/regress2/strings/norn-dis-0707-3.smt2 index bc0f877ad..47b2e6b19 100644 --- a/test/regress/regress2/strings/norn-dis-0707-3.smt2 +++ b/test/regress/regress2/strings/norn-dis-0707-3.smt2 @@ -1,6 +1,7 @@ (set-logic QF_S) (set-info :status sat) (set-option :strings-exp true) +(set-option :strings-fmf true) (declare-fun var_0 () String) (declare-fun var_1 () String)