From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Mon, 10 Jun 2019 21:19:35 +0000 (-0500)
Subject: Optimization for negative concatenation membership. (#3048)
X-Git-Tag: cvc5-1.0.0~4120
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b38fc2ed13342524cefe7480c06c9afbcc73aedd;p=cvc5.git

Optimization for negative concatenation membership. (#3048)
---

diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index 91beb1ab5..0360228c6 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -20,6 +20,9 @@
 #include "options/strings_options.h"
 #include "theory/strings/theory_strings_rewriter.h"
 
+using namespace CVC4;
+using namespace CVC4::kind;
+
 namespace CVC4 {
 namespace theory {
 namespace strings {
@@ -808,39 +811,72 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
         break;
       }
       case kind::REGEXP_CONCAT: {
-        //TODO: rewrite empty
-        Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
-        Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
-        Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
-        Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
-              NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
-        Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1));
-        Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)));
-        Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
-        if(r[0].getKind() == kind::STRING_TO_REGEXP) {
-          s1r1 = s1.eqNode(r[0][0]).negate();
-        } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
-          s1r1 = d_true;
+        // The following simplification states that
+        //    ~( s in R1 ++ R2 )
+        // is equivalent to
+        //    forall x.
+        //      0 <= x <= len(s) =>
+        //        ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2)
+        Node lens = nm->mkNode(STRING_LENGTH, s);
+        // the index we are removing from the RE concatenation
+        unsigned indexRm = 0;
+        Node b1;
+        Node b1v;
+        // As an optimization to the above reduction, if we can determine that
+        // all strings in the language of R1 have the same length, say n,
+        // then the conclusion of the reduction is quantifier-free:
+        //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
+        Node reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[0]);
+        if (reLength.isNull())
+        {
+          // try from the opposite end
+          unsigned indexE = r.getNumChildren() - 1;
+          reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[indexE]);
+          if (!reLength.isNull())
+          {
+            indexRm = indexE;
+          }
+        }
+        Node guard;
+        if (reLength.isNull())
+        {
+          b1 = nm->mkBoundVar(nm->integerType());
+          b1v = nm->mkNode(BOUND_VAR_LIST, b1);
+          guard = nm->mkNode(AND,
+                             nm->mkNode(GEQ, b1, d_zero),
+                             nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
+        }
+        else
+        {
+          b1 = reLength;
+        }
+        Node s1 = nm->mkNode(STRING_SUBSTR, s, d_zero, b1);
+        Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
+        if (indexRm != 0)
+        {
+          // swap if we are removing from the end
+          Node sswap = s1;
+          s1 = s2;
+          s2 = sswap;
         }
-        Node r2 = r[1];
-        if(r.getNumChildren() > 2) {
-          std::vector< Node > nvec;
-          for(unsigned i=1; i<r.getNumChildren(); i++) {
+        Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[indexRm]).negate();
+        std::vector<Node> nvec;
+        for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
+        {
+          if (i != indexRm)
+          {
             nvec.push_back( r[i] );
           }
-          r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
         }
+        Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
         r2 = Rewriter::rewrite(r2);
-        Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
-        if(r2.getKind() == kind::STRING_TO_REGEXP) {
-          s2r2 = s2.eqNode(r2[0]).negate();
-        } else if(r2.getKind() == kind::REGEXP_EMPTY) {
-          s2r2 = d_true;
+        Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
+        conc = nm->mkNode(OR, s1r1, s2r2);
+        if (!b1v.isNull())
+        {
+          conc = nm->mkNode(OR, guard.negate(), conc);
+          conc = nm->mkNode(FORALL, b1v, conc);
         }
-
-        conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
-        conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
-        conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
         break;
       }
       case kind::REGEXP_UNION: {
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 0a5f0a972..f5571349f 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1573,6 +1573,7 @@ set(regress_1_tests
   regress1/strings/re-agg-total1.smt2
   regress1/strings/re-agg-total2.smt2
   regress1/strings/re-elim-exact.smt2
+  regress1/strings/re-neg-concat-reduct.smt2
   regress1/strings/re-unsound-080718.smt2
   regress1/strings/regexp001.smt2
   regress1/strings/regexp002.smt2
diff --git a/test/regress/regress1/strings/re-neg-concat-reduct.smt2 b/test/regress/regress1/strings/re-neg-concat-reduct.smt2
new file mode 100644
index 000000000..cbe8c4a8b
--- /dev/null
+++ b/test/regress/regress1/strings/re-neg-concat-reduct.smt2
@@ -0,0 +1,12 @@
+(set-info :smt-lib-version 2.5)
+(set-logic QF_S)
+(set-info :status sat)
+(set-option :strings-exp true)
+
+(declare-fun x () String)
+
+(assert (not (= x "")))
+(assert (not (str.in.re x (re.++ (str.to.re "AB") (re.* (str.to.re "A"))))))
+(assert (not (str.in.re x (re.++ (re.* (str.to.re "A")) (str.to.re "B")))))
+
+(check-sat)
diff --git a/test/regress/regress2/strings/norn-dis-0707-3.smt2 b/test/regress/regress2/strings/norn-dis-0707-3.smt2
index bc0f877ad..47b2e6b19 100644
--- a/test/regress/regress2/strings/norn-dis-0707-3.smt2
+++ b/test/regress/regress2/strings/norn-dis-0707-3.smt2
@@ -1,6 +1,7 @@
 (set-logic QF_S)
 (set-info :status sat)
 (set-option :strings-exp true)
+(set-option :strings-fmf true)
 
 (declare-fun var_0 () String)
 (declare-fun var_1 () String)