(proof-new) Refactor regular expression operation (#4596)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)

committer GitHub <noreply@github.com>

Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)
committer GitHub <noreply@github.com>
Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index 017d861a2394f71fba12605d72c8011384cec0c6..273518edd144c08b0b600bdf3d5412b5e6c26d27 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -16,20 +16,20 @@
  
  #include "theory/strings/regexp_operation.h"
  
-#include "expr/kind.h"
+#include "expr/node_algorithm.h"
  #include "options/strings_options.h"
+#include "theory/rewriter.h"
  #include "theory/strings/regexp_entail.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/strings/word.h"
  
-using namespace CVC4;
  using namespace CVC4::kind;
  
  namespace CVC4 {
  namespace theory {
  namespace strings {
  
-RegExpOpr::RegExpOpr()
+RegExpOpr::RegExpOpr(SkolemCache* sc)
      : d_true(NodeManager::currentNM()->mkConst(true)),
        d_false(NodeManager::currentNM()->mkConst(false)),
        d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
@@ -38,7 +38,9 @@ RegExpOpr::RegExpOpr()
        d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))),
        d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
                                                 std::vector<Node>{})),
-      d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
+      d_sigma_star(
+          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
+      d_sc(sc)
  {
    d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
  
@@ -821,444 +823,250 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
    }
  }
  
-//simplify
-void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
-  Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
+Node RegExpOpr::simplify(Node t, bool polarity)
+{
+  Trace("strings-regexp-simpl")
+      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
    Assert(t.getKind() == kind::STRING_IN_REGEXP);
-  Node str = t[0];
-  Node re = t[1];
-  if(polarity) {
-    simplifyPRegExp( str, re, new_nodes );
-  } else {
-    simplifyNRegExp( str, re, new_nodes );
+  Node tlit = polarity ? t : t.notNode();
+  Node conc;
+  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
+  if (itr != d_simpCache.end())
+  {
+    return itr->second;
    }
-  Trace("strings-regexp-simpl") << "RegExp-Simpl  returns (" << new_nodes.size() << "):\n";
-  for(unsigned i=0; i<new_nodes.size(); i++) {
-    Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
+  if (polarity)
+  {
+    conc = reduceRegExpPos(tlit, d_sc);
    }
-}
-void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
-  std::pair < Node, Node > p(s, r);
-  NodeManager *nm = NodeManager::currentNM();
-  std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
-  if(itr != d_simpl_neg_cache.end()) {
-    new_nodes.push_back( itr->second );
-  } else {
-    Kind k = r.getKind();
-    Node conc;
-    switch( k ) {
-      case kind::REGEXP_EMPTY: {
-        conc = d_true;
-        break;
-      }
-      case kind::REGEXP_SIGMA: {
-        conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
-        break;
-      }
-      case kind::REGEXP_RANGE: {
-        std::vector< Node > vec;
-        unsigned a = r[0].getConst<String>().front();
-        unsigned b = r[1].getConst<String>().front();
-        for (unsigned c = a; c <= b; c++)
-        {
-          std::vector<unsigned> tmpVec;
-          tmpVec.push_back(c);
-          Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate();
-          vec.push_back( tmp );
-        }
-        conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec);
-        break;
-      }
-      case kind::STRING_TO_REGEXP: {
-        conc = s.eqNode(r[0]).negate();
-        break;
-      }
-      case kind::REGEXP_CONCAT: {
-        // The following simplification states that
-        //    ~( s in R1 ++ R2 )
-        // is equivalent to
-        //    forall x.
-        //      0 <= x <= len(s) =>
-        //        ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2)
-        Node lens = nm->mkNode(STRING_LENGTH, s);
-        // the index we are removing from the RE concatenation
-        unsigned indexRm = 0;
-        Node b1;
-        Node b1v;
-        // As an optimization to the above reduction, if we can determine that
-        // all strings in the language of R1 have the same length, say n,
-        // then the conclusion of the reduction is quantifier-free:
-        //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
-        Node reLength = RegExpEntail::getFixedLengthForRegexp(r[0]);
-        if (reLength.isNull())
-        {
-          // try from the opposite end
-          unsigned indexE = r.getNumChildren() - 1;
-          reLength = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
-          if (!reLength.isNull())
-          {
-            indexRm = indexE;
-          }
-        }
-        Node guard;
-        if (reLength.isNull())
-        {
-          b1 = nm->mkBoundVar(nm->integerType());
-          b1v = nm->mkNode(BOUND_VAR_LIST, b1);
-          guard = nm->mkNode(AND,
-                             nm->mkNode(GEQ, b1, d_zero),
-                             nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
-        }
-        else
-        {
-          b1 = reLength;
-        }
-        Node s1;
-        Node s2;
-        if (indexRm == 0)
-        {
-          s1 = nm->mkNode(STRING_SUBSTR, s, d_zero, b1);
-          s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
-        }
-        else
-        {
-          s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
-          s2 =
-              nm->mkNode(STRING_SUBSTR, s, d_zero, nm->mkNode(MINUS, lens, b1));
-        }
-        Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[indexRm]).negate();
-        std::vector<Node> nvec;
-        for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
-        {
-          if (i != indexRm)
-          {
-            nvec.push_back( r[i] );
-          }
-        }
-        Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
-        r2 = Rewriter::rewrite(r2);
-        Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
-        conc = nm->mkNode(OR, s1r1, s2r2);
-        if (!b1v.isNull())
-        {
-          conc = nm->mkNode(OR, guard.negate(), conc);
-          conc = nm->mkNode(FORALL, b1v, conc);
-        }
-        break;
-      }
-      case kind::REGEXP_UNION: {
-        std::vector< Node > c_and;
-        for(unsigned i=0; i<r.getNumChildren(); ++i) {
-          if(r[i].getKind() == kind::STRING_TO_REGEXP) {
-            c_and.push_back( r[i][0].eqNode(s).negate() );
-          } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
-            continue;
-          } else {
-            c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
-          }
-        }
-        conc = c_and.size() == 0 ? d_true :
-            c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
-        break;
-      }
-      case kind::REGEXP_INTER: {
-        bool emptyflag = false;
-        std::vector< Node > c_or;
-        for(unsigned i=0; i<r.getNumChildren(); ++i) {
-          if(r[i].getKind() == kind::STRING_TO_REGEXP) {
-            c_or.push_back( r[i][0].eqNode(s).negate() );
-          } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
-            emptyflag = true;
-            break;
-          } else {
-            c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
-          }
-        }
-        if(emptyflag) {
-          conc = d_true;
-        } else {
-          conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
-        }
-        break;
-      }
-      case kind::REGEXP_STAR: {
-        if(s == d_emptyString) {
-          conc = d_false;
-        } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
-          conc = s.eqNode(d_emptyString).negate();
-        } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
-          conc = d_false;
-        } else {
-          Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
-          Node sne = s.eqNode(d_emptyString).negate();
-          Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
-          Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
-          Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
-                NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
-          //internal
-          Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1);
-          Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
-          Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
-          Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
-
-          conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
-          conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
-          conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
-          conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
-        }
-        break;
-      }
-      case kind::REGEXP_LOOP: {
-        Assert(r.getNumChildren() == 3);
-        if(r[1] == r[2]) {
-          if(r[1] == d_zero) {
-            conc = s.eqNode(d_emptyString).negate();
-          } else if(r[1] == d_one) {
-            conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate();
-          } else {
-            //unroll for now
-            unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
-            std::vector<Node> vec;
-            for(unsigned i=0; i<l; i++) {
-              vec.push_back(r[0]);
-            }
-            Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
-            conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate();
-          }
-        } else {
-          Assert(r[1] == d_zero);
-          //unroll for now
-          unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
-          std::vector<Node> vec;
-          vec.push_back(d_emptySingleton);
-          std::vector<Node> vec2;
-          for(unsigned i=1; i<=u; i++) {
-            vec2.push_back(r[0]);
-            Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
-            vec.push_back(r2);
-          }
-          Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec);
-          conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate();
-        }
-        break;
-      }
-      case kind::REGEXP_COMPLEMENT:
+  else
+  {
+    // see if we can use an optimized version of the reduction for re.++.
+    Node r = t[1];
+    if (r.getKind() == REGEXP_CONCAT)
+    {
+      // the index we are removing from the RE concatenation
+      size_t index = 0;
+      // As an optimization to the reduction, if we can determine that
+      // all strings in the language of R1 have the same length, say n,
+      // then the conclusion of the reduction is quantifier-free:
+      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
+      Node reLen = getRegExpConcatFixed(r, index);
+      if (!reLen.isNull())
        {
-        // ~( s in complement(R) ) ---> s in R
-        conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
-        break;
-      }
-      default: {
-        Assert(!utils::isRegExpKind(k));
-        break;
+        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
        }
      }
-    if (!conc.isNull())
+    if (conc.isNull())
      {
-      conc = Rewriter::rewrite(conc);
-      new_nodes.push_back(conc);
-      d_simpl_neg_cache[p] = conc;
+      conc = reduceRegExpNeg(tlit);
      }
    }
+  d_simpCache[tlit] = conc;
+  Trace("strings-regexp-simpl")
+      << "RegExpOpr::simplify: returns " << conc << std::endl;
+  return conc;
  }
-void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
-  std::pair < Node, Node > p(s, r);
-  NodeManager *nm = NodeManager::currentNM();
-  std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
-  if(itr != d_simpl_cache.end()) {
-    new_nodes.push_back( itr->second );
-  } else {
-    Kind k = r.getKind();
-    Node conc;
-    switch( k ) {
-      case kind::REGEXP_EMPTY: {
-        conc = d_false;
-        break;
-      }
-      case kind::REGEXP_SIGMA: {
-        conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
-        break;
-      }
-      case kind::REGEXP_RANGE: {
-        conc = s.eqNode( r[0] );
-        if(r[0] != r[1]) {
-          unsigned a = r[0].getConst<String>().front();
-          unsigned b = r[1].getConst<String>().front();
-          a += 1;
-          std::vector<unsigned> anvec;
-          anvec.push_back(a);
-          Node an = nm->mkConst(String(anvec));
-          Node tmp = a != b
-                         ? nm->mkNode(kind::STRING_IN_REGEXP,
-                                      s,
-                                      nm->mkNode(kind::REGEXP_RANGE, an, r[1]))
-                         : s.eqNode(r[1]);
-          conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp);
-        }
-        break;
-      }
-      case kind::STRING_TO_REGEXP: {
-        conc = s.eqNode(r[0]);
-        break;
-      }
-      case kind::REGEXP_CONCAT: {
-        std::vector< Node > nvec;
-        std::vector< Node > cc;
-        bool emptyflag = false;
-        for(unsigned i=0; i<r.getNumChildren(); ++i) {
-          if(r[i].getKind() == kind::STRING_TO_REGEXP) {
-            cc.push_back( r[i][0] );
-          } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
-            emptyflag = true;
-            break;
-          } else {
-            Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
-            Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
-            nvec.push_back(lem);
-            cc.push_back(sk);
-          }
-        }
-        if(emptyflag) {
-          conc = d_false;
-        } else {
-          Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
-          nvec.push_back(lem);
-          conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
-        }
-        break;
-      }
-      case kind::REGEXP_UNION: {
-        std::vector< Node > c_or;
-        for(unsigned i=0; i<r.getNumChildren(); ++i) {
-          if(r[i].getKind() == kind::STRING_TO_REGEXP) {
-            c_or.push_back( r[i][0].eqNode(s) );
-          } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
-            continue;
-          } else {
-            c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
-          }
-        }
-        conc = c_or.size() == 0 ? d_false :
-            c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
-        break;
-      }
-      case kind::REGEXP_INTER: {
-        std::vector< Node > c_and;
-        bool emptyflag = false;
-        for(unsigned i=0; i<r.getNumChildren(); ++i) {
-          if(r[i].getKind() == kind::STRING_TO_REGEXP) {
-            c_and.push_back( r[i][0].eqNode(s) );
-          } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
-            emptyflag = true;
-            break;
-          } else {
-            c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
-          }
-        }
-        if(emptyflag) {
-          conc = d_false;
-        } else {
-          conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
-        }
-        break;
-      }
-      case kind::REGEXP_STAR: {
-        if(s == d_emptyString) {
-          conc = d_true;
-        } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
-          conc = s.eqNode(d_emptyString);
-        } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
-          conc = d_true;
-        } else {
-          Node se = s.eqNode(d_emptyString);
-          Node sinr = nm->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
-          Node sk1 = nm->mkSkolem(
-              "rs", s.getType(), "created for regular expression star");
-          Node sk2 = nm->mkSkolem(
-              "rs", s.getType(), "created for regular expression star");
-          Node sk3 = nm->mkSkolem(
-              "rs", s.getType(), "created for regular expression star");
  
-          NodeBuilder<> nb(kind::AND);
-          nb << sk1.eqNode(d_emptyString).negate();
-          nb << sk3.eqNode(d_emptyString).negate();
-          nb << nm->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
-          nb << nm->mkNode(kind::STRING_IN_REGEXP, sk2, r);
-          nb << nm->mkNode(kind::STRING_IN_REGEXP, sk3, r[0]);
-          nb << s.eqNode(nm->mkNode(kind::STRING_CONCAT, sk1, sk2, sk3));
-          conc = nb;
+Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
+{
+  Assert(r.getKind() == REGEXP_CONCAT);
+  index = 0;
+  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
+  if (!reLen.isNull())
+  {
+    return reLen;
+  }
+  // try from the opposite end
+  size_t indexE = r.getNumChildren() - 1;
+  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
+  if (!reLen.isNull())
+  {
+    index = indexE;
+    return reLen;
+  }
+  return Node::null();
+}
  
-          // We unfold `x in R*` by considering three cases: `x` is empty, `x`
-          // is matched by `R`, or `x` is matched by two or more `R`s. For the
-          // last case, we break `x` into three pieces, making the beginning
-          // and the end each match `R` and the middle match `R*`. Matching the
-          // beginning and the end with `R` allows us to reason about the
-          // beginning and the end of `x` simultaneously.
-          //
-          // x in R* ---> (x = "") v (x in R) v
-          //              (x = x1 ++ x2 ++ x3 ^ x1 != "" ^ x3 != "" ^
-          //               x1 in R ^ x2 in R* ^ x3 in R)
-          conc = nm->mkNode(kind::OR, se, sinr, conc);
-        }
-        break;
-      }
-      case kind::REGEXP_LOOP: {
-        Assert(r.getNumChildren() == 3);
-        if(r[1] == d_zero) {
-          if(r[2] == d_zero) {
-            conc = s.eqNode( d_emptyString );
-          } else {
-            //R{0,n}
-            if(s != d_emptyString) {
-              Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
-              Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
-              Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
-              Node sk1ne = sk1.eqNode(d_emptyString).negate();
-              Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
-              unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
-              Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
-              Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
-                NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1));
-              conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
-              conc = NodeManager::currentNM()->mkNode(kind::OR,
-                s.eqNode(d_emptyString), conc);
-            } else {
-              conc = d_true;
-            }
-          }
-        } else {
-          //R^n
-          Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
-          Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
-          Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
-          Node sk1ne = sk1.eqNode(d_emptyString).negate();
-          Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
-          unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
-          Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
-          Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
-            NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1));
-          conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
-        }
-        break;
-      }
-      case kind::REGEXP_COMPLEMENT:
+Node RegExpOpr::reduceRegExpNeg(Node mem)
+{
+  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
+  Node s = mem[0][0];
+  Node r = mem[0][1];
+  NodeManager* nm = NodeManager::currentNM();
+  Kind k = r.getKind();
+  Node zero = nm->mkConst(Rational(0));
+  Node conc;
+  if (k == REGEXP_CONCAT)
+  {
+    // do not use length entailment, call regular expression concat
+    Node reLen;
+    size_t i = 0;
+    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
+  }
+  else if (k == REGEXP_STAR)
+  {
+    Node emp = Word::mkEmptyWord(s.getType());
+    Node lens = nm->mkNode(STRING_LENGTH, s);
+    Node sne = s.eqNode(emp).negate();
+    Node b1 = nm->mkBoundVar(nm->integerType());
+    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
+    Node g1 =
+        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
+    // internal
+    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
+    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
+    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
+    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
+
+    conc = nm->mkNode(OR, s1r1, s2r2);
+    conc = nm->mkNode(IMPLIES, g1, conc);
+    conc = nm->mkNode(FORALL, b1v, conc);
+    conc = nm->mkNode(AND, sne, conc);
+  }
+  else
+  {
+    Assert(!utils::isRegExpKind(k));
+  }
+  return conc;
+}
+
+Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
+{
+  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
+  Node s = mem[0][0];
+  Node r = mem[0][1];
+  NodeManager* nm = NodeManager::currentNM();
+  Assert(r.getKind() == REGEXP_CONCAT);
+  Node zero = nm->mkConst(Rational(0));
+  // The following simplification states that
+  //    ~( s in R1 ++ R2 ++... ++ Rn )
+  // is equivalent to
+  //    forall x.
+  //      0 <= x <= len(s) =>
+  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
+  // Index is the child index of r that we are stripping off, which is either
+  // from the beginning or the end.
+  Assert(index == 0 || index == r.getNumChildren() - 1);
+  Node lens = nm->mkNode(STRING_LENGTH, s);
+  Node b1;
+  Node b1v;
+  Node guard;
+  if (reLen.isNull())
+  {
+    b1 = SkolemCache::mkIndexVar(mem);
+    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
+    guard = nm->mkNode(AND,
+                       nm->mkNode(GEQ, b1, zero),
+                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
+  }
+  else
+  {
+    b1 = reLen;
+  }
+  Node s1;
+  Node s2;
+  if (index == 0)
+  {
+    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
+    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
+  }
+  else
+  {
+    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
+    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
+  }
+  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
+  std::vector<Node> nvec;
+  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
+  {
+    if (i != index)
+    {
+      nvec.push_back(r[i]);
+    }
+  }
+  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
+  r2 = Rewriter::rewrite(r2);
+  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
+  Node conc = nm->mkNode(OR, s1r1, s2r2);
+  if (!b1v.isNull())
+  {
+    conc = nm->mkNode(OR, guard.negate(), conc);
+    conc = nm->mkNode(FORALL, b1v, conc);
+  }
+  return conc;
+}
+
+Node RegExpOpr::reduceRegExpPos(Node mem, SkolemCache* sc)
+{
+  Assert(mem.getKind() == STRING_IN_REGEXP);
+  Node s = mem[0];
+  Node r = mem[1];
+  NodeManager* nm = NodeManager::currentNM();
+  Kind k = r.getKind();
+  Node conc;
+  if (k == REGEXP_CONCAT)
+  {
+    std::vector<Node> nvec;
+    std::vector<Node> cc;
+    // get the (valid) existential for this membership
+    Node eform = getExistsForRegExpConcatMem(mem);
+    SkolemManager* sm = nm->getSkolemManager();
+    // Notice that this rule does not introduce witness terms, instead it
+    // uses skolems in the conclusion of the proof rule directly. Thus,
+    // the existential eform does not need to be explicitly justified by a
+    // proof here, since it is only being used as an intermediate formula in
+    // this inference. Hence we do not pass a proof generator to mkSkolemize.
+    std::vector<Node> skolems;
+    sm->mkSkolemize(eform, skolems, "rc", "regexp concat skolem");
+    Assert(skolems.size() == r.getNumChildren());
+    // Look up skolems for each of the components. If sc has optimizations
+    // enabled, this will return arguments of str.to_re.
+    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
+    {
+      if (r[i].getKind() == STRING_TO_REGEXP)
        {
-        // s in complement(R) ---> ~( s in R )
-        conc = nm->mkNode(STRING_IN_REGEXP, s, r[0]).negate();
-        break;
+        // optimization, just take the body
+        skolems[i] = r[i][0];
        }
-      default: {
-        Assert(!utils::isRegExpKind(k));
-        break;
+      else
+      {
+        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, skolems[i], r[i]));
        }
      }
-    if (!conc.isNull())
-    {
-      conc = Rewriter::rewrite(conc);
-      new_nodes.push_back(conc);
-      d_simpl_cache[p] = conc;
-    }
+    // (str.in_re x (re.++ R1 .... Rn)) =>
+    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
+    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, skolems));
+    nvec.push_back(lem);
+    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
+  }
+  else if (k == REGEXP_STAR)
+  {
+    Node emp = Word::mkEmptyWord(s.getType());
+    Node se = s.eqNode(emp);
+    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
+    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
+    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
+    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
+    // is matched by `R`, or `x` is matched by two or more `R`s. For the
+    // last case, `x` will break into three pieces, making the beginning
+    // and the end each match `R` and the middle match `R*`. Matching the
+    // beginning and the end with `R` allows us to reason about the
+    // beginning and the end of `x` simultaneously.
+    //
+    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
+
+    // We also immediately unfold the last disjunct for re.*. The advantage
+    // of doing this is that we use the same scheme for skolems above.
+    sinRExp = reduceRegExpPos(sinRExp, sc);
+    // make the return lemma
+    conc = nm->mkNode(OR, se, sinr, sinRExp);
    }
+  else
+  {
+    Assert(!utils::isRegExpKind(k));
+  }
+  return conc;
  }
  
  bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
@@ -1384,24 +1192,6 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
    }
  }
  
-bool RegExpOpr::testNoRV(Node r) {
-  std::map< Node, bool >::const_iterator itr = d_norv_cache.find(r);
-  if(itr != d_norv_cache.end()) {
-    return itr->second;
-  } else {
-    if(r.getKind() == kind::REGEXP_RV) {
-      return false;
-    } else if(r.getNumChildren() > 1) {
-      for(unsigned int i=0; i<r.getNumChildren(); i++) {
-        if(!testNoRV(r[i])) {
-          return false;
-        }
-      }
-    }
-    return true;
-  }
-}
-
  Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
    //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
    if(r1 > r2) {
@@ -1410,13 +1200,6 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node >
      r2 = tmpNode;
    }
    Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
-  //if(Trace.isOn("regexp-debug")) {
-  //  Trace("regexp-debug") << "... with cache:\n";
-  //  for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
-  //      itr!=cache.end();itr++) {
-  //        Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
-  //      }
-  //}
    std::pair < Node, Node > p(r1, r2);
    std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
    Node rNode;
@@ -1528,7 +1311,8 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node >
        }
      }
      Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
-    if(testNoRV(rNode)) {
+    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
+    {
        d_inter_cache[p] = rNode;
      }
    }
@@ -1538,80 +1322,103 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node >
  
  Node RegExpOpr::removeIntersection(Node r) {
    Assert(checkConstRegExp(r));
-  std::map < Node, Node >::const_iterator itr = d_rm_inter_cache.find(r);
-  if(itr != d_rm_inter_cache.end()) {
-    return itr->second;
-  }
-  Node retNode;
-  Kind rk = r.getKind();
-  switch (rk)
+  NodeManager* nm = NodeManager::currentNM();
+  std::unordered_map<TNode, Node, TNodeHashFunction> visited;
+  std::unordered_map<TNode, Node, TNodeHashFunction>::iterator it;
+  std::vector<TNode> visit;
+  TNode cur;
+  visit.push_back(r);
+  do
    {
-    case REGEXP_EMPTY:
-    case REGEXP_SIGMA:
-    case REGEXP_RANGE:
-    case STRING_TO_REGEXP:
-    {
-      retNode = r;
-      break;
-    }
-    case REGEXP_CONCAT:
-    case REGEXP_UNION:
-    case REGEXP_STAR:
-    case REGEXP_COMPLEMENT:
+    cur = visit.back();
+    visit.pop_back();
+    it = visited.find(cur);
+
+    if (it == visited.end())
      {
-      NodeBuilder<> nb(rk);
-      for (const Node& rc : r)
+      visited[cur] = Node::null();
+      visit.push_back(cur);
+      for (const Node& cn : cur)
        {
-        nb << removeIntersection(rc);
+        visit.push_back(cn);
        }
-      retNode = Rewriter::rewrite(nb.constructNode());
-      break;
      }
-
-    case REGEXP_INTER:
+    else if (it->second.isNull())
      {
-      retNode = removeIntersection(r[0]);
-      for (size_t i = 1, nchild = r.getNumChildren(); i < nchild; i++)
+      Kind ck = cur.getKind();
+      Node ret;
+      bool childChanged = false;
+      std::vector<Node> children;
+      if (cur.getMetaKind() == metakind::PARAMETERIZED)
        {
-        bool spflag = false;
-        Node tmpNode = removeIntersection(r[i]);
-        retNode = intersect(retNode, tmpNode, spflag);
+        children.push_back(cur.getOperator());
        }
-      break;
-    }
-    case REGEXP_LOOP:
-    {
-      retNode = removeIntersection(r[0]);
-      retNode = Rewriter::rewrite(
-          NodeManager::currentNM()->mkNode(REGEXP_LOOP, retNode, r[1], r[2]));
-      break;
-    }
-    default:
-    {
-      Unreachable();
+      for (const Node& cn : cur)
+      {
+        it = visited.find(cn);
+        Assert(it != visited.end());
+        Assert(!it->second.isNull());
+        if (ck == REGEXP_INTER)
+        {
+          if (ret.isNull())
+          {
+            ret = it->second;
+          }
+          else
+          {
+            ret = intersect(ret, it->second);
+          }
+        }
+        else
+        {
+          // will construct below
+          childChanged = childChanged || cn != it->second;
+          children.push_back(it->second);
+        }
+      }
+      if (ck != REGEXP_INTER)
+      {
+        if (childChanged)
+        {
+          ret = nm->mkNode(cur.getKind(), children);
+        }
+        else
+        {
+          ret = cur;
+        }
+      }
+      visited[cur] = ret;
      }
+  } while (!visit.empty());
+  Assert(visited.find(r) != visited.end());
+  Assert(!visited.find(r)->second.isNull());
+  if (Trace.isOn("regexp-intersect"))
+  {
+    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
+                              << " ) = " << mkString(visited[r]) << std::endl;
    }
-  d_rm_inter_cache[r] = retNode;
-  Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) << " ) = " << mkString(retNode) << std::endl;
-  return retNode;
+  return visited[r];
  }
  
-Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
-  if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
-    Node rr1 = removeIntersection(r1);
-    Node rr2 = removeIntersection(r2);
-    std::map< PairNodes, Node > cache;
-    Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
-    Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
-    Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ")" << std::endl;
-    Node retNode = intersectInternal(rr1, rr2, cache, 1);
-    Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ") =\n\t" << mkString(retNode) << std::endl;
-    Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
-    return retNode;
-  } else {
-    spflag = true;
+Node RegExpOpr::intersect(Node r1, Node r2)
+{
+  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
+  {
      return Node::null();
    }
+  Node rr1 = removeIntersection(r1);
+  Node rr2 = removeIntersection(r2);
+  std::map<PairNodes, Node> cache;
+  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
+  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
+  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
+                            << ",\n\t" << mkString(r2) << ")" << std::endl;
+  Node retNode = intersectInternal(rr1, rr2, cache, 1);
+  Trace("regexp-intersect")
+      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
+      << ") =\n\t" << mkString(retNode) << std::endl;
+  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
+  return retNode;
  }
  
  //printing
@@ -1695,16 +1502,15 @@ std::string RegExpOpr::mkString( Node r ) {
          break;
        }
        case kind::REGEXP_LOOP: {
-        retStr += "(";
-        retStr += mkString(r[0]);
-        retStr += ")";
-        retStr += "{";
-        retStr += r[1].getConst<Rational>().toString();
-        retStr += ",";
+        uint32_t l = utils::getLoopMinOccurrences(r);
+        std::stringstream ss;
+        ss << "(" << mkString(r[0]) << "){" << l << ",";
          if(r.getNumChildren() == 3) {
-          retStr += r[2].getConst<Rational>().toString();
+          uint32_t u = utils::getLoopMaxOccurrences(r);
+          ss << u;
          }
-        retStr += "}";
+        ss << "}";
+        retStr += ss.str();
          break;
        }
        case kind::REGEXP_RV: {
@@ -1746,6 +1552,50 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2)
    return result;
  }
  
+/**
+ * Associating formulas with their "exists form", or an existentially
+ * quantified formula that is equivalent to it. This is currently used
+ * for regular expression memberships in the method below.
+ */
+struct ExistsFormAttributeId
+{
+};
+typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
+
+Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
+{
+  // get or make the exists form of the membership
+  ExistsFormAttribute efa;
+  if (mem.hasAttribute(efa))
+  {
+    // already computed
+    return mem.getAttribute(efa);
+  }
+  Assert(mem.getKind() == STRING_IN_REGEXP);
+  Node x = mem[0];
+  Node r = mem[1];
+  Assert(r.getKind() == REGEXP_CONCAT);
+  NodeManager* nm = NodeManager::currentNM();
+  TypeNode xtn = x.getType();
+  std::vector<Node> vars;
+  std::vector<Node> mems;
+  for (const Node& rc : r)
+  {
+    Node v = nm->mkBoundVar(xtn);
+    vars.push_back(v);
+    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
+  }
+  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
+  Node eq = x.eqNode(sconcat);
+  mems.insert(mems.begin(), eq);
+  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
+  Node ebody = nm->mkNode(AND, mems);
+  Node eform = nm->mkNode(EXISTS, bvl, ebody);
+  mem.setAttribute(efa, eform);
+  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
+  return eform;
+}
+
  }/* CVC4::theory::strings namespace */
  }/* CVC4::theory namespace */
  }/* CVC4 namespace */
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h

index d0b0755eb7ee21f190d10dea21f311a7e6ce8d88..66be4036bdfe5b378b6b8e8b084c57b49c4517f9 100644 (file)
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -19,14 +19,14 @@
  #ifndef CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
  #define CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
  
-#include <vector>
+#include <map>
  #include <set>
-#include <algorithm>
-#include <climits>
-#include "util/hash.h"
+#include <unordered_map>
+#include <vector>
+
+#include "expr/node.h"
+#include "theory/strings/skolem_cache.h"
  #include "util/string.h"
-#include "theory/theory.h"
-#include "theory/rewriter.h"
  
  namespace CVC4 {
  namespace theory {
@@ -73,23 +73,17 @@ class RegExpOpr {
    Node d_sigma;
    Node d_sigma_star;
  
-  std::map<PairNodes, Node> d_simpl_cache;
-  std::map<PairNodes, Node> d_simpl_neg_cache;
+  /** A cache for simplify */
+  std::map<Node, Node> d_simpCache;
    std::map<Node, std::pair<int, Node> > d_delta_cache;
    std::map<PairNodeStr, Node> d_dv_cache;
    std::map<PairNodeStr, std::pair<Node, int> > d_deriv_cache;
-  std::map<Node, std::pair<Node, int> > d_compl_cache;
    /** cache mapping regular expressions to whether they contain constants */
    std::unordered_map<Node, RegExpConstType, NodeHashFunction> d_constCache;
-  std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_cset_cache;
    std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_fset_cache;
    std::map<PairNodes, Node> d_inter_cache;
-  std::map<Node, Node> d_rm_inter_cache;
-  std::map<Node, bool> d_norv_cache;
    std::map<Node, std::vector<PairNodes> > d_split_cache;
    std::map<PairNodes, bool> d_inclusionCache;
-  void simplifyPRegExp(Node s, Node r, std::vector<Node> &new_nodes);
-  void simplifyNRegExp(Node s, Node r, std::vector<Node> &new_nodes);
    /**
     * Helper function for mkString, pretty prints constant or variable regular
     * expression r.
@@ -101,16 +95,19 @@ class RegExpOpr {
    bool containC2(unsigned cnt, Node n);
    Node convert1(unsigned cnt, Node n);
    void convert2(unsigned cnt, Node n, Node &r1, Node &r2);
-  bool testNoRV(Node r);
    Node intersectInternal(Node r1,
                           Node r2,
                           std::map<PairNodes, Node> cache,
                           unsigned cnt);
+  /**
+   * Given a regular expression r, this returns an equivalent regular expression
+   * that contains no applications of intersection.
+   */
    Node removeIntersection(Node r);
    void firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset);
  
   public:
-  RegExpOpr();
+  RegExpOpr(SkolemCache* sc);
    ~RegExpOpr();
  
    /**
@@ -121,7 +118,42 @@ class RegExpOpr {
    bool checkConstRegExp( Node r );
    /** get the constant type for regular expression r */
    RegExpConstType getRegExpConstType(Node r);
-  void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
+  /** Simplify
+   *
+   * This is the main method to simplify (unfold) a regular expression
+   * membership. It is called where t is of the form (str.in_re s r),
+   * and t (or (not t), when polarity=false) holds in the current context.
+   * It returns the unfolded form of t.
+   */
+  Node simplify(Node t, bool polarity);
+  /**
+   * Given regular expression of the form
+   *   (re.++ r_0 ... r_{n-1})
+   * This returns a non-null node reLen and updates index such that
+   *   RegExpEntail::getFixedLengthForRegexp(r_index) = reLen
+   * where index is set to either 0 or n-1.
+   */
+  static Node getRegExpConcatFixed(Node r, size_t& index);
+  //------------------------ trusted reductions
+  /**
+   * Return the unfolded form of mem of the form (str.in_re s r).
+   */
+  static Node reduceRegExpPos(Node mem, SkolemCache* sc);
+  /**
+   * Return the unfolded form of mem of the form (not (str.in_re s r)).
+   */
+  static Node reduceRegExpNeg(Node mem);
+  /**
+   * Return the unfolded form of mem of the form
+   *   (not (str.in_re s (re.++ r_0 ... r_{n-1})))
+   * Called when RegExpEntail::getFixedLengthForRegexp(r_index) = reLen
+   * where index is either 0 or n-1.
+   *
+   * This uses reLen as an optimization to improve the reduction. If reLen
+   * is null, then this optimization is not applied.
+   */
+  static Node reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index);
+  //------------------------ end trusted reductions
    /**
     * This method returns 1 if the empty string is in r, 2 if the empty string
     * is not in r, or 0 if it is unknown whether the empty string is in r.
@@ -141,9 +173,9 @@ class RegExpOpr {
    Node derivativeSingle( Node r, CVC4::String c );
    /**
     * Returns the regular expression intersection of r1 and r2. If r1 or r2 is
-   * not constant, then this method returns null and sets spflag to true.
+   * not constant, then this method returns null.
     */
-  Node intersect(Node r1, Node r2, bool &spflag);
+  Node intersect(Node r1, Node r2);
    /** Get the pretty printed version of the regular expression r */
    static std::string mkString(Node r);
  
@@ -155,6 +187,22 @@ class RegExpOpr {
     * for performance reasons.
     */
    bool regExpIncludes(Node r1, Node r2);
+
+ private:
+  /**
+   * Given a regular expression membership of the form:
+   *   (str.in_re x (re.++ R1 ... Rn))
+   * This returns the valid existentially quantified formula:
+   *   (exists ((x1 String) ... (xn String))
+   *      (=> (str.in_re x (re.++ R1 ... Rn))
+   *      (and (= x (str.++ x1 ... xn))
+   *           (str.in_re x1 R1) ... (str.in_re xn Rn))))
+   * Moreover, this formula is cached per regular expression membership via
+   * an attribute, meaning it is always the same for a given membership mem.
+   */
+  static Node getExistsForRegExpConcatMem(Node mem);
+  /** pointer to the skolem cache used by this class */
+  SkolemCache* d_sc;
  };
  
  }/* CVC4::theory::strings namespace */
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp

index 53c6c9accbe80e20276cc484c023343b642234ef..c5d6df601d10c4b9c1a3f09489b8f45a67ba74f2 100644 (file)
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -33,19 +33,19 @@ namespace strings {
  
  RegExpSolver::RegExpSolver(SolverState& s,
                             InferenceManager& im,
+                           SkolemCache* skc,
                             CoreSolver& cs,
                             ExtfSolver& es,
-                           SequencesStatistics& stats,
-                           context::Context* c,
-                           context::UserContext* u)
+                           SequencesStatistics& stats)
      : d_state(s),
        d_im(im),
        d_csolver(cs),
        d_esolver(es),
        d_statistics(stats),
-      d_regexp_ucached(u),
-      d_regexp_ccached(c),
-      d_processed_memberships(c)
+      d_regexp_ucached(s.getUserContext()),
+      d_regexp_ccached(s.getSatContext()),
+      d_processed_memberships(s.getSatContext()),
+      d_regexp_opr(skc)
  {
    d_emptyString = NodeManager::currentNM()->mkConst(::CVC4::String(""));
    std::vector<Node> nvec;
@@ -260,16 +260,14 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
                << "Unroll/simplify membership of atomic term " << rep
                << std::endl;
            // if so, do simple unrolling
-          std::vector<Node> nvec;
            Trace("strings-regexp") << "Simplify on " << atom << std::endl;
-          d_regexp_opr.simplify(atom, nvec, polarity);
+          Node conc = d_regexp_opr.simplify(atom, polarity);
            Trace("strings-regexp") << "...finished" << std::endl;
            // if simplifying successfully generated a lemma
-          if (!nvec.empty())
+          if (!conc.isNull())
            {
              std::vector<Node> exp_n;
              exp_n.push_back(assertion);
-            Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
              Assert(atom.getKind() == STRING_IN_REGEXP);
              if (polarity)
              {
@@ -468,11 +466,9 @@ bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
        rcti = rct;
        continue;
      }
-    bool spflag = false;
-    Node resR = d_regexp_opr.intersect(mi[1], m[1], spflag);
+    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
      // intersection should be computable
      Assert(!resR.isNull());
-    Assert(!spflag);
      if (resR == d_emptyRegexp)
      {
        // conflict, explain
diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h

index 9e9ba58456d75668d69ed44f2d2f38049a15b00c..59afd5ba31cf2cce66739aa703e033d8f96360af 100644 (file)
--- a/src/theory/strings/regexp_solver.h
+++ b/src/theory/strings/regexp_solver.h
@@ -25,6 +25,7 @@
  #include "expr/node.h"
  #include "theory/strings/extf_solver.h"
  #include "theory/strings/inference_manager.h"
+#include "theory/strings/skolem_cache.h"
  #include "theory/strings/regexp_operation.h"
  #include "theory/strings/sequences_stats.h"
  #include "theory/strings/solver_state.h"
@@ -46,11 +47,10 @@ class RegExpSolver
   public:
    RegExpSolver(SolverState& s,
                 InferenceManager& im,
+               SkolemCache* skc,
                 CoreSolver& cs,
                 ExtfSolver& es,
-               SequencesStatistics& stats,
-               context::Context* c,
-               context::UserContext* u);
+               SequencesStatistics& stats);
    ~RegExpSolver() {}
  
    /** check regular expression memberships
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp

index 102826591914233026659b273143b3c6efd45db7..2c7573949946a3c2cb31fde015d146799ab54be7 100644 (file)
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -61,7 +61,8 @@ TheoryStrings::TheoryStrings(context::Context* c,
                  d_csolver,
                  d_extTheory,
                  d_statistics),
-      d_rsolver(d_state, d_im, d_csolver, d_esolver, d_statistics, c, u),
+      d_rsolver(d_state, d_im, d_termReg.getSkolemCache(),
+                d_csolver, d_esolver, d_statistics),
        d_stringsFmf(c, u, valuation, d_termReg)
  {
    bool eagerEval = options::stringEagerEval();
diff --git a/test/unit/theory/regexp_operation_black.h b/test/unit/theory/regexp_operation_black.h

index 6e01392abb87edfcab53b6884b9f6bc08096725d..551d59bef856f5c88b69283bdcad7d21162ae14f 100644 (file)
--- a/test/unit/theory/regexp_operation_black.h
+++ b/test/unit/theory/regexp_operation_black.h
@@ -25,7 +25,9 @@
  #include "expr/node_manager.h"
  #include "smt/smt_engine.h"
  #include "smt/smt_engine_scope.h"
+#include "theory/rewriter.h"
  #include "theory/strings/regexp_operation.h"
+#include "theory/strings/skolem_cache.h"
  
  using namespace CVC4;
  using namespace CVC4::kind;
@@ -46,7 +48,8 @@ class RegexpOperationBlack : public CxxTest::TestSuite
      d_em = d_slv->getExprManager();
      d_smt = d_slv->getSmtEngine();
      d_scope = new SmtScope(d_smt);
-    d_regExpOpr = new RegExpOpr();
+    d_skc = new SkolemCache();
+    d_regExpOpr = new RegExpOpr(d_skc);
  
      // Ensure that the SMT engine is fully initialized (required for the
      // rewriter)
@@ -58,6 +61,7 @@ class RegexpOperationBlack : public CxxTest::TestSuite
    void tearDown() override
    {
      delete d_regExpOpr;
+    delete d_skc;
      delete d_scope;
      delete d_slv;
    }
@@ -153,6 +157,7 @@ class RegexpOperationBlack : public CxxTest::TestSuite
    ExprManager* d_em;
    SmtEngine* d_smt;
    SmtScope* d_scope;
+  SkolemCache* d_skc;
    RegExpOpr* d_regExpOpr;
  
    NodeManager* d_nm;
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)
committer	GitHub <noreply@github.com>
	Thu, 6 Aug 2020 11:48:45 +0000 (06:48 -0500)
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.h		patch \| blob \| history
src/theory/strings/regexp_solver.cpp		patch \| blob \| history
src/theory/strings/regexp_solver.h		patch \| blob \| history
src/theory/strings/theory_strings.cpp		patch \| blob \| history
test/unit/theory/regexp_operation_black.h		patch \| blob \| history