Fixes for SyGuS + regular expressions (#3313)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Wed, 23 Oct 2019 23:15:28 +0000 (18:15 -0500)

committer Andres Noetzli <andres.noetzli@gmail.com>

Wed, 23 Oct 2019 23:15:28 +0000 (16:15 -0700)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 23 Oct 2019 23:15:28 +0000 (18:15 -0500)
committer Andres Noetzli <andres.noetzli@gmail.com>
Wed, 23 Oct 2019 23:15:28 +0000 (16:15 -0700)
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index d2505f4f4ef9fe895018d5621d3f7609643372d1..b20e2f3ad3889f469e8089a0c1366a7626812953 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -48,6 +48,7 @@ RegExpOpr::RegExpOpr()
  RegExpOpr::~RegExpOpr() {}
  
  bool RegExpOpr::checkConstRegExp( Node r ) {
+  Assert(r.getType().isRegExp());
    Trace("strings-regexp-cstre")
        << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
    RegExpConstType rct = getRegExpConstType(r);
@@ -56,6 +57,7 @@ bool RegExpOpr::checkConstRegExp( Node r ) {
  
  RegExpConstType RegExpOpr::getRegExpConstType(Node r)
  {
+  Assert(r.getType().isRegExp());
    std::unordered_map<Node, RegExpConstType, NodeHashFunction>::iterator it;
    std::vector<TNode> visit;
    TNode cur;
@@ -79,11 +81,25 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
        {
          d_constCache[cur] = RE_C_CONSTANT;
        }
+      else if (!isRegExpKind(ck))
+      {
+        // non-regular expression applications, e.g. function applications
+        // with regular expression return type are treated as variables.
+        d_constCache[cur] = RE_C_VARIABLE;
+      }
        else
        {
          d_constCache[cur] = RE_C_UNKNOWN;
          visit.push_back(cur);
-        visit.insert(visit.end(), cur.begin(), cur.end());
+        if (ck == REGEXP_LOOP)
+        {
+          // only add the first child of loop
+          visit.push_back(cur[0]);
+        }
+        else
+        {
+          visit.insert(visit.end(), cur.begin(), cur.end());
+        }
        }
      }
      else if (it->second == RE_C_UNKNOWN)
@@ -105,6 +121,14 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r)
    return d_constCache[r];
  }
  
+bool RegExpOpr::isRegExpKind(Kind k)
+{
+  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
+         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
+         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
+         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV;
+}
+
  // 0-unknown, 1-yes, 2-no
  int RegExpOpr::delta( Node r, Node &exp ) {
    Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
@@ -113,7 +137,7 @@ int RegExpOpr::delta( Node r, Node &exp ) {
      ret = d_delta_cache[r].first;
      exp = d_delta_cache[r].second;
    } else {
-    int k = r.getKind();
+    Kind k = r.getKind();
      switch( k ) {
        case kind::REGEXP_EMPTY: {
          ret = 2;
@@ -241,8 +265,8 @@ int RegExpOpr::delta( Node r, Node &exp ) {
          break;
        }
        default: {
-        //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
-        Unreachable();
+        Assert(!isRegExpKind(k));
+        break;
        }
      }
      if(!exp.isNull()) {
@@ -481,8 +505,9 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
          break;
        }
        default: {
-        //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
-        Unreachable();
+        Assert(!isRegExpKind(r.getKind()));
+        return 0;
+        break;
        }
      }
      if(retNode != d_emptyRegexp) {
@@ -515,7 +540,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
        retNode = d_emptyRegexp;
      }
    } else {
-    int k = r.getKind();
+    Kind k = r.getKind();
      switch( k ) {
        case kind::REGEXP_EMPTY: {
          retNode = d_emptyRegexp;
@@ -657,6 +682,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
        default: {
          Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
          Unreachable();
+        break;
        }
      }
      if(retNode != d_emptyRegexp) {
@@ -680,19 +706,11 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
      // cset is code points
      std::set<unsigned> cset;
      SetNodes vset;
-    int k = r.getKind();
+    Kind k = r.getKind();
      switch( k ) {
        case kind::REGEXP_EMPTY: {
          break;
        }
-      case kind::REGEXP_SIGMA: {
-        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
-        for (unsigned i = 0; i <= d_lastchar; i++)
-        {
-          cset.insert(i);
-        }
-        break;
-      }
        case kind::REGEXP_RANGE: {
          unsigned a = r[0].getConst<String>().front();
          a = String::convertUnsignedIntToCode(a);
@@ -767,9 +785,20 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
          firstChars(r[0], cset, vset);
          break;
        }
+      case kind::REGEXP_SIGMA:
        default: {
-        Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl;
-        Unreachable();
+        // we do not expect to call this function on regular expressions that
+        // aren't a standard regular expression kind. However, if we do, then
+        // the following code is conservative and says that the current
+        // regular expression can begin with any character.
+        Assert(k == REGEXP_SIGMA);
+        // can start with any character
+        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
+        for (unsigned i = 0; i <= d_lastchar; i++)
+        {
+          cset.insert(i);
+        }
+        break;
        }
      }
      pcset.insert(cset.begin(), cset.end());
@@ -817,7 +846,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
    if(itr != d_simpl_neg_cache.end()) {
      new_nodes.push_back( itr->second );
    } else {
-    int k = r.getKind();
+    Kind k = r.getKind();
      Node conc;
      switch( k ) {
        case kind::REGEXP_EMPTY: {
@@ -1018,13 +1047,16 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
          break;
        }
        default: {
-        Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
-        Assert( false, "Unsupported Term" );
+        Assert(!isRegExpKind(k));
+        break;
        }
      }
-    conc = Rewriter::rewrite( conc );
-    new_nodes.push_back( conc );
-    d_simpl_neg_cache[p] = conc;
+    if (!conc.isNull())
+    {
+      conc = Rewriter::rewrite(conc);
+      new_nodes.push_back(conc);
+      d_simpl_neg_cache[p] = conc;
+    }
    }
  }
  void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
@@ -1034,7 +1066,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
    if(itr != d_simpl_cache.end()) {
      new_nodes.push_back( itr->second );
    } else {
-    int k = r.getKind();
+    Kind k = r.getKind();
      Node conc;
      switch( k ) {
        case kind::REGEXP_EMPTY: {
@@ -1191,13 +1223,16 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
          break;
        }
        default: {
-        Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
-        Assert( false, "Unsupported Term" );
+        Assert(!isRegExpKind(k));
+        break;
        }
      }
-    conc = Rewriter::rewrite( conc );
-    new_nodes.push_back( conc );
-    d_simpl_cache[p] = conc;
+    if (!conc.isNull())
+    {
+      conc = Rewriter::rewrite(conc);
+      new_nodes.push_back(conc);
+      d_simpl_cache[p] = conc;
+    }
    }
  }
  
@@ -1644,9 +1679,13 @@ std::string RegExpOpr::mkString( Node r ) {
          break;
        }
        default:
-        Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
-        //Assert( false );
-        //return Node::null();
+      {
+        std::stringstream ss;
+        ss << r;
+        retStr = ss.str();
+        Assert(!isRegExpKind(r.getKind()));
+        break;
+      }
      }
    }
  
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h

index 117449d350c7d806e43632941d16d9772bd1847c..c7464760d91ebfc0233d6b336b2ed4d7429bc48e 100644 (file)
--- a/src/theory/strings/regexp_operation.h
+++ b/src/theory/strings/regexp_operation.h
@@ -119,6 +119,8 @@ class RegExpOpr {
    bool checkConstRegExp( Node r );
    /** get the constant type for regular expression r */
    RegExpConstType getRegExpConstType(Node r);
+  /** is k a native operator whose return type is a regular expression? */
+  static bool isRegExpKind(Kind k);
    void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
    int delta( Node r, Node &exp );
    int derivativeS( Node r, CVC4::String c, Node &retNode );
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp

index 0e44c9461bb8c10404327a813a565f3f2e34d14d..11471c09fee871029f5e0ac2256692e46d4ef7c4 100644 (file)
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -227,29 +227,37 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
                << std::endl;
            // if so, do simple unrolling
            std::vector<Node> nvec;
-          if (nvec.empty())
+          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
+          d_regexp_opr.simplify(atom, nvec, polarity);
+          Trace("strings-regexp") << "...finished" << std::endl;
+          // if simplifying successfully generated a lemma
+          if (!nvec.empty())
            {
-            d_regexp_opr.simplify(atom, nvec, polarity);
-          }
-          std::vector<Node> exp_n;
-          exp_n.push_back(assertion);
-          Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
-          d_im.sendInference(rnfexp, exp_n, conc, "REGEXP_Unfold");
-          addedLemma = true;
-          if (changed)
-          {
-            cprocessed.push_back(assertion);
+            std::vector<Node> exp_n;
+            exp_n.push_back(assertion);
+            Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
+            d_im.sendInference(rnfexp, exp_n, conc, "REGEXP_Unfold");
+            addedLemma = true;
+            if (changed)
+            {
+              cprocessed.push_back(assertion);
+            }
+            else
+            {
+              processed.push_back(assertion);
+            }
+            if (e == 0)
+            {
+              // Remember that we have unfolded a membership for x
+              // notice that we only do this here, after we have definitely
+              // added a lemma.
+              repUnfold.insert(rep);
+            }
            }
            else
            {
-            processed.push_back(assertion);
-          }
-          if (e == 0)
-          {
-            // Remember that we have unfolded a membership for x
-            // notice that we only do this here, after we have definitely
-            // added a lemma.
-            repUnfold.insert(rep);
+            // otherwise we are incomplete
+            d_parent.getOutputChannel().setIncomplete();
            }
          }
          if (d_state.isInConflict())
@@ -387,7 +395,7 @@ bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
        Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
        continue;
      }
-    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m);
+    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
      if (rct == RE_C_VARIABLE
          || (options::stringRegExpInterMode() == RE_INTER_CONSTANT
              && rct != RE_C_CONRETE_CONSTANT))
@@ -629,7 +637,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
      {
        Trace("strings-error") << "Unsupported term: " << r
                               << " in normalization SymRegExp." << std::endl;
-      Assert(false);
+      Assert(!RegExpOpr::isRegExpKind(r.getKind()));
      }
    }
    return ret;
diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h

index 0b84ebc7927604b45096314241a3d6458c0e15d9..a43ea516ad7035d7338fc7ccceff98856f33aebe 100644 (file)
--- a/src/theory/strings/regexp_solver.h
+++ b/src/theory/strings/regexp_solver.h
@@ -108,6 +108,16 @@ class RegExpSolver
    InferenceManager& d_im;
    // check membership constraints
    Node mkAnd(Node c1, Node c2);
+  /**
+   * Check partial derivative
+   *
+   * Returns false if a lemma pertaining to checking the partial derivative
+   * of x in r was added. In this case, addedLemma is updated to true.
+   *
+   * The argument atom is the assertion that explains x in r, which is the
+   * normalized form of atom that may be modified using a substitution whose
+   * explanation is nf_exp.
+   */
    bool checkPDerivative(
        Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp);
    Node getMembership(Node n, bool isPos, unsigned i);
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp

index d3ec6d35cb9beddcd874f9afda4f02711985f9e1..adf74f0ccec9ae3a5b7e763fd1a895593235e7e0 100644 (file)
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -23,6 +23,7 @@
  #include "options/strings_options.h"
  #include "smt/logic_exception.h"
  #include "theory/arith/arith_msum.h"
+#include "theory/strings/regexp_operation.h"
  #include "theory/strings/theory_strings_utils.h"
  #include "theory/theory.h"
  #include "util/integer.h"
@@ -1186,7 +1187,7 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i
    Assert( index_start <= s.size() );
    Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " << index_start << std::endl;
    Assert(!r.isVar());
-  int k = r.getKind();
+  Kind k = r.getKind();
    switch( k ) {
      case kind::STRING_TO_REGEXP: {
        CVC4::String s2 = s.substr( index_start, s.size() - index_start );
@@ -1338,8 +1339,7 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i
        }
      }
      default: {
-      Trace("strings-error") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl;
-      Unreachable();
+      Assert(!RegExpOpr::isRegExpKind(k));
        return false;
      }
    }
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt

index 876751b0267d6562a4da24a09d52c2bfc32117b9..8fc4b6410135aa20bb2aa44a2973e13f9ac63446 100644 (file)
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1728,6 +1728,7 @@ set(regress_1_tests
    regress1/sygus/qe.sy
    regress1/sygus/qf_abv.smt2
    regress1/sygus/real-grammar.sy
+  regress1/sygus/re-concat.sy
    regress1/sygus/repair-const-rl.sy
    regress1/sygus/simple-regexp.sy
    regress1/sygus/stopwatch-bt.sy
diff --git a/test/regress/regress1/sygus/re-concat.sy b/test/regress/regress1/sygus/re-concat.sy

new file mode 100644 (file)

index 0000000..3449ed5
--- /dev/null
+++ b/test/regress/regress1/sygus/re-concat.sy
@@ -0,0 +1,13 @@
+; EXPECT: unsat
+; COMMAND-LINE: --sygus-out=status
+(set-logic SLIA)
+(synth-fun f () RegLan (
+                         (Start RegLan (
+                                         (str.to.re Tokens)
+                                         (re.++ Start Start)))
+                         (Tokens String ("A" "B"))
+                         ))
+
+(constraint (str.in.re "AB" f))
+
+(check-synth)
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Wed, 23 Oct 2019 23:15:28 +0000 (18:15 -0500)
committer	Andres Noetzli <andres.noetzli@gmail.com>
	Wed, 23 Oct 2019 23:15:28 +0000 (16:15 -0700)
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.h		patch \| blob \| history
src/theory/strings/regexp_solver.cpp		patch \| blob \| history
src/theory/strings/regexp_solver.h		patch \| blob \| history
src/theory/strings/theory_strings_rewriter.cpp		patch \| blob \| history
test/regress/CMakeLists.txt		patch \| blob \| history
test/regress/regress1/sygus/re-concat.sy	[new file with mode: 0644]	patch \| blob