From ca6f1b0350b18ce3e134701f68f7e02813c3fb5f Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Tue, 16 Oct 2018 12:25:20 -0500
Subject: [PATCH] Improve strings reductions including skolem caching for
 contains (#2641)

---
 src/theory/strings/skolem_cache.cpp   |   4 +
 src/theory/strings/skolem_cache.h     |  13 +-
 src/theory/strings/theory_strings.cpp | 210 +++++++++++++++-----------
 src/theory/strings/theory_strings.h   |  16 +-
 4 files changed, 144 insertions(+), 99 deletions(-)
diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp
index 4a78a11ff..2b0cc8a1b 100644
--- a/src/theory/strings/skolem_cache.cpp
+++ b/src/theory/strings/skolem_cache.cpp
@@ -14,6 +14,8 @@
 
 #include "theory/strings/skolem_cache.h"
 
+#include "theory/rewriter.h"
+
 namespace CVC4 {
 namespace theory {
 namespace strings {
@@ -22,6 +24,8 @@ SkolemCache::SkolemCache() {}
 
 Node SkolemCache::mkSkolemCached(Node a, Node b, SkolemId id, const char* c)
 {
+  a = a.isNull() ? a : Rewriter::rewrite(a);
+  b = b.isNull() ? b : Rewriter::rewrite(b);
   std::map<SkolemId, Node>::iterator it = d_skolemCache[a][b].find(id);
   if (it == d_skolemCache[a][b].end())
   {
diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h
index c9b9fbe5a..1cd4d7de8 100644
--- a/src/theory/strings/skolem_cache.h
+++ b/src/theory/strings/skolem_cache.h
@@ -65,10 +65,6 @@ class SkolemCache
     //    exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
     SK_ID_V_SPT,
     SK_ID_V_SPT_REV,
-    // contains( a, b ) =>
-    //    exists k_pre, k_post. a = k_pre ++ b ++ k_post
-    SK_ID_CTN_PRE,
-    SK_ID_CTN_POST,
     // a != ""  ^ b = "c" ^ a ++ a' != b ++ b' =>
     //    exists k, k_rem.
     //         len( k ) = 1 ^
@@ -85,6 +81,15 @@ class SkolemCache
     // contains( a, b ) =>
     //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
     //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
+    //
+    // As an optimization, these skolems are reused for positive occurrences of
+    // contains, where they have the semantics:
+    //
+    //   contains( a, b ) =>
+    //      exists k_pre, k_post. a = k_pre ++ b ++ k_post
+    //
+    // We reuse them since it is sound to consider w.l.o.g. the first occurrence
+    // of b in a as the witness for contains( a, b ).
     SK_FIRST_CTN_PRE,
     SK_FIRST_CTN_POST,
     // For integer b,
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index fb25e1348..984b47e72 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -113,7 +113,6 @@ TheoryStrings::TheoryStrings(context::Context* c,
       d_registered_terms_cache(u),
       d_length_lemma_terms_cache(u),
       d_preproc(&d_sk_cache, u),
-      d_preproc_cache(u),
       d_extf_infer_cache(c),
       d_extf_infer_cache_u(u),
       d_ee_disequalities(c),
@@ -395,101 +394,128 @@ bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& var
   return true;
 }
 
-int TheoryStrings::getReduction( int effort, Node n, Node& nr ) {
+bool TheoryStrings::doReduction(int effort, Node n, bool& isCd)
+{
+  Assert(d_extf_info_tmp.find(n) != d_extf_info_tmp.end());
+  if (!d_extf_info_tmp[n].d_model_active)
+  {
+    // n is not active in the model, no need to reduce
+    return false;
+  }
   //determine the effort level to process the extf at
   // 0 - at assertion time, 1+ - after no other reduction is applicable
-  Assert( d_extf_info_tmp.find( n )!=d_extf_info_tmp.end() );
-  if( d_extf_info_tmp[n].d_model_active ){
-    int r_effort = -1;
-    // polarity : 1 true, -1 false, 0 neither
-    int pol = 0;
-    if (n.getType().isBoolean() && !d_extf_info_tmp[n].d_const.isNull())
+  int r_effort = -1;
+  // polarity : 1 true, -1 false, 0 neither
+  int pol = 0;
+  Kind k = n.getKind();
+  if (n.getType().isBoolean() && !d_extf_info_tmp[n].d_const.isNull())
+  {
+    pol = d_extf_info_tmp[n].d_const.getConst<bool>() ? 1 : -1;
+  }
+  if (k == STRING_STRCTN)
+  {
+    if (pol == 1)
     {
-      pol = d_extf_info_tmp[n].d_const.getConst<bool>() ? 1 : -1;
+      r_effort = 1;
     }
-    if( n.getKind()==kind::STRING_STRCTN ){
-      if( pol==1 ){
-        r_effort = 1;
-      }else if( pol==-1 ){
-        if( effort==2 ){
-          Node x = n[0];
-          Node s = n[1];
-          std::vector< Node > lexp;
-          Node lenx = getLength( x, lexp );
-          Node lens = getLength( s, lexp );
-          if( areEqual( lenx, lens ) ){
-            Trace("strings-extf-debug") << "  resolve extf : " << n << " based on equal lengths disequality." << std::endl;
-            //we can reduce to disequality when lengths are equal
-            if( !areDisequal( x, s ) ){
-              lexp.push_back( lenx.eqNode(lens) );
-              lexp.push_back( n.negate() );
-              Node xneqs = x.eqNode(s).negate();
-              sendInference( lexp, xneqs, "NEG-CTN-EQL", true );
-            }
-            return 1;
-          }else{
-            r_effort = 2;
+    else if (pol == -1)
+    {
+      if (effort == 2)
+      {
+        Node x = n[0];
+        Node s = n[1];
+        std::vector<Node> lexp;
+        Node lenx = getLength(x, lexp);
+        Node lens = getLength(s, lexp);
+        if (areEqual(lenx, lens))
+        {
+          Trace("strings-extf-debug")
+              << "  resolve extf : " << n
+              << " based on equal lengths disequality." << std::endl;
+          // We can reduce negative contains to a disequality when lengths are
+          // equal. In other words, len( x ) = len( s ) implies
+          //   ~contains( x, s ) reduces to x != s.
+          if (!areDisequal(x, s))
+          {
+            // len( x ) = len( s ) ^ ~contains( x, s ) => x != s
+            lexp.push_back(lenx.eqNode(lens));
+            lexp.push_back(n.negate());
+            Node xneqs = x.eqNode(s).negate();
+            sendInference(lexp, xneqs, "NEG-CTN-EQL", true);
           }
+          // this depends on the current assertions, so we set that this
+          // inference is context-dependent.
+          isCd = true;
+          return true;
         }
-      }
-    }else{
-      if( options::stringLazyPreproc() ){
-        if( n.getKind()==kind::STRING_SUBSTR ){
-          r_effort = 1;
-        }else if( n.getKind()!=kind::STRING_IN_REGEXP ){
+        else
+        {
           r_effort = 2;
         }
       }
     }
-    if( effort==r_effort ){
-      Node c_n = pol==-1 ? n.negate() : n;
-      if( d_preproc_cache.find( c_n )==d_preproc_cache.end() ){
-        d_preproc_cache[ c_n ] = true;
-        Trace("strings-process-debug") << "Process reduction for " << n << ", pol = " << pol << std::endl;
-        Kind k = n.getKind();
-        if (k == kind::STRING_STRCTN && pol == 1)
-        {
-          Node x = n[0];
-          Node s = n[1];
-          //positive contains reduces to a equality
-          Node sk1 = d_sk_cache.mkSkolemCached(
-              x, s, SkolemCache::SK_ID_CTN_PRE, "sc1");
-          Node sk2 = d_sk_cache.mkSkolemCached(
-              x, s, SkolemCache::SK_ID_CTN_POST, "sc2");
-          Node eq = Rewriter::rewrite( x.eqNode( mkConcat( sk1, s, sk2 ) ) );
-          std::vector< Node > exp_vec;
-          exp_vec.push_back( n );
-          sendInference( d_empty_vec, exp_vec, eq, "POS-CTN", true );
-          //we've reduced this n
-          Trace("strings-extf-debug") << "  resolve extf : " << n << " based on positive contain reduction." << std::endl;
-          return 1;
-        }
-        else if (k != kind::STRING_CODE)
-        {
-          Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
-                 || k == STRING_ITOS
-                 || k == STRING_STOI
-                 || k == STRING_STRREPL
-                 || k == STRING_LEQ);
-          std::vector< Node > new_nodes;
-          Node res = d_preproc.simplify( n, new_nodes );
-          Assert( res!=n );
-          new_nodes.push_back( NodeManager::currentNM()->mkNode( kind::EQUAL, res, n ) );
-          Node nnlem = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes );
-          nnlem = Rewriter::rewrite( nnlem );
-          Trace("strings-red-lemma") << "Reduction_" << effort << " lemma : " << nnlem << std::endl;
-          Trace("strings-red-lemma") << "...from " << n << std::endl;
-          sendInference( d_empty_vec, nnlem, "Reduction", true );
-          //we've reduced this n
-          Trace("strings-extf-debug") << "  resolve extf : " << n << " based on reduction." << std::endl;
-          return 1;
-        }
-      }else{
-        return 1;
+  }
+  else
+  {
+    if (options::stringLazyPreproc())
+    {
+      if (k == STRING_SUBSTR)
+      {
+        r_effort = 1;
+      }
+      else if (k != STRING_IN_REGEXP)
+      {
+        r_effort = 2;
       }
     }
   }
-  return 0;
+  if (effort != r_effort)
+  {
+    // not the right effort level to reduce
+    return false;
+  }
+  Node c_n = pol == -1 ? n.negate() : n;
+  Trace("strings-process-debug")
+      << "Process reduction for " << n << ", pol = " << pol << std::endl;
+  if (k == STRING_STRCTN && pol == 1)
+  {
+    Node x = n[0];
+    Node s = n[1];
+    // positive contains reduces to a equality
+    Node sk1 =
+        d_sk_cache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
+    Node sk2 =
+        d_sk_cache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
+    Node eq = Rewriter::rewrite(x.eqNode(mkConcat(sk1, s, sk2)));
+    std::vector<Node> exp_vec;
+    exp_vec.push_back(n);
+    sendInference(d_empty_vec, exp_vec, eq, "POS-CTN", true);
+    Trace("strings-extf-debug")
+        << "  resolve extf : " << n << " based on positive contain reduction."
+        << std::endl;
+  }
+  else if (k != kind::STRING_CODE)
+  {
+    NodeManager* nm = NodeManager::currentNM();
+    Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
+           || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL
+           || k == STRING_LEQ);
+    std::vector<Node> new_nodes;
+    Node res = d_preproc.simplify(n, new_nodes);
+    Assert(res != n);
+    new_nodes.push_back(res.eqNode(n));
+    Node nnlem =
+        new_nodes.size() == 1 ? new_nodes[0] : nm->mkNode(AND, new_nodes);
+    nnlem = Rewriter::rewrite(nnlem);
+    Trace("strings-red-lemma")
+        << "Reduction_" << effort << " lemma : " << nnlem << std::endl;
+    Trace("strings-red-lemma") << "...from " << n << std::endl;
+    sendInference(d_empty_vec, nnlem, "Reduction", true);
+    Trace("strings-extf-debug")
+        << "  resolve extf : " << n << " based on reduction." << std::endl;
+  }
+  isCd = false;
+  return true;
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -968,10 +994,9 @@ bool TheoryStrings::needsCheckLastEffort() {
 }
 
 void TheoryStrings::checkExtfReductions( int effort ) {
-  //standardize this?
-  //std::vector< Node > nred;
-  //getExtTheory()->doReductions( effort, nred, false );
-
+  // Notice we don't make a standard call to ExtTheory::doReductions here,
+  // since certain optimizations like context-dependent reductions and
+  // stratifying effort levels are done in doReduction below.
   std::vector< Node > extf = getExtTheory()->getActive();
   Trace("strings-process") << "  checking " << extf.size() << " active extf"
                            << std::endl;
@@ -979,11 +1004,12 @@ void TheoryStrings::checkExtfReductions( int effort ) {
     Node n = extf[i];
     Trace("strings-process") << "  check " << n << ", active in model="
                              << d_extf_info_tmp[n].d_model_active << std::endl;
-    Node nr;
-    int ret = getReduction( effort, n, nr );
-    Assert( nr.isNull() );
-    if( ret!=0 ){
-      getExtTheory()->markReduced( extf[i] );
+    // whether the reduction was context-dependent
+    bool isCd = false;
+    bool ret = doReduction(effort, n, isCd);
+    if (ret)
+    {
+      getExtTheory()->markReduced(extf[i], isCd);
       if (hasProcessed())
       {
         return;
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 236c3906c..ec250288b 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -163,7 +163,18 @@ class TheoryStrings : public Theory {
                               std::vector<Node>& vars,
                               std::vector<Node>& subs,
                               std::map<Node, std::vector<Node> >& exp) override;
-  int getReduction(int effort, Node n, Node& nr) override;
+  //--------------------------for checkExtfReductions
+  /** do reduction
+   *
+   * This is called when an extended function application n is not able to be
+   * simplified by context-depdendent simplification, and we are resorting to
+   * expanding n to its full semantics via a reduction. This method returns
+   * true if it successfully reduced n by some reduction and sets isCd to
+   * true if the reduction was (SAT)-context-dependent, and false otherwise.
+   * The argument effort has the same meaning as in checkExtfReductions.
+   */
+  bool doReduction(int effort, Node n, bool& isCd);
+  //--------------------------end for checkExtfReductions
 
   // NotifyClass for equality engine
   class NotifyClass : public eq::EqualityEngineNotify {
@@ -278,9 +289,8 @@ private:
   NodeSet d_pregistered_terms_cache;
   NodeSet d_registered_terms_cache;
   NodeSet d_length_lemma_terms_cache;
-  // preprocess cache
+  /** preprocessing utility, for performing strings reductions */
   StringsPreprocess d_preproc;
-  NodeBoolMap d_preproc_cache;
   // extended functions inferences cache
   NodeSet d_extf_infer_cache;
   NodeSet d_extf_infer_cache_u;
-- 
2.30.2