Strings: More aggressive skolem normalization (#2761)
authorAndres Noetzli <andres.noetzli@gmail.com>
Tue, 18 Jun 2019 17:04:08 +0000 (10:04 -0700)
committerGitHub <noreply@github.com>
Tue, 18 Jun 2019 17:04:08 +0000 (10:04 -0700)
This PR makes the skolem normalization in the string solver quite a bit
more aggressive by reducing more skolems to prefix and suffix skolems.
Experiments show that this PR significantly improves performance on CMU
benchmarks.

src/theory/strings/skolem_cache.cpp
src/theory/strings/theory_strings.cpp

index 8791b54b5c61698353c8698304844a0b0d0627c8..b4e1c74eab68c5aa18f795ae6d34fd42b7918d88 100644 (file)
@@ -18,6 +18,8 @@
 #include "theory/strings/theory_strings_rewriter.h"
 #include "util/rational.h"
 
+using namespace CVC4::kind;
+
 namespace CVC4 {
 namespace theory {
 namespace strings {
@@ -90,23 +92,96 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b)
   Trace("skolem-cache") << "normalizeStringSkolem start: (" << id << ", " << a
                         << ", " << b << ")" << std::endl;
 
-  // SK_PURIFY(str.substr x 0 (str.indexof x y 0)) ---> SK_FIRST_CTN_PRE(x, y)
+  NodeManager* nm = NodeManager::currentNM();
+
+  if (id == SK_FIRST_CTN_POST)
+  {
+    // SK_FIRST_CTN_POST(x, y) --->
+    //   SK_SUFFIX_REM(x, (+ (str.len SK_FIRST_CTN_PRE(x, y)) (str.len y)))
+    id = SK_SUFFIX_REM;
+    Node pre = mkSkolemCached(a, b, SK_FIRST_CTN_PRE, "pre");
+    b = Rewriter::rewrite(nm->mkNode(
+        PLUS, nm->mkNode(STRING_LENGTH, pre), nm->mkNode(STRING_LENGTH, b)));
+  }
+
+  if (id == SK_ID_C_SPT)
+  {
+    // SK_ID_C_SPT(x, y) ---> SK_SUFFIX_REM(x, (str.len y))
+    id = SK_SUFFIX_REM;
+    b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, b));
+  }
+  else if (id == SK_ID_VC_SPT)
+  {
+    // SK_ID_VC_SPT(x, y) ---> SK_SUFFIX_REM(x, 1)
+    id = SK_SUFFIX_REM;
+    b = nm->mkConst(Rational(1));
+  }
+  else if (id == SK_ID_VC_SPT_REV)
+  {
+    // SK_ID_VC_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) 1))
+    id = SK_PREFIX;
+    b = Rewriter::rewrite(nm->mkNode(
+        MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkConst(Rational(1))));
+  }
+  else if (id == SK_ID_DC_SPT)
+  {
+    // SK_ID_DC_SPT(x, y) ---> SK_PREFIX(x, 1)
+    id = SK_PREFIX;
+    b = nm->mkConst(Rational(1));
+  }
+  else if (id == SK_ID_DC_SPT_REM)
+  {
+    // SK_ID_DC_SPT_REM(x, y) ---> SK_SUFFIX_REM(x, 1)
+    id = SK_SUFFIX_REM;
+    b = nm->mkConst(Rational(1));
+  }
+  else if (id == SK_ID_DEQ_X)
+  {
+    // SK_ID_DEQ_X(x, y) ---> SK_PREFIX(y, (str.len x))
+    id = SK_PREFIX;
+    Node aOld = a;
+    a = b;
+    b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, aOld));
+  }
+  else if (id == SK_ID_DEQ_Y)
+  {
+    // SK_ID_DEQ_Y(x, y) ---> SK_PREFIX(x, (str.len y))
+    id = SK_PREFIX;
+    b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, b));
+  }
+
   if (id == SK_PURIFY && a.getKind() == kind::STRING_SUBSTR)
   {
     Node s = a[0];
     Node n = a[1];
     Node m = a[2];
-    if (m.getKind() == kind::STRING_STRIDOF && m[0] == s)
+
+    if (n == d_zero)
+    {
+      // SK_PURIFY((str.substr x 0 m)) ---> SK_PREFIX(x, m)
+      id = SK_PREFIX;
+      a = s;
+      b = m;
+    }
+    else if (TheoryStringsRewriter::checkEntailArith(
+                 nm->mkNode(PLUS, n, m), nm->mkNode(STRING_LENGTH, s)))
     {
-      if (n == d_zero && m[2] == d_zero)
-      {
-        id = SK_FIRST_CTN_PRE;
-        a = m[0];
-        b = m[1];
-      }
+      // SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n)
+      // if n + m >= (str.len x)
+      id = SK_SUFFIX_REM;
+      a = s;
+      b = n;
     }
   }
 
+  if (id == SK_PREFIX && b.getKind() == kind::STRING_STRIDOF && a == b[0]
+      && b[2] == d_zero)
+  {
+    // SK_PREFIX(x, (str.indexof x y 0)) ---> SK_FIRST_CTN_PRE(x, y)
+    id = SK_FIRST_CTN_PRE;
+    b = b[1];
+  }
+
   if (id == SK_FIRST_CTN_PRE)
   {
     // SK_FIRST_CTN_PRE((str.substr x 0 n), y) ---> SK_FIRST_CTN_PRE(x, y)
index d37d4f9048709c32b40f2beed08818c7fa646c09..cea3c35150bca51b4e564f3b0b320133fbc95010 100644 (file)
@@ -3289,7 +3289,6 @@ void TheoryStrings::processSimpleNEq(NormalForm& nfi,
                       Node firstChar = stra.size() == 1 ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( 1 ) : stra.prefix( 1 ) );
                       Node sk = d_sk_cache.mkSkolemCached(
                           other_str,
-                          firstChar,
                           isRev ? SkolemCache::SK_ID_VC_SPT_REV
                                 : SkolemCache::SK_ID_VC_SPT,
                           "c_spt");
@@ -3687,11 +3686,10 @@ void TheoryStrings::processDeq( Node ni, Node nj ) {
                   }
                 }else{
                   Node sk = d_sk_cache.mkSkolemCached(
-                      nconst_k, firstChar, SkolemCache::SK_ID_DC_SPT, "dc_spt");
+                      nconst_k, SkolemCache::SK_ID_DC_SPT, "dc_spt");
                   registerLength(sk, LENGTH_ONE);
                   Node skr =
                       d_sk_cache.mkSkolemCached(nconst_k,
-                                                firstChar,
                                                 SkolemCache::SK_ID_DC_SPT_REM,
                                                 "dc_spt_rem");
                   Node eq1 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk, skr ) );