Strip non-matching beginning from indexof operator (#2885)
authorAndres Noetzli <andres.noetzli@gmail.com>
Sat, 23 Mar 2019 00:15:53 +0000 (00:15 +0000)
committerGitHub <noreply@github.com>
Sat, 23 Mar 2019 00:15:53 +0000 (00:15 +0000)
This commit adds a rewrite that strips endpoints from `str.indexof`
operators if they don't overlap with the string that is being searched
for using `stripConstantEndpoints()`. In addition to that, it makes
`stripConstantEndpoints()` slightly more aggressive by allowing it to
drop substring components that have zero overlap with the string that we
are looking at. Finally, the commit fixes the default argument for
`fullRewriter` of `checkEntailContains()` to be true instead of false,
which should allow for more rewriting opportunities.

src/theory/strings/theory_strings_rewriter.cpp
src/theory/strings/theory_strings_rewriter.h
test/unit/theory/theory_strings_rewriter_white.h

index 0763fa7d5a62f1840d541bdb97519de31041db58..27393e5d47dad1a31d952e911d46b875d3ddb066 100644 (file)
@@ -2318,6 +2318,22 @@ Node TheoryStringsRewriter::rewriteIndexof( Node node ) {
           Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
           return returnRewrite(node, ret, "idof-def-ctn");
         }
+
+        // Strip components from the beginning that are guaranteed not to match
+        if (stripConstantEndpoints(children0, children1, nb, ne, 1))
+        {
+          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
+          // 2 + str.indexof(str.++(x, "C"), "C", 0)
+          Node ret =
+              nm->mkNode(kind::PLUS,
+                         nm->mkNode(kind::STRING_LENGTH,
+                                    mkConcat(kind::STRING_CONCAT, nb)),
+                         nm->mkNode(kind::STRING_STRIDOF,
+                                    mkConcat(kind::STRING_CONCAT, children0),
+                                    node[1],
+                                    node[2]));
+          return returnRewrite(node, ret, "idof-strip-cnst-endpts");
+        }
       }
 
       // strip symbolic length
@@ -3664,6 +3680,14 @@ bool TheoryStringsRewriter::stripConstantEndpoints(std::vector<Node>& n1,
               // str.contains( str.++( "c", x ), str.++( "cd", y ) )
               overlap = r == 0 ? s.overlap(t) : t.overlap(s);
             }
+            else
+            {
+              // if we are looking at a substring, we can remove the component
+              // if there is no overlap
+              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
+              //        --> str.contains( x, "a" )
+              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+            }
           }
           else if (sss.empty())  // only if not substr
           {
index 8b0072f52671383db6f688c625df1b58a162930c..81bc29ad662400c771c8706927edb63b44bd80b2 100644 (file)
@@ -472,7 +472,7 @@ class TheoryStringsRewriter {
    * @return true node if it can be shown that `a` contains `b`, false node if
    * it can be shown that `a` does not contain `b`, null node otherwise
    */
-  static Node checkEntailContains(Node a, Node b, bool fullRewriter = false);
+  static Node checkEntailContains(Node a, Node b, bool fullRewriter = true);
 
   /** entail non-empty
    *
index 8819415686a3e4debf762b557ad32224638a9913..4650bbf273778e996407d0ea341ec8df35f0b487 100644 (file)
@@ -445,13 +445,17 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
     Node abcd = d_nm->mkConst(::CVC4::String("ABCD"));
     Node aaad = d_nm->mkConst(::CVC4::String("AAAD"));
     Node b = d_nm->mkConst(::CVC4::String("B"));
+    Node c = d_nm->mkConst(::CVC4::String("C"));
+    Node ccc = d_nm->mkConst(::CVC4::String("CCC"));
     Node x = d_nm->mkVar("x", strType);
     Node y = d_nm->mkVar("y", strType);
     Node negOne = d_nm->mkConst(Rational(-1));
+    Node zero = d_nm->mkConst(Rational(0));
     Node one = d_nm->mkConst(Rational(1));
     Node two = d_nm->mkConst(Rational(2));
     Node three = d_nm->mkConst(Rational(3));
     Node i = d_nm->mkVar("i", intType);
+    Node j = d_nm->mkVar("j", intType);
 
     // Same normal form for:
     //
@@ -486,6 +490,54 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
                      a,
                      i);
     sameNormalForm(idof_substr, negOne);
+
+    {
+      // Same normal form for:
+      //
+      // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0)
+      //
+      // (+ 1 (str.len (str.substr "CCC" i j))
+      //    (str.indexof (str.++ "A" x y) "A" 0))
+      Node lhs = d_nm->mkNode(
+          kind::STRING_STRIDOF,
+          d_nm->mkNode(kind::STRING_CONCAT,
+                       b,
+                       d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j),
+                       x,
+                       a),
+          a,
+          zero);
+      Node rhs = d_nm->mkNode(
+          kind::PLUS,
+          one,
+          d_nm->mkNode(kind::STRING_LENGTH,
+                       d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j)),
+          d_nm->mkNode(kind::STRING_STRIDOF,
+                       d_nm->mkNode(kind::STRING_CONCAT, x, a),
+                       a,
+                       zero));
+      sameNormalForm(lhs, rhs);
+    }
+
+    {
+      // Same normal form for:
+      //
+      // (str.indexof (str.++ "B" "C" "A" x y) "A" 0)
+      //
+      // (+ 2 (str.indexof (str.++ "A" x y) "A" 0))
+      Node lhs = d_nm->mkNode(kind::STRING_STRIDOF,
+                              d_nm->mkNode(kind::STRING_CONCAT, b, c, a, x, y),
+                              a,
+                              zero);
+      Node rhs =
+          d_nm->mkNode(kind::PLUS,
+                       two,
+                       d_nm->mkNode(kind::STRING_STRIDOF,
+                                    d_nm->mkNode(kind::STRING_CONCAT, a, x, y),
+                                    a,
+                                    zero));
+      sameNormalForm(lhs, rhs);
+    }
   }
 
   void testRewriteReplace()
@@ -648,6 +700,7 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
 
     Node empty = d_nm->mkConst(::CVC4::String(""));
     Node a = d_nm->mkConst(::CVC4::String("A"));
+    Node ab = d_nm->mkConst(::CVC4::String("AB"));
     Node b = d_nm->mkConst(::CVC4::String("B"));
     Node c = d_nm->mkConst(::CVC4::String("C"));
     Node abc = d_nm->mkConst(::CVC4::String("ABC"));
@@ -659,6 +712,7 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
     Node yx = d_nm->mkNode(kind::STRING_CONCAT, y, x);
     Node z = d_nm->mkVar("z", strType);
     Node n = d_nm->mkVar("n", intType);
+    Node m = d_nm->mkVar("m", intType);
     Node one = d_nm->mkConst(Rational(1));
     Node two = d_nm->mkConst(Rational(2));
     Node three = d_nm->mkConst(Rational(3));
@@ -930,6 +984,22 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
       rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc);
       differentNormalForms(lhs, rhs);
     }
+
+    {
+      // Same normal form for:
+      //
+      // (str.contains (str.++ (str.substr "DEF" n m) x) "AB")
+      //
+      // (str.contains x "AB")
+      lhs = d_nm->mkNode(
+          kind::STRING_STRCTN,
+          d_nm->mkNode(kind::STRING_CONCAT,
+                       d_nm->mkNode(kind::STRING_SUBSTR, def, n, m),
+                       x),
+          ab);
+      rhs = d_nm->mkNode(kind::STRING_STRCTN, x, ab);
+      sameNormalForm(lhs, rhs);
+    }
   }
 
   void testInferEqsFromContains()