Disable substring component contains in strip endpoints (#6266)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
committerGitHub <noreply@github.com>
Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
Fixes the first benchmark from #6203.

src/theory/strings/strings_entail.cpp
test/regress/CMakeLists.txt
test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2 [new file with mode: 0644]
test/unit/theory/sequences_rewriter_white.cpp

index 62ba41743b31f39d8d3fe732788701567198af84..54af1ddcd8e8f63ca31a5ba440671a5ab9b7bfcb 100644 (file)
@@ -517,7 +517,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
       Trace("strings-rewrite-debug2")
           << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
-          << ", dir = " << dir << std::endl;
+          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
       if (n1cmp.isConst())
       {
         Node s = n1cmp;
@@ -536,6 +536,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // can remove everything
               //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
               //   str.contains( "", str.++( "ba", x ) )
+              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
+              //   str.contains( "", "d" )
               removeComponent = true;
             }
             else if (sss.empty())  // only if not substr
@@ -546,15 +548,11 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
               // str.contains( str.++( "c", x ), str.++( "cd", y ) )
               overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
             }
-            else
-            {
-              // if we are looking at a substring, we can remove the component
-              // if there is no overlap
-              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
-              //        --> str.contains( x, "a" )
-              removeComponent =
-                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
-            }
+            // note that we cannot process substring here, since t may
+            // match only part of s. Consider:
+            // (str.++ "C" (str.substr "AB" x y)), "CB"
+            // where "AB" and "CB" have no overlap, but "C" is not part of what
+            // is matched with "AB".
           }
           else if (sss.empty())  // only if not substr
           {
@@ -572,6 +570,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
         {
           // inconclusive
         }
+        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
         // process the overlap
         if (overlap < slen)
         {
@@ -635,6 +634,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
       }
       if (removeComponent)
       {
+        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
         // can drop entire first (resp. last) component
         if (r == 0)
         {
index 81c2bdba3a39521e4a4cc537020038e45a08039b..e125c651e17c59bf19e392bb660b602bfa35132b 100644 (file)
@@ -2041,6 +2041,7 @@ set(regress_1_tests
   regress1/strings/issue6132-non-unique-skolem.smt2
   regress1/strings/issue6142-repl-inv-rew.smt2
   regress1/strings/issue6191-replace-all.smt2
+  regress1/strings/issue6203-1-substr-ctn-strip.smt2
   regress1/strings/issue6203-2-re-ccache.smt2
   regress1/strings/kaluza-fl.smt2
   regress1/strings/loop002.smt2
diff --git a/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2 b/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2
new file mode 100644 (file)
index 0000000..dde9c82
--- /dev/null
@@ -0,0 +1,11 @@
+; COMMAND-LINE: --strings-exp -q
+; EXPECT: sat
+(set-logic ALL)
+(declare-fun a () String)
+(declare-fun b () String)
+(declare-fun c () String)
+(declare-fun d () String)
+(declare-fun e () String)
+(assert (or (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "ab") (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "")))
+(assert (= (str.len c) (+ (str.len e) (* (str.indexof a "a" 0) (str.len b))) 1 (str.len b)))
+(check-sat)
index a1512d7de40afc6c611a8df9298abae0f9111da8..f66a87a83778b91b62d7b6532c562f5701057a84 100644 (file)
@@ -519,36 +519,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_indexOf)
       i);
   sameNormalForm(idof_substr, negOne);
 
-  {
-    // Same normal form for:
-    //
-    // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0)
-    //
-    // (+ 1 (str.len (str.substr "CCC" i j))
-    //    (str.indexof (str.++ "A" x y) "A" 0))
-    Node lhs = d_nodeManager->mkNode(
-        kind::STRING_STRIDOF,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            b,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j),
-            x,
-            a),
-        a,
-        zero);
-    Node rhs = d_nodeManager->mkNode(
-        kind::PLUS,
-        one,
-        d_nodeManager->mkNode(
-            kind::STRING_LENGTH,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j)),
-        d_nodeManager->mkNode(kind::STRING_STRIDOF,
-                              d_nodeManager->mkNode(kind::STRING_CONCAT, x, a),
-                              a,
-                              zero));
-    sameNormalForm(lhs, rhs);
-  }
-
   {
     // Same normal form for:
     //
@@ -1317,23 +1287,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_contains)
     differentNormalForms(lhs, rhs);
   }
 
-  {
-    // Same normal form for:
-    //
-    // (str.contains (str.++ (str.substr "DEF" n m) x) "AB")
-    //
-    // (str.contains x "AB")
-    lhs = d_nodeManager->mkNode(
-        kind::STRING_STRCTN,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, def, n, m),
-            x),
-        ab);
-    rhs = d_nodeManager->mkNode(kind::STRING_STRCTN, x, ab);
-    sameNormalForm(lhs, rhs);
-  }
-
   {
     // Same normal form for:
     //