Disable substring component contains in strip endpoints (#6266)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)

committer GitHub <noreply@github.com>

Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
committer GitHub <noreply@github.com>
Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp

index 62ba41743b31f39d8d3fe732788701567198af84..54af1ddcd8e8f63ca31a5ba440671a5ab9b7bfcb 100644 (file)
--- a/src/theory/strings/strings_entail.cpp
+++ b/src/theory/strings/strings_entail.cpp
@@ -517,7 +517,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
        n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
        Trace("strings-rewrite-debug2")
            << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
-          << ", dir = " << dir << std::endl;
+          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
        if (n1cmp.isConst())
        {
          Node s = n1cmp;
@@ -536,6 +536,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
                // can remove everything
                //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
                //   str.contains( "", str.++( "ba", x ) )
+              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
+              //   str.contains( "", "d" )
                removeComponent = true;
              }
              else if (sss.empty())  // only if not substr
@@ -546,15 +548,11 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
                // str.contains( str.++( "c", x ), str.++( "cd", y ) )
                overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
              }
-            else
-            {
-              // if we are looking at a substring, we can remove the component
-              // if there is no overlap
-              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
-              //        --> str.contains( x, "a" )
-              removeComponent =
-                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
-            }
+            // note that we cannot process substring here, since t may
+            // match only part of s. Consider:
+            // (str.++ "C" (str.substr "AB" x y)), "CB"
+            // where "AB" and "CB" have no overlap, but "C" is not part of what
+            // is matched with "AB".
            }
            else if (sss.empty())  // only if not substr
            {
@@ -572,6 +570,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
          {
            // inconclusive
          }
+        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
          // process the overlap
          if (overlap < slen)
          {
@@ -635,6 +634,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
        }
        if (removeComponent)
        {
+        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
          // can drop entire first (resp. last) component
          if (r == 0)
          {
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt

index 81c2bdba3a39521e4a4cc537020038e45a08039b..e125c651e17c59bf19e392bb660b602bfa35132b 100644 (file)
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -2041,6 +2041,7 @@ set(regress_1_tests
    regress1/strings/issue6132-non-unique-skolem.smt2
    regress1/strings/issue6142-repl-inv-rew.smt2
    regress1/strings/issue6191-replace-all.smt2
+  regress1/strings/issue6203-1-substr-ctn-strip.smt2
    regress1/strings/issue6203-2-re-ccache.smt2
    regress1/strings/kaluza-fl.smt2
    regress1/strings/loop002.smt2
diff --git a/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2 b/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2

new file mode 100644 (file)

index 0000000..dde9c82
--- /dev/null
+++ b/test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2
@@ -0,0 +1,11 @@
+; COMMAND-LINE: --strings-exp -q
+; EXPECT: sat
+(set-logic ALL)
+(declare-fun a () String)
+(declare-fun b () String)
+(declare-fun c () String)
+(declare-fun d () String)
+(declare-fun e () String)
+(assert (or (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "ab") (= (str.++ c (str.substr e (* (mod 0 0) (str.len b)) (str.len d))) "")))
+(assert (= (str.len c) (+ (str.len e) (* (str.indexof a "a" 0) (str.len b))) 1 (str.len b)))
+(check-sat)
diff --git a/test/unit/theory/sequences_rewriter_white.cpp b/test/unit/theory/sequences_rewriter_white.cpp

index a1512d7de40afc6c611a8df9298abae0f9111da8..f66a87a83778b91b62d7b6532c562f5701057a84 100644 (file)
--- a/test/unit/theory/sequences_rewriter_white.cpp
+++ b/test/unit/theory/sequences_rewriter_white.cpp
@@ -519,36 +519,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_indexOf)
        i);
    sameNormalForm(idof_substr, negOne);
  
-  {
-    // Same normal form for:
-    //
-    // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0)
-    //
-    // (+ 1 (str.len (str.substr "CCC" i j))
-    //    (str.indexof (str.++ "A" x y) "A" 0))
-    Node lhs = d_nodeManager->mkNode(
-        kind::STRING_STRIDOF,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            b,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j),
-            x,
-            a),
-        a,
-        zero);
-    Node rhs = d_nodeManager->mkNode(
-        kind::PLUS,
-        one,
-        d_nodeManager->mkNode(
-            kind::STRING_LENGTH,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, ccc, i, j)),
-        d_nodeManager->mkNode(kind::STRING_STRIDOF,
-                              d_nodeManager->mkNode(kind::STRING_CONCAT, x, a),
-                              a,
-                              zero));
-    sameNormalForm(lhs, rhs);
-  }
-
    {
      // Same normal form for:
      //
@@ -1317,23 +1287,6 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_contains)
      differentNormalForms(lhs, rhs);
    }
  
-  {
-    // Same normal form for:
-    //
-    // (str.contains (str.++ (str.substr "DEF" n m) x) "AB")
-    //
-    // (str.contains x "AB")
-    lhs = d_nodeManager->mkNode(
-        kind::STRING_STRCTN,
-        d_nodeManager->mkNode(
-            kind::STRING_CONCAT,
-            d_nodeManager->mkNode(kind::STRING_SUBSTR, def, n, m),
-            x),
-        ab);
-    rhs = d_nodeManager->mkNode(kind::STRING_STRCTN, x, ab);
-    sameNormalForm(lhs, rhs);
-  }
-
    {
      // Same normal form for:
      //
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
committer	GitHub <noreply@github.com>
	Sat, 3 Apr 2021 18:21:55 +0000 (13:21 -0500)
src/theory/strings/strings_entail.cpp		patch \| blob \| history
test/regress/CMakeLists.txt		patch \| blob \| history
test/regress/regress1/strings/issue6203-1-substr-ctn-strip.smt2	[new file with mode: 0644]	patch \| blob
test/unit/theory/sequences_rewriter_white.cpp		patch \| blob \| history