Fix non-termination in the strings rewriter (#8438)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Thu, 31 Mar 2022 02:08:18 +0000 (21:08 -0500)
committerGitHub <noreply@github.com>
Thu, 31 Mar 2022 02:08:18 +0000 (02:08 +0000)
Fixes #8434.

This makes our component containment utility less aggressive. This impacts (seldom used) rewrites for indexof and replace only.

src/theory/rewriter.cpp
src/theory/strings/strings_entail.cpp
src/theory/strings/strings_entail.h
test/regress/cli/CMakeLists.txt
test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 [new file with mode: 0644]

index fb25787a6b6a74d2b8355747b23f273c24e75234..39bf09200eed89e7632143cf45c11fdde1c51678 100644 (file)
@@ -313,8 +313,8 @@ Node Rewriter::rewriteTo(theory::TheoryId theoryId,
           Assert(response.d_node != rewriteStackTop.d_node);
           //TODO: this is not thread-safe - should make this assertion dependent on sequential build
 #ifdef CVC5_ASSERTIONS
-          Assert(d_rewriteStack->find(response.d_node)
-                 == d_rewriteStack->end());
+          Assert(d_rewriteStack->find(response.d_node) == d_rewriteStack->end())
+              << "Non-terminating rewriting detected for: " << response.d_node;
           d_rewriteStack->insert(response.d_node);
 #endif
           Node rewritten = rewriteTo(newTheoryId, response.d_node, tcpg);
index e9a73478ce7662765ea8ff4aad084bd16fd6d297..a20e6202b368711f669d794f25204b24c2ab36e0 100644 (file)
@@ -254,8 +254,8 @@ int StringsEntail::componentContains(std::vector<Node>& n1,
           }
           else if (!n1re.isNull())
           {
-            n1[i] = d_rr->rewrite(
-                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
+            n1[i] =
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re);
           }
           if (remainderDir != 1)
           {
@@ -268,8 +268,8 @@ int StringsEntail::componentContains(std::vector<Node>& n1,
           }
           else if (!n1rb.isNull())
           {
-            n1[i] = d_rr->rewrite(
-                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
+            n1[i] =
+                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]);
           }
         }
         return i;
@@ -419,6 +419,13 @@ bool StringsEntail::componentContainsBase(
         }
       }
     }
+    else if (computeRemainder)
+    {
+      // Note the cases below would require constructing new terms
+      // as part of the remainder components. Thus, this is only checked
+      // when computeRemainder is false.
+      return false;
+    }
     else
     {
       // cases for:
@@ -448,33 +455,12 @@ bool StringsEntail::componentContainsBase(
           }
           if (success)
           {
-            if (computeRemainder)
-            {
-              // we can only compute the remainder if start_pos and end_pos
-              // are known to be non-negative.
-              if (!d_arithEntail.check(start_pos)
-                  || !d_arithEntail.check(end_pos))
-              {
-                return false;
-              }
-              if (dir != -1)
-              {
-                n1rb = nm->mkNode(STRING_SUBSTR,
-                                  n2[0],
-                                  nm->mkConstInt(Rational(0)),
-                                  start_pos);
-              }
-              if (dir != 1)
-              {
-                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
-              }
-            }
             return true;
           }
         }
       }
 
-      if (!computeRemainder && dir == 0)
+      if (dir == 0)
       {
         if (n1.getKind() == STRING_REPLACE)
         {
index 92c0403533b98fbc5a7d5cd983ceaacc76f6ea0f..39362ad23c021316e05b9a3ffd9fdfa57866341f 100644 (file)
@@ -163,6 +163,17 @@ class StringsEntail
    *   returns 1,
    *   n1 is updated to { "c", x, "def" },
    *   nb is updated to { y, "ab" }
+   *
+   * Note that when computeRemainder is true, this check is less aggressive.
+   * In particular, the only terms we add to nb and ne are terms from n1 or
+   * substrings of words that appear in n1. If we would require constructing
+   * a (symbolic) substring term, we fail instead. For example:
+   *
+   * componentContains({ y }, { substr(y,0,1) }, {}, false, 1) returns 1,
+   * while componentContains({ y }, { substr(y,0,1) }, {}, true, 1) returns 0;
+   * it does not return 1 updating nb/ne to
+   * { substr(y,0,1) } / { substr(y,1,len(y)-1) }. This is to avoid
+   * non-termination in the rewriter.
    */
   int componentContains(std::vector<Node>& n1,
                         std::vector<Node>& n2,
@@ -354,8 +365,8 @@ class StringsEntail
    *               str.substr(y,x,z),
    *               ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) )
    *
-   * Since we do not wish to introduce ITE terms in the rewriter, we instead
-   * return false, indicating that we cannot compute the remainder.
+   * Since we do not wish to introduce new (symbolic) terms, we
+   * instead return false, indicating that we cannot compute the remainder.
    */
   bool componentContainsBase(
       Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder);
index 3374c2388fe787e424e08b503b5e6bb2a0dc9e1e..d4f6a2dadf22f49dfa5b713c7d283dc1f23f1759 100644 (file)
@@ -2545,6 +2545,7 @@ set(regress_1_tests
   regress1/strings/issue7677-test-const-rv.smt2
   regress1/strings/issue7918-learned-rewrite.smt2
   regress1/strings/issue8094-witness-model.smt2
+  regress1/strings/issue8434-nterm-str-rw.smt2
   regress1/strings/kaluza-fl.smt2
   regress1/strings/loop002.smt2
   regress1/strings/loop003.smt2
diff --git a/test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 b/test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2
new file mode 100644 (file)
index 0000000..7a680cd
--- /dev/null
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: unsat
+(set-logic ALL)
+(declare-fun T_7 () String)                                                     
+(declare-fun T_c () String)                                                     
+(assert (= T_c (str.++ (str.at T_7 0) ";")))                                    
+(assert (= 0 (ite (str.contains "" (str.substr T_c 1 (str.indexof (str.substr T_c 0 
+(str.indexof T_c (str.substr T_c (+ 1 1) 1) 0)) "T" 0))) 1 0)))                 
+(check-sat)