From 600685caae205a9abe6bd7d821f5b2716f6f7520 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 30 Mar 2022 21:08:18 -0500 Subject: [PATCH] Fix non-termination in the strings rewriter (#8438) Fixes #8434. This makes our component containment utility less aggressive. This impacts (seldom used) rewrites for indexof and replace only. --- src/theory/rewriter.cpp | 4 +- src/theory/strings/strings_entail.cpp | 38 ++++++------------- src/theory/strings/strings_entail.h | 15 +++++++- test/regress/cli/CMakeLists.txt | 1 + .../strings/issue8434-nterm-str-rw.smt2 | 9 +++++ 5 files changed, 37 insertions(+), 30 deletions(-) create mode 100644 test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 diff --git a/src/theory/rewriter.cpp b/src/theory/rewriter.cpp index fb25787a6..39bf09200 100644 --- a/src/theory/rewriter.cpp +++ b/src/theory/rewriter.cpp @@ -313,8 +313,8 @@ Node Rewriter::rewriteTo(theory::TheoryId theoryId, Assert(response.d_node != rewriteStackTop.d_node); //TODO: this is not thread-safe - should make this assertion dependent on sequential build #ifdef CVC5_ASSERTIONS - Assert(d_rewriteStack->find(response.d_node) - == d_rewriteStack->end()); + Assert(d_rewriteStack->find(response.d_node) == d_rewriteStack->end()) + << "Non-terminating rewriting detected for: " << response.d_node; d_rewriteStack->insert(response.d_node); #endif Node rewritten = rewriteTo(newTheoryId, response.d_node, tcpg); diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp index e9a73478c..a20e6202b 100644 --- a/src/theory/strings/strings_entail.cpp +++ b/src/theory/strings/strings_entail.cpp @@ -254,8 +254,8 @@ int StringsEntail::componentContains(std::vector& n1, } else if (!n1re.isNull()) { - n1[i] = d_rr->rewrite( - NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); + n1[i] = + NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re); } if (remainderDir != 1) { @@ -268,8 +268,8 @@ int StringsEntail::componentContains(std::vector& n1, } else if (!n1rb.isNull()) { - n1[i] = d_rr->rewrite( - NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); + n1[i] = + NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]); } } return i; @@ -419,6 +419,13 @@ bool StringsEntail::componentContainsBase( } } } + else if (computeRemainder) + { + // Note the cases below would require constructing new terms + // as part of the remainder components. Thus, this is only checked + // when computeRemainder is false. + return false; + } else { // cases for: @@ -448,33 +455,12 @@ bool StringsEntail::componentContainsBase( } if (success) { - if (computeRemainder) - { - // we can only compute the remainder if start_pos and end_pos - // are known to be non-negative. - if (!d_arithEntail.check(start_pos) - || !d_arithEntail.check(end_pos)) - { - return false; - } - if (dir != -1) - { - n1rb = nm->mkNode(STRING_SUBSTR, - n2[0], - nm->mkConstInt(Rational(0)), - start_pos); - } - if (dir != 1) - { - n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); - } - } return true; } } } - if (!computeRemainder && dir == 0) + if (dir == 0) { if (n1.getKind() == STRING_REPLACE) { diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h index 92c040353..39362ad23 100644 --- a/src/theory/strings/strings_entail.h +++ b/src/theory/strings/strings_entail.h @@ -163,6 +163,17 @@ class StringsEntail * returns 1, * n1 is updated to { "c", x, "def" }, * nb is updated to { y, "ab" } + * + * Note that when computeRemainder is true, this check is less aggressive. + * In particular, the only terms we add to nb and ne are terms from n1 or + * substrings of words that appear in n1. If we would require constructing + * a (symbolic) substring term, we fail instead. For example: + * + * componentContains({ y }, { substr(y,0,1) }, {}, false, 1) returns 1, + * while componentContains({ y }, { substr(y,0,1) }, {}, true, 1) returns 0; + * it does not return 1 updating nb/ne to + * { substr(y,0,1) } / { substr(y,1,len(y)-1) }. This is to avoid + * non-termination in the rewriter. */ int componentContains(std::vector& n1, std::vector& n2, @@ -354,8 +365,8 @@ class StringsEntail * str.substr(y,x,z), * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) ) * - * Since we do not wish to introduce ITE terms in the rewriter, we instead - * return false, indicating that we cannot compute the remainder. + * Since we do not wish to introduce new (symbolic) terms, we + * instead return false, indicating that we cannot compute the remainder. */ bool componentContainsBase( Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); diff --git a/test/regress/cli/CMakeLists.txt b/test/regress/cli/CMakeLists.txt index 3374c2388..d4f6a2dad 100644 --- a/test/regress/cli/CMakeLists.txt +++ b/test/regress/cli/CMakeLists.txt @@ -2545,6 +2545,7 @@ set(regress_1_tests regress1/strings/issue7677-test-const-rv.smt2 regress1/strings/issue7918-learned-rewrite.smt2 regress1/strings/issue8094-witness-model.smt2 + regress1/strings/issue8434-nterm-str-rw.smt2 regress1/strings/kaluza-fl.smt2 regress1/strings/loop002.smt2 regress1/strings/loop003.smt2 diff --git a/test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 b/test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 new file mode 100644 index 000000000..7a680cd2a --- /dev/null +++ b/test/regress/cli/regress1/strings/issue8434-nterm-str-rw.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: unsat +(set-logic ALL) +(declare-fun T_7 () String) +(declare-fun T_c () String) +(assert (= T_c (str.++ (str.at T_7 0) ";"))) +(assert (= 0 (ite (str.contains "" (str.substr T_c 1 (str.indexof (str.substr T_c 0 +(str.indexof T_c (str.substr T_c (+ 1 1) 1) 0)) "T" 0))) 1 0))) +(check-sat) -- 2.30.2