From: Andrew Reynolds Date: Mon, 6 Nov 2017 05:14:16 +0000 (-0600) Subject: Improve rewriting for string contains part 2 (#1300) X-Git-Tag: cvc5-1.0.0~5510 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d72fcccb19e43d0140b8a6ac954c2858dd3a239a;p=cvc5.git Improve rewriting for string contains part 2 (#1300) * Generalize component contains, some infrastructure. * Length entailment, substr for component contains, int.to.str for constant can contain concat. * Disable rewrite. * Fix, reenable length entailment for contains. * Clang format, minor. * Comment * Minor fixes and improvements for comments. * Improvements * Clang format * Fixes * Clang format * Fix, improve. * Format * Fix assertion --- diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index fd8f4a41b..caf143b37 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1064,6 +1064,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { if( node[2].isConst() && node[2].getConst().sgn()<=0 ) { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); }else if( node[1].isConst() ){ + // TODO (#1180) : use entailment test here if( node[1].getConst().sgn()<0 ){ //bring forward to start at zero? don't use this semantics, e.g. does not compose well with error conditions for str.indexof. //retNode = NodeManager::currentNM()->mkNode( kind::STRING_SUBSTR, node[0], zero, NodeManager::currentNM()->mkNode( kind::PLUS, node[1], node[2] ) ); @@ -1111,6 +1112,8 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } }else{ if( node[1]==zero ){ + // TODO (#1180) : use entailment test here instead of the special + // case if( node[2].getKind() == kind::STRING_LENGTH && node[2][0]==node[0] ){ retNode = node[0]; }else{ @@ -1421,10 +1424,11 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { getConcat(node[0], nc1); std::vector nc2; getConcat(node[1], nc2); - std::vector nr; // component-wise containment - if (componentContains(nc1, nc2, nr) != -1) + std::vector nc1rb; + std::vector nc1re; + if (componentContains(nc1, nc2, nc1rb, nc1re) != -1) { return NodeManager::currentNM()->mkConst(true); } @@ -1440,6 +1444,20 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { Trace("strings-rewrite-debug2") << "No constant endpoints for " << node[0] << " " << node[1] << std::endl; + // length entailment + Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len_n2, len_n1, true)) + { + // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false + return NodeManager::currentNM()->mkConst(false); + } + else if (checkEntailArithEq(len_n1, len_n2)) + { + // len( n2 ) = len( n1 ) => contains( n1, n2 ) ---> n1 = n2 + return node[0].eqNode(node[1]); + } + // splitting if (node[0].getKind() == kind::STRING_CONCAT) { @@ -1593,6 +1611,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { if( node[1]==node[2] ){ return node[0]; }else{ + // TODO (#1180) : try str.contains( node[0], node[1] ) ---> false if( node[1].isConst() ){ if( node[1].getConst().isEmptyString() ){ return node[0]; @@ -1679,6 +1698,7 @@ Node TheoryStringsRewriter::splitConstant( Node a, Node b, int& index, bool isRe bool TheoryStringsRewriter::canConstantContainConcat( Node c, Node n, int& firstc, int& lastc ) { Assert( c.isConst() ); CVC4::String t = c.getConst(); + const std::vector& tvec = t.getVec(); Assert( n.getKind()==kind::STRING_CONCAT ); //must find constant components in order size_t pos = 0; @@ -1696,6 +1716,20 @@ bool TheoryStringsRewriter::canConstantContainConcat( Node c, Node n, int& first pos = new_pos + s.size(); } } + else if (n[i].getKind() == kind::STRING_ITOS) + { + // find the first occurrence of a digit starting at pos + while (pos < tvec.size() && !String::isDigit(tvec[pos])) + { + pos++; + } + if (pos == tvec.size()) + { + return false; + } + // must consume at least one digit here + pos++; + } } return true; } @@ -1757,35 +1791,55 @@ Node TheoryStringsRewriter::collectConstantStringAt( std::vector< Node >& vec, u int TheoryStringsRewriter::componentContains(std::vector& n1, std::vector& n2, - std::vector& nr, - bool computeRemainder) + std::vector& nb, + std::vector& ne, + bool computeRemainder, + int remainderDir) { - if (n2.size() == 1 && n2[0].isConst()) + Assert(nb.empty()); + Assert(ne.empty()); + // if n2 is a singleton, we can do optimized version here + if (n2.size() == 1) { - CVC4::String t = n2[0].getConst(); for (unsigned i = 0; i < n1.size(); i++) { - if (n1[i].isConst()) + Node n1rb; + Node n1re; + if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) { - CVC4::String s = n1[i].getConst(); - size_t f = s.find(t); - if (f != std::string::npos) + if (computeRemainder) { - if (computeRemainder) + n1[i] = n2[0]; + if (remainderDir != -1) { - Assert(s.size() >= f + t.size()); - if (s.size() > f + t.size()) + if (!n1re.isNull()) { - nr.push_back(NodeManager::currentNM()->mkConst(::CVC4::String( - s.substr(f + t.size(), s.size() - (f + t.size()))))); + ne.push_back(n1re); } - nr.insert(nr.end(), n1.begin() + i + 1, n1.end()); - n1[i] = NodeManager::currentNM()->mkConst( - ::CVC4::String(s.substr(0, f + t.size()))); + ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); n1.erase(n1.begin() + i + 1, n1.end()); } - return i; + else if (!n1re.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1[i], n1re)); + } + if (remainderDir != 1) + { + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb.isNull()) + { + nb.push_back(n1rb); + } + } + else if (!n1rb.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1rb, n1[i])); + } } + return i; } } } @@ -1794,73 +1848,192 @@ int TheoryStringsRewriter::componentContains(std::vector& n1, unsigned diff = n1.size() - n2.size(); for (unsigned i = 0; i <= diff; i++) { - bool check = false; - if (n1[i] == n2[0]) - { - check = true; - } - else if (n1[i].isConst() && n2[0].isConst()) - { - // could be suffix - CVC4::String s = n1[i].getConst(); - CVC4::String t = n2[0].getConst(); - if (t.size() < s.size() && s.suffix(t.size()) == t) - { - check = true; - } - } - if (check) + Node n1rb_first; + Node n1re_first; + // first component of n2 must be a suffix + if (componentContainsBase(n1[i], + n2[0], + n1rb_first, + n1re_first, + 1, + computeRemainder && remainderDir != 1)) { - bool success = true; + Assert(n1re_first.isNull()); for (unsigned j = 1; j < n2.size(); j++) { - if (n1[i + j] != n2[j]) + // are we in the last component? + if (j + 1 == n2.size()) { - if (j + 1 == n2.size() && n1[i + j].isConst() && n2[j].isConst()) + Node n1rb_last; + Node n1re_last; + // last component of n2 must be a prefix + if (componentContainsBase(n1[i + j], + n2[j], + n1rb_last, + n1re_last, + -1, + computeRemainder && remainderDir != -1)) { - // could be prefix - CVC4::String s = n1[i + j].getConst(); - CVC4::String t = n2[j].getConst(); - if (t.size() < s.size() && s.prefix(t.size()) == t) + Assert(n1rb_last.isNull()); + if (computeRemainder) { - if (computeRemainder) + if (remainderDir != -1) { - if (s.size() > t.size()) + if (!n1re_last.isNull()) { - nr.push_back( - NodeManager::currentNM()->mkConst(::CVC4::String( - s.substr(t.size(), s.size() - t.size())))); + ne.push_back(n1re_last); + } + ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); + n1.erase(n1.begin() + i + j + 1, n1.end()); + n1[i + j] = n2[j]; + } + if (remainderDir != 1) + { + n1[i] = n2[0]; + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb_first.isNull()) + { + nb.push_back(n1rb_first); } - n1[i + j] = NodeManager::currentNM()->mkConst( - ::CVC4::String(s.substr(0, t.size()))); } } - else - { - success = false; - break; - } + return i; } else { - success = false; break; } } + else if (n1[i + j] != n2[j]) + { + break; + } + } + } + } + } + return -1; +} + +bool TheoryStringsRewriter::componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) +{ + Assert(n1rb.isNull()); + Assert(n1re.isNull()); + if (n1 == n2) + { + return true; + } + else + { + if (n1.isConst() && n2.isConst()) + { + CVC4::String s = n1.getConst(); + CVC4::String t = n2.getConst(); + if (t.size() < s.size()) + { + if (dir == 1) + { + if (s.suffix(t.size()) == t) + { + if (computeRemainder) + { + n1rb = NodeManager::currentNM()->mkConst( + ::CVC4::String(s.prefix(s.size() - t.size()))); + } + return true; + } + } + else if (dir == -1) + { + if (s.prefix(t.size()) == t) + { + if (computeRemainder) + { + n1re = NodeManager::currentNM()->mkConst( + ::CVC4::String(s.suffix(s.size() - t.size()))); + } + return true; + } + } + else + { + size_t f = s.find(t); + if (f != std::string::npos) + { + if (computeRemainder) + { + if (f > 0) + { + n1rb = NodeManager::currentNM()->mkConst( + ::CVC4::String(s.prefix(f))); + } + if (s.size() > f + t.size()) + { + n1re = NodeManager::currentNM()->mkConst( + ::CVC4::String(s.suffix(s.size() - (f + t.size())))); + } + } + return true; + } } - if (success) + } + } + else + { + // cases for: + // n1 = x containing n2 = substr( x, n2[1], n2[2] ) + if (n2.getKind() == kind::STRING_SUBSTR) + { + if (n2[0] == n1) { - if (computeRemainder) + bool success = true; + Node start_pos = n2[1]; + Node end_pos = + NodeManager::currentNM()->mkNode(kind::PLUS, n2[1], n2[2]); + Node len_n2s = + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n2[0]); + if (dir == 1) + { + // To be suffix, start + length must be greater than + // or equal to the length of the string. + success = checkEntailArith(end_pos, len_n2s); + } + else if (dir == -1) { - nr.insert(nr.end(), n1.begin() + i + n2.size(), n1.end()); - n1.erase(n1.begin() + i + n2.size(), n1.end()); + // To be prefix, must literally start at 0, since + // if we knew it started at <0, it should be rewritten to "", + // if we knew it started at 0, then n2[1] should be rewritten to + // 0. + success = start_pos.isConst() + && start_pos.getConst().sgn() == 0; + } + if (success) + { + if (computeRemainder) + { + if (dir != 1) + { + n1rb = NodeManager::currentNM()->mkNode( + kind::STRING_SUBSTR, + n2[0], + NodeManager::currentNM()->mkConst(Rational(0)), + start_pos); + } + if (dir != -1) + { + n1re = NodeManager::currentNM()->mkNode( + kind::STRING_SUBSTR, n2[0], end_pos, len_n2s); + } + } + return true; } - return i; } } } } - return -1; + return false; } bool TheoryStringsRewriter::stripConstantEndpoints(std::vector& n1, @@ -1869,6 +2042,8 @@ bool TheoryStringsRewriter::stripConstantEndpoints(std::vector& n1, std::vector& ne, int dir) { + Assert(nb.empty()); + Assert(ne.empty()); bool changed = false; // for ( forwards, backwards ) direction for (unsigned r = 0; r < 2; r++) @@ -2039,3 +2214,83 @@ bool TheoryStringsRewriter::stripConstantEndpoints(std::vector& n1, // which is 3. return changed; } + +bool TheoryStringsRewriter::checkEntailArithEq(Node a, Node b) +{ + if (a == b) + { + return true; + } + else + { + Node ar = Rewriter::rewrite(a); + Node br = Rewriter::rewrite(b); + return ar == br; + } +} + +bool TheoryStringsRewriter::checkEntailArith(Node a, Node b, bool strict) +{ + if (a == b) + { + return !strict; + } + else + { + Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); + return checkEntailArith(diff, strict); + } +} + +bool TheoryStringsRewriter::checkEntailArith(Node a, bool strict) +{ + if (a.isConst()) + { + return a.getConst().sgn() >= (strict ? 1 : 0); + } + else + { + Node ar = strict + ? NodeManager::currentNM()->mkNode( + kind::MINUS, + a, + NodeManager::currentNM()->mkConst(Rational(1))) + : a; + ar = Rewriter::rewrite(ar); + if (checkEntailArithInternal(ar)) + { + return true; + } + // TODO (#1180) : abstract interpretation goes here + return false; + } +} + +bool TheoryStringsRewriter::checkEntailArithInternal(Node a) +{ + Assert(Rewriter::rewrite(a) == a); + // check whether a >= 0 + if (a.isConst()) + { + return a.getConst().sgn() >= 0; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + // str.len( t ) >= 0 + return true; + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + if (!checkEntailArithInternal(a[i])) + { + return false; + } + } + // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0 + return true; + } + + return false; +} diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 2dff1ad52..593458843 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -45,7 +45,13 @@ private: static Node rewriteMembership(TNode node); static bool hasEpsilonNode(TNode node); -public: + /** check entail arithmetic internal + * Returns true if we can show a >= 0 always. + * a is in rewritten form. + */ + static bool checkEntailArithInternal(Node a); + + public: static RewriteResponse postRewrite(TNode node); static RewriteResponse preRewrite(TNode node); @@ -107,29 +113,87 @@ public: * n1 is the vector form of t1 * n2 is the vector form of t2 * - * if this function returns n>=0 for some n, then + * If this function returns n>=0 for some n, then * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm }, * n2 = { y1...ys }, * y1 is a suffix of xn, * y2...y{s-1} = x{n+1}...x{n+s-1}, and * ys is a prefix of x{n+s} - * if computeRemainder = true, then - * n1 is updated to { x1...x{n-1} xn... x{n+s-1} ys }, and - * nr is set to { ( x{n+s} \ ys ) x{n+s+1}...xm } - * where ( x{n+s} \ ys ) denotes string remainder (see operator "\" in - * Section 3.2 of Reynolds et al CAV 2017). - * otherwise it returns -1. + * Otherwise it returns -1. + * + * This function may update n1 if computeRemainder = true. + * We maintain the invariant that the resulting value n1' + * of n1 after this function is such that: + * n1 = str.++( nb, n1', ne ) + * The vectors nb and ne have the following properties. + * If computeRemainder = true, then + * If remainderDir != -1, then + * ne is { x{n+s}' x{n+s+1}...xm } + * where x{n+s} = str.++( ys, x{n+s}' ). + * If remainderDir != 1, then + * nb is { x1, ..., x{n-1}, xn' } + * where xn = str.++( xn', y1 ). * * For example: - * componentContains( { y, "abc", x, "def" }, { "c", x, "de" }, {}, true ) + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 0) + * returns 1, + * n1 is updated to { "b" }, + * nb is updated to { x, "a" }, + * ne is updated to { "c", x } + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 1) * returns 1, - * n1 is updated to { y, "abc", x, "de" }, - * nr is updated to { "f" } + * n1 is updated to { x, "ab" }, + * ne is updated to { "c", x } + * + * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1) + * returns 2, + * n1 is updated to { y, z, "abc", x, "de" }, + * ne is updated to { "f" } + * + * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1) + * returns 1, + * n1 is updated to { "c", x, "def" }, + * nb is updated to { y, "ab" } */ static int componentContains(std::vector& n1, std::vector& n2, - std::vector& nr, - bool computeRemainder = false); + std::vector& nb, + std::vector& ne, + bool computeRemainder = false, + int remainderDir = 0); + /** component contains base + * + * This function is a helper for the above function. + * + * It returns true if n2 is contained in n1 with the following + * restrictions: + * If dir=1, then n2 must be a suffix of n1. + * If dir=-1, then n2 must be a prefix of n1. + * + * If computeRemainder is true, then n1rb and n1re are + * updated such that : + * n1 = str.++( n1rb, n2, n1re ) + * where a null value of n1rb and n1re indicates the + * empty string. + * + * For example: + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false) + * returns false. + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true) + * returns true, + * n1rb is set to "c", + * n1re is set to "e". + * + * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true) + * returns true, + * n1re is set to str.substr(y,5,str.len(y)). + */ + static bool componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); /** strip constant endpoints * This function is used when rewriting str.contains( t1, t2 ), where * n1 is the vector form of t1 @@ -148,11 +212,11 @@ public: * It returns true if n1 is modified. * * For example: - * stripConstantEndpoints( { "ab", x, "de" }, { "c" }, {}, {}, 1 ) + * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1) * returns true, * n1 is updated to { x, "de" } * nb is updated to { "ab" } - * stripConstantEndpoints( { "ab", x, "de" }, { "bd" }, {}, {}, 0 ) + * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0) * returns true, * n1 is updated to { "b", x, "d" } * nb is updated to { "a" } @@ -163,6 +227,19 @@ public: std::vector& nb, std::vector& ne, int dir = 0); + /** check arithmetic entailment equal + * Returns true if it is always the case that a = b. + */ + static bool checkEntailArithEq(Node a, Node b); + /** check arithmetic entailment + * Returns true if it is always the case that a >= b, + * and a>b if strict is true. + */ + static bool checkEntailArith(Node a, Node b, bool strict = false); + /** check arithmetic entailment + * Returns true if it is always the case that a >= 0. + */ + static bool checkEntailArith(Node a, bool strict = false); };/* class TheoryStringsRewriter */ }/* CVC4::theory::strings namespace */ diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index e27a1bfcd..cd3351c59 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -90,7 +90,8 @@ TESTS = \ bug799-min.smt2 \ strings-charat.cvc \ issue1105.smt2 \ - issue1189.smt2 + issue1189.smt2 \ + rewrites-v2.smt2 FAILING_TESTS = diff --git a/test/regress/regress0/strings/rewrites-v2.smt2 b/test/regress/regress0/strings/rewrites-v2.smt2 new file mode 100644 index 000000000..7e285b51a --- /dev/null +++ b/test/regress/regress0/strings/rewrites-v2.smt2 @@ -0,0 +1,20 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: unsat +(set-logic SLIA) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () Int) + +; these should all rewrite to false +(assert (or +(str.contains "abcdef0ghij1abced" (str.++ "g" (int.to.str z) x "a" y (int.to.str (+ z 1)))) +(str.contains "abc23cd" (str.++ (int.to.str z) (int.to.str z) (int.to.str z))) +(not (str.contains (str.++ x "ab" y) (str.++ "b" (str.substr y 0 4)))) +(not (str.contains (str.++ x "ab" y) (str.++ (str.substr x 5 (str.len x)) "a"))) +(str.contains (str.++ x y) (str.++ x "a" y)) +(str.contains (str.++ x y) (str.++ y x x y "a")) +) +) + +(check-sat)