Move cardinality inference scheme to base solver in strings (#3792)
[cvc5.git] / src / theory / strings / theory_strings_rewriter.cpp
1 /********************* */
2 /*! \file theory_strings_rewriter.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of the theory of strings.
13 **
14 ** Implementation of the theory of strings.
15 **/
16
17 #include "theory/strings/theory_strings_rewriter.h"
18
19 #include <stdint.h>
20 #include <algorithm>
21
22 #include "expr/node_builder.h"
23 #include "options/strings_options.h"
24 #include "smt/logic_exception.h"
25 #include "theory/arith/arith_msum.h"
26 #include "theory/strings/regexp_operation.h"
27 #include "theory/strings/theory_strings_utils.h"
28 #include "theory/theory.h"
29 #include "util/integer.h"
30 #include "util/rational.h"
31
32 using namespace std;
33 using namespace CVC4;
34 using namespace CVC4::kind;
35 using namespace CVC4::theory;
36 using namespace CVC4::theory::strings;
37
38 Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir ){
39 Trace("regexp-ext-rewrite-debug")
40 << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
41 Trace("regexp-ext-rewrite-debug")
42 << " mchildren : " << mchildren << std::endl;
43 Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl;
44 NodeManager* nm = NodeManager::currentNM();
45 unsigned tmin = dir<0 ? 0 : dir;
46 unsigned tmax = dir<0 ? 1 : dir;
47 //try to remove off front and back
48 for( unsigned t=0; t<2; t++ ){
49 if( tmin<=t && t<=tmax ){
50 bool do_next = true;
51 while( !children.empty() && !mchildren.empty() && do_next ){
52 do_next = false;
53 Node xc = mchildren[mchildren.size()-1];
54 Node rc = children[children.size()-1];
55 Assert(rc.getKind() != kind::REGEXP_CONCAT);
56 Assert(xc.getKind() != kind::STRING_CONCAT);
57 if( rc.getKind() == kind::STRING_TO_REGEXP ){
58 if( xc==rc[0] ){
59 children.pop_back();
60 mchildren.pop_back();
61 do_next = true;
62 Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
63 }else if( xc.isConst() && rc[0].isConst() ){
64 //split the constant
65 int index;
66 Node s = splitConstant( xc, rc[0], index, t==0 );
67 Trace("regexp-ext-rewrite-debug") << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl;
68 if( s.isNull() ){
69 Trace("regexp-ext-rewrite-debug")
70 << "...return false" << std::endl;
71 return NodeManager::currentNM()->mkConst( false );
72 }else{
73 Trace("regexp-ext-rewrite-debug")
74 << "...strip equal const" << std::endl;
75 children.pop_back();
76 mchildren.pop_back();
77 if( index==0 ){
78 mchildren.push_back( s );
79 }else{
80 children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
81 }
82 }
83 do_next = true;
84 }
85 }else if( xc.isConst() ){
86 //check for constants
87 CVC4::String s = xc.getConst<String>();
88 if (s.size() == 0)
89 {
90 Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
91 // ignore and continue
92 mchildren.pop_back();
93 do_next = true;
94 }
95 else if (rc.getKind() == kind::REGEXP_RANGE
96 || rc.getKind() == kind::REGEXP_SIGMA)
97 {
98 std::vector<unsigned> ssVec;
99 ssVec.push_back(t == 0 ? s.back() : s.front());
100 CVC4::String ss(ssVec);
101 if( testConstStringInRegExp( ss, 0, rc ) ){
102 //strip off one character
103 mchildren.pop_back();
104 if( s.size()>1 ){
105 if( t==0 ){
106 mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 0, s.size()-1 )) );
107 }else{
108 mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 1 )) );
109 }
110 }
111 children.pop_back();
112 do_next = true;
113 }else{
114 return NodeManager::currentNM()->mkConst( false );
115 }
116 }else if( rc.getKind()==kind::REGEXP_INTER || rc.getKind()==kind::REGEXP_UNION ){
117 //see if any/each child does not work
118 bool result_valid = true;
119 Node result;
120 Node emp_s = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
121 for( unsigned i=0; i<rc.getNumChildren(); i++ ){
122 std::vector< Node > mchildren_s;
123 std::vector< Node > children_s;
124 mchildren_s.push_back( xc );
125 utils::getConcat(rc[i], children_s);
126 Node ret = simpleRegexpConsume( mchildren_s, children_s, t );
127 if( !ret.isNull() ){
128 // one conjunct cannot be satisfied, return false
129 if( rc.getKind()==kind::REGEXP_INTER ){
130 return ret;
131 }
132 }else{
133 if( children_s.empty() ){
134 //if we were able to fully consume, store the result
135 Assert(mchildren_s.size() <= 1);
136 if( mchildren_s.empty() ){
137 mchildren_s.push_back( emp_s );
138 }
139 if( result.isNull() ){
140 result = mchildren_s[0];
141 }else if( result!=mchildren_s[0] ){
142 result_valid = false;
143 }
144 }else{
145 result_valid = false;
146 }
147 }
148 }
149 if( result_valid ){
150 if( result.isNull() ){
151 //all disjuncts cannot be satisfied, return false
152 Assert(rc.getKind() == kind::REGEXP_UNION);
153 return NodeManager::currentNM()->mkConst( false );
154 }else{
155 //all branches led to the same result
156 children.pop_back();
157 mchildren.pop_back();
158 if( result!=emp_s ){
159 mchildren.push_back( result );
160 }
161 do_next = true;
162 }
163 }
164 }else if( rc.getKind()==kind::REGEXP_STAR ){
165 //check if there is no way that this star can be unrolled even once
166 std::vector< Node > mchildren_s;
167 mchildren_s.insert( mchildren_s.end(), mchildren.begin(), mchildren.end() );
168 if( t==1 ){
169 std::reverse( mchildren_s.begin(), mchildren_s.end() );
170 }
171 std::vector< Node > children_s;
172 utils::getConcat(rc[0], children_s);
173 Trace("regexp-ext-rewrite-debug")
174 << "...recursive call on body of star" << std::endl;
175 Node ret = simpleRegexpConsume( mchildren_s, children_s, t );
176 if( !ret.isNull() ){
177 Trace("regexp-ext-rewrite-debug") << "CRE : regexp star infeasable " << xc << " " << rc << std::endl;
178 children.pop_back();
179 if (!children.empty())
180 {
181 Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
182 do_next = true;
183 }
184 }else{
185 if( children_s.empty() ){
186 //check if beyond this, we can't do it or there is nothing left, if so, repeat
187 bool can_skip = false;
188 if( children.size()>1 ){
189 std::vector< Node > mchildren_ss;
190 mchildren_ss.insert( mchildren_ss.end(), mchildren.begin(), mchildren.end() );
191 std::vector< Node > children_ss;
192 children_ss.insert( children_ss.end(), children.begin(), children.end()-1 );
193 if( t==1 ){
194 std::reverse( mchildren_ss.begin(), mchildren_ss.end() );
195 std::reverse( children_ss.begin(), children_ss.end() );
196 }
197 Node ret = simpleRegexpConsume( mchildren_ss, children_ss, t );
198 if( ret.isNull() ){
199 can_skip = true;
200 }
201 }
202 if( !can_skip ){
203 Trace("regexp-ext-rewrite-debug")
204 << "...can't skip" << std::endl;
205 //take the result of fully consuming once
206 if( t==1 ){
207 std::reverse( mchildren_s.begin(), mchildren_s.end() );
208 }
209 mchildren.clear();
210 mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end() );
211 do_next = true;
212 }else{
213 Trace("regexp-ext-rewrite-debug")
214 << "...can skip " << rc << " from " << xc << std::endl;
215 }
216 }
217 }
218 }
219 }
220 if( !do_next ){
221 Trace("regexp-ext-rewrite") << "Cannot consume : " << xc << " " << rc << std::endl;
222 }
223 }
224 }
225 if( dir!=0 ){
226 std::reverse( children.begin(), children.end() );
227 std::reverse( mchildren.begin(), mchildren.end() );
228 }
229 }
230 return Node::null();
231 }
232
233 Node TheoryStringsRewriter::rewriteEquality(Node node)
234 {
235 Assert(node.getKind() == kind::EQUAL);
236 if (node[0] == node[1])
237 {
238 return NodeManager::currentNM()->mkConst(true);
239 }
240 else if (node[0].isConst() && node[1].isConst())
241 {
242 return NodeManager::currentNM()->mkConst(false);
243 }
244
245 // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
246 for (unsigned r = 0; r < 2; r++)
247 {
248 // must call rewrite contains directly to avoid infinite loop
249 // we do a fix point since we may rewrite contains terms to simpler
250 // contains terms.
251 Node ctn = checkEntailContains(node[r], node[1 - r], false);
252 if (!ctn.isNull())
253 {
254 if (!ctn.getConst<bool>())
255 {
256 return returnRewrite(node, ctn, "eq-nctn");
257 }
258 else
259 {
260 // definitely contains but not syntactically equal
261 // We may be able to simplify, e.g.
262 // str.++( x, "a" ) == "a" ----> x = ""
263 }
264 }
265 }
266
267 // ( len( s ) != len( t ) ) => ( s == t ---> false )
268 // This covers cases like str.++( x, x ) == "a" ---> false
269 Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
270 Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
271 Node len_eq = len0.eqNode(len1);
272 len_eq = Rewriter::rewrite(len_eq);
273 if (len_eq.isConst() && !len_eq.getConst<bool>())
274 {
275 return returnRewrite(node, len_eq, "eq-len-deq");
276 }
277
278 std::vector<Node> c[2];
279 for (unsigned i = 0; i < 2; i++)
280 {
281 utils::getConcat(node[i], c[i]);
282 }
283
284 // check if the prefix, suffix mismatches
285 // For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
286 unsigned minsize = std::min(c[0].size(), c[1].size());
287 for (unsigned r = 0; r < 2; r++)
288 {
289 for (unsigned i = 0; i < minsize; i++)
290 {
291 unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
292 unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
293 if (c[0][index1].isConst() && c[1][index2].isConst())
294 {
295 CVC4::String s = c[0][index1].getConst<String>();
296 CVC4::String t = c[1][index2].getConst<String>();
297 unsigned len_short = s.size() <= t.size() ? s.size() : t.size();
298 bool isSameFix =
299 r == 1 ? s.rstrncmp(t, len_short) : s.strncmp(t, len_short);
300 if (!isSameFix)
301 {
302 Node ret = NodeManager::currentNM()->mkConst(false);
303 return returnRewrite(node, ret, "eq-nfix");
304 }
305 }
306 if (c[0][index1] != c[1][index2])
307 {
308 break;
309 }
310 }
311 }
312
313 // standard ordering
314 if (node[0] > node[1])
315 {
316 return NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
317 }
318 return node;
319 }
320
321 Node TheoryStringsRewriter::rewriteEqualityExt(Node node)
322 {
323 Assert(node.getKind() == EQUAL);
324 if (node[0].getType().isInteger())
325 {
326 return rewriteArithEqualityExt(node);
327 }
328 if (node[0].getType().isString())
329 {
330 return rewriteStrEqualityExt(node);
331 }
332 return node;
333 }
334
335 Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node)
336 {
337 Assert(node.getKind() == EQUAL && node[0].getType().isString());
338
339 NodeManager* nm = NodeManager::currentNM();
340 std::vector<Node> c[2];
341 Node new_ret;
342 for (unsigned i = 0; i < 2; i++)
343 {
344 utils::getConcat(node[i], c[i]);
345 }
346 // ------- equality unification
347 bool changed = false;
348 for (unsigned i = 0; i < 2; i++)
349 {
350 while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
351 {
352 c[0].pop_back();
353 c[1].pop_back();
354 changed = true;
355 }
356 // splice constants
357 if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
358 && c[1].back().isConst())
359 {
360 String cs[2];
361 for (unsigned j = 0; j < 2; j++)
362 {
363 cs[j] = c[j].back().getConst<String>();
364 }
365 unsigned larger = cs[0].size() > cs[1].size() ? 0 : 1;
366 unsigned smallerSize = cs[1 - larger].size();
367 if (cs[1 - larger]
368 == (i == 0 ? cs[larger].suffix(smallerSize)
369 : cs[larger].prefix(smallerSize)))
370 {
371 unsigned sizeDiff = cs[larger].size() - smallerSize;
372 c[larger][c[larger].size() - 1] = nm->mkConst(
373 i == 0 ? cs[larger].prefix(sizeDiff) : cs[larger].suffix(sizeDiff));
374 c[1 - larger].pop_back();
375 changed = true;
376 }
377 }
378 for (unsigned j = 0; j < 2; j++)
379 {
380 std::reverse(c[j].begin(), c[j].end());
381 }
382 }
383 if (changed)
384 {
385 // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
386 Node s1 = utils::mkConcat(STRING_CONCAT, c[0]);
387 Node s2 = utils::mkConcat(STRING_CONCAT, c[1]);
388 new_ret = s1.eqNode(s2);
389 node = returnRewrite(node, new_ret, "str-eq-unify");
390 }
391
392 // ------- homogeneous constants
393 for (unsigned i = 0; i < 2; i++)
394 {
395 Node cn = checkEntailHomogeneousString(node[i]);
396 if (!cn.isNull() && cn.getConst<String>().size() > 0)
397 {
398 Assert(cn.isConst());
399 Assert(cn.getConst<String>().size() == 1);
400 unsigned hchar = cn.getConst<String>().front();
401
402 // The operands of the concat on each side of the equality without
403 // constant strings
404 std::vector<Node> trimmed[2];
405 // Counts the number of `hchar`s on each side
406 size_t numHChars[2] = {0, 0};
407 for (size_t j = 0; j < 2; j++)
408 {
409 // Sort the operands of the concats on both sides of the equality
410 // (since both sides may only contain one char, the order does not
411 // matter)
412 std::sort(c[j].begin(), c[j].end());
413 for (const Node& cc : c[j])
414 {
415 if (cc.getKind() == CONST_STRING)
416 {
417 // Count the number of `hchar`s in the string constant and make
418 // sure that all chars are `hchar`s
419 std::vector<unsigned> veccc = cc.getConst<String>().getVec();
420 for (size_t k = 0, size = veccc.size(); k < size; k++)
421 {
422 if (veccc[k] != hchar)
423 {
424 // This conflict case should mostly should be taken care of by
425 // multiset reasoning in the strings rewriter, but we recognize
426 // this conflict just in case.
427 new_ret = nm->mkConst(false);
428 return returnRewrite(
429 node, new_ret, "string-eq-const-conflict-non-homog");
430 }
431 numHChars[j]++;
432 }
433 }
434 else
435 {
436 trimmed[j].push_back(cc);
437 }
438 }
439 }
440
441 // We have to remove the same number of `hchar`s from both sides, so the
442 // side with less `hchar`s determines how many we can remove
443 size_t trimmedConst = std::min(numHChars[0], numHChars[1]);
444 for (size_t j = 0; j < 2; j++)
445 {
446 size_t diff = numHChars[j] - trimmedConst;
447 if (diff != 0)
448 {
449 // Add a constant string to the side with more `hchar`s to restore
450 // the difference in number of `hchar`s
451 std::vector<unsigned> vec(diff, hchar);
452 trimmed[j].push_back(nm->mkConst(String(vec)));
453 }
454 }
455
456 Node lhs = utils::mkConcat(STRING_CONCAT, trimmed[i]);
457 Node ss = utils::mkConcat(STRING_CONCAT, trimmed[1 - i]);
458 if (lhs != node[i] || ss != node[1 - i])
459 {
460 // e.g.
461 // "AA" = y ++ x ---> "AA" = x ++ y if x < y
462 // "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
463 new_ret = lhs.eqNode(ss);
464 node = returnRewrite(node, new_ret, "str-eq-homog-const");
465 }
466 }
467 }
468
469 // ------- rewrites for (= "" _)
470 Node empty = nm->mkConst(::CVC4::String(""));
471 for (size_t i = 0; i < 2; i++)
472 {
473 if (node[i] == empty)
474 {
475 Node ne = node[1 - i];
476 if (ne.getKind() == STRING_STRREPL)
477 {
478 // (= "" (str.replace x y x)) ---> (= x "")
479 if (ne[0] == ne[2])
480 {
481 Node ret = nm->mkNode(EQUAL, ne[0], empty);
482 return returnRewrite(node, ret, "str-emp-repl-x-y-x");
483 }
484
485 // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
486 if (checkEntailNonEmpty(ne[2]))
487 {
488 Node ret =
489 nm->mkNode(AND,
490 nm->mkNode(EQUAL, ne[0], empty),
491 nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
492 return returnRewrite(node, ret, "str-emp-repl-emp");
493 }
494
495 // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
496 if (checkEntailLengthOne(ne[1]) && ne[2] == empty)
497 {
498 Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
499 return returnRewrite(node, ret, "str-emp-repl-emp");
500 }
501 }
502 else if (ne.getKind() == STRING_SUBSTR)
503 {
504 Node zero = nm->mkConst(Rational(0));
505
506 if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true))
507 {
508 // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
509 if (ne[1] == zero)
510 {
511 Node ret = nm->mkNode(EQUAL, ne[0], empty);
512 return returnRewrite(node, ret, "str-emp-substr-leq-len");
513 }
514
515 // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
516 // if n >= 0 and m > 0
517 Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
518 return returnRewrite(node, ret, "str-emp-substr-leq-len");
519 }
520
521 // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
522 if (checkEntailNonEmpty(ne[0]) && ne[1] == zero)
523 {
524 Node ret = nm->mkNode(LEQ, ne[2], zero);
525 return returnRewrite(node, ret, "str-emp-substr-leq-z");
526 }
527 }
528 }
529 }
530
531 // ------- rewrites for (= (str.replace _ _ _) _)
532 for (size_t i = 0; i < 2; i++)
533 {
534 if (node[i].getKind() == STRING_STRREPL)
535 {
536 Node repl = node[i];
537 Node x = node[1 - i];
538
539 // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
540 if (checkEntailNonEmpty(x) && repl[0] == empty)
541 {
542 Node ret = nm->mkNode(
543 EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1]));
544 return returnRewrite(node, ret, "str-eq-repl-emp");
545 }
546
547 // (= x (str.replace y x y)) ---> (= x y)
548 if (repl[0] == repl[2] && x == repl[1])
549 {
550 Node ret = nm->mkNode(EQUAL, x, repl[0]);
551 return returnRewrite(node, ret, "str-eq-repl-to-eq");
552 }
553
554 // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
555 if (x == repl[0])
556 {
557 Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
558 if (eq.isConst() && !eq.getConst<bool>())
559 {
560 Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1]));
561 return returnRewrite(node, ret, "str-eq-repl-not-ctn");
562 }
563 }
564
565 // (= (str.replace x y z) z) --> (or (= x y) (= x z))
566 // if (str.len y) = (str.len z)
567 if (repl[2] == x)
568 {
569 Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
570 Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
571 if (checkEntailArithEq(lenY, lenZ))
572 {
573 Node ret = nm->mkNode(OR,
574 nm->mkNode(EQUAL, repl[0], repl[1]),
575 nm->mkNode(EQUAL, repl[0], repl[2]));
576 return returnRewrite(node, ret, "str-eq-repl-to-dis");
577 }
578 }
579 }
580 }
581
582 // Try to rewrite (= x y) into a conjunction of equalities based on length
583 // entailment.
584 //
585 // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
586 // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
587 //
588 // where yi' and yi'' correspond to some yj and
589 // (<= (str.len x) (str.++ y1' ... ym'))
590 for (unsigned i = 0; i < 2; i++)
591 {
592 if (node[1 - i].getKind() == STRING_CONCAT)
593 {
594 new_ret = inferEqsFromContains(node[i], node[1 - i]);
595 if (!new_ret.isNull())
596 {
597 return returnRewrite(node, new_ret, "str-eq-conj-len-entail");
598 }
599 }
600 }
601
602 if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
603 {
604 // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
605 // (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
606 // (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
607 // (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
608 //
609 // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
610 //
611 // This rewrite performs length-based equality splitting: If we can show
612 // that two prefixes have the same length, we can split an equality into
613 // two equalities, one over the prefixes and another over the suffixes.
614 std::vector<Node> v0, v1;
615 utils::getConcat(node[0], v0);
616 utils::getConcat(node[1], v1);
617 size_t startRhs = 0;
618 for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
619 {
620 std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
621 Node pfx0 = utils::mkConcat(STRING_CONCAT, pfxv0);
622 for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
623 {
624 if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
625 {
626 std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
627 Node pfx1 = utils::mkConcat(STRING_CONCAT, pfxv1);
628 Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
629 Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
630
631 if (checkEntailArithEq(lenPfx0, lenPfx1))
632 {
633 std::vector<Node> sfxv0(v0.begin() + i, v0.end());
634 std::vector<Node> sfxv1(v1.begin() + j, v1.end());
635 Node ret =
636 nm->mkNode(kind::AND,
637 pfx0.eqNode(pfx1),
638 utils::mkConcat(STRING_CONCAT, sfxv0)
639 .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1)));
640 return returnRewrite(node, ret, "split-eq");
641 }
642 else if (checkEntailArith(lenPfx1, lenPfx0, true))
643 {
644 // The prefix on the right-hand side is strictly longer than the
645 // prefix on the left-hand side, so we try to strip the right-hand
646 // prefix by the length of the left-hand prefix
647 //
648 // Example:
649 // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
650 // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
651 std::vector<Node> rpfxv1;
652 if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0))
653 {
654 std::vector<Node> sfxv0(v0.begin() + i, v0.end());
655 pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
656 Node ret = nm->mkNode(
657 kind::AND,
658 pfx0.eqNode(utils::mkConcat(STRING_CONCAT, rpfxv1)),
659 utils::mkConcat(STRING_CONCAT, sfxv0)
660 .eqNode(utils::mkConcat(STRING_CONCAT, pfxv1)));
661 return returnRewrite(node, ret, "split-eq-strip-r");
662 }
663
664 // If the prefix of the right-hand side is (strictly) longer than
665 // the prefix of the left-hand side, we can advance the left-hand
666 // side (since the length of the right-hand side is only increasing
667 // in the inner loop)
668 break;
669 }
670 else if (checkEntailArith(lenPfx0, lenPfx1, true))
671 {
672 // The prefix on the left-hand side is strictly longer than the
673 // prefix on the right-hand side, so we try to strip the left-hand
674 // prefix by the length of the right-hand prefix
675 //
676 // Example:
677 // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
678 // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
679 std::vector<Node> rpfxv0;
680 if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1))
681 {
682 pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end());
683 std::vector<Node> sfxv1(v1.begin() + j, v1.end());
684 Node ret = nm->mkNode(
685 kind::AND,
686 utils::mkConcat(STRING_CONCAT, rpfxv0).eqNode(pfx1),
687 utils::mkConcat(STRING_CONCAT, pfxv0)
688 .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1)));
689 return returnRewrite(node, ret, "split-eq-strip-l");
690 }
691
692 // If the prefix of the left-hand side is (strictly) longer than
693 // the prefix of the right-hand side, then we don't need to check
694 // that right-hand prefix for future left-hand prefixes anymore
695 // (since they are increasing in length)
696 startRhs = j + 1;
697 }
698 }
699 }
700 }
701 }
702
703 return node;
704 }
705
706 Node TheoryStringsRewriter::rewriteArithEqualityExt(Node node)
707 {
708 Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
709
710 // cases where we can solve the equality
711
712 // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
713
714 return node;
715 }
716
717 // TODO (#1180) add rewrite
718 // str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
719 // str.substr( x, n1, n2+n3 )
720 Node TheoryStringsRewriter::rewriteConcat(Node node)
721 {
722 Assert(node.getKind() == kind::STRING_CONCAT);
723 Trace("strings-rewrite-debug")
724 << "Strings::rewriteConcat start " << node << std::endl;
725 NodeManager* nm = NodeManager::currentNM();
726 Node retNode = node;
727 std::vector<Node> node_vec;
728 Node preNode = Node::null();
729 for (Node tmpNode : node)
730 {
731 if (tmpNode.getKind() == STRING_CONCAT)
732 {
733 unsigned j = 0;
734 // combine the first term with the previous constant if applicable
735 if (!preNode.isNull())
736 {
737 if (tmpNode[0].isConst())
738 {
739 preNode = nm->mkConst(
740 preNode.getConst<String>().concat(tmpNode[0].getConst<String>()));
741 node_vec.push_back(preNode);
742 }
743 else
744 {
745 node_vec.push_back(preNode);
746 node_vec.push_back(tmpNode[0]);
747 }
748 preNode = Node::null();
749 ++j;
750 }
751 // insert the middle terms to node_vec
752 if (j <= tmpNode.getNumChildren() - 1)
753 {
754 node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
755 }
756 // take the last term as the current
757 tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
758 }
759 if(!tmpNode.isConst()) {
760 if(!preNode.isNull()) {
761 if(preNode.getKind() == kind::CONST_STRING && !preNode.getConst<String>().isEmptyString() ) {
762 node_vec.push_back( preNode );
763 }
764 preNode = Node::null();
765 }
766 node_vec.push_back( tmpNode );
767 }else{
768 if( preNode.isNull() ){
769 preNode = tmpNode;
770 }else{
771 preNode = NodeManager::currentNM()->mkConst( preNode.getConst<String>().concat( tmpNode.getConst<String>() ) );
772 }
773 }
774 }
775 if( !preNode.isNull() && ( preNode.getKind()!=kind::CONST_STRING || !preNode.getConst<String>().isEmptyString() ) ){
776 node_vec.push_back( preNode );
777 }
778
779 // Sort adjacent operands in str.++ that all result in the same string or the
780 // empty string.
781 //
782 // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
783 // (str.++ ... [sort those 3 arguments] ... )
784 size_t lastIdx = 0;
785 Node lastX;
786 for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
787 {
788 Node s = getStringOrEmpty(node_vec[i]);
789 bool nextX = false;
790 if (s != lastX)
791 {
792 nextX = true;
793 }
794
795 if (nextX)
796 {
797 std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
798 lastX = s;
799 lastIdx = i;
800 }
801 }
802 std::sort(node_vec.begin() + lastIdx, node_vec.end());
803
804 retNode = utils::mkConcat(STRING_CONCAT, node_vec);
805 Trace("strings-rewrite-debug")
806 << "Strings::rewriteConcat end " << retNode << std::endl;
807 return retNode;
808 }
809
810 Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node)
811 {
812 Assert(node.getKind() == kind::REGEXP_CONCAT);
813 NodeManager* nm = NodeManager::currentNM();
814 Trace("strings-rewrite-debug")
815 << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
816 Node retNode = node;
817 std::vector<Node> vec;
818 bool changed = false;
819 Node emptyRe;
820 for (const Node& c : node)
821 {
822 if (c.getKind() == REGEXP_CONCAT)
823 {
824 changed = true;
825 for (const Node& cc : c)
826 {
827 vec.push_back(cc);
828 }
829 }
830 else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
831 && c[0].getConst<String>().isEmptyString())
832 {
833 changed = true;
834 emptyRe = c;
835 }
836 else if (c.getKind() == REGEXP_EMPTY)
837 {
838 // re.++( ..., empty, ... ) ---> empty
839 std::vector<Node> nvec;
840 return nm->mkNode(REGEXP_EMPTY, nvec);
841 }
842 else
843 {
844 vec.push_back(c);
845 }
846 }
847 if (changed)
848 {
849 // flatten
850 // this handles nested re.++ and elimination or str.to.re(""), e.g.:
851 // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
852 if (vec.empty())
853 {
854 Assert(!emptyRe.isNull());
855 retNode = emptyRe;
856 }
857 else
858 {
859 retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
860 }
861 return returnRewrite(node, retNode, "re.concat-flatten");
862 }
863 Trace("strings-rewrite-debug")
864 << "Strings::rewriteConcatRegExp start " << node << std::endl;
865 std::vector<Node> cvec;
866 // the current accumulation of constant strings
867 std::vector<Node> preReStr;
868 // whether the last component was (_)*
869 bool lastAllStar = false;
870 String emptyStr = String("");
871 // this loop checks to see if components can be combined or dropped
872 for (unsigned i = 0, size = vec.size(); i <= size; i++)
873 {
874 Node curr;
875 if (i < size)
876 {
877 curr = vec[i];
878 Assert(curr.getKind() != REGEXP_CONCAT);
879 }
880 // update preReStr
881 if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
882 {
883 lastAllStar = false;
884 preReStr.push_back(curr[0]);
885 curr = Node::null();
886 }
887 else if (!preReStr.empty())
888 {
889 Assert(!lastAllStar);
890 // this groups consecutive strings a++b ---> ab
891 Node acc = nm->mkNode(STRING_TO_REGEXP,
892 utils::mkConcat(STRING_CONCAT, preReStr));
893 cvec.push_back(acc);
894 preReStr.clear();
895 }
896 else if (!curr.isNull() && lastAllStar)
897 {
898 // if empty, drop it
899 // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
900 if (testConstStringInRegExp(emptyStr, 0, curr))
901 {
902 curr = Node::null();
903 }
904 }
905 if (!curr.isNull())
906 {
907 lastAllStar = false;
908 if (curr.getKind() == REGEXP_STAR)
909 {
910 // we can group stars (a)* ++ (a)* ---> (a)*
911 if (!cvec.empty() && cvec.back() == curr)
912 {
913 curr = Node::null();
914 }
915 else if (curr[0].getKind() == REGEXP_SIGMA)
916 {
917 Assert(!lastAllStar);
918 lastAllStar = true;
919 // go back and remove empty ones from back of cvec
920 // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
921 while (!cvec.empty()
922 && testConstStringInRegExp(emptyStr, 0, cvec.back()))
923 {
924 cvec.pop_back();
925 }
926 }
927 }
928 }
929 if (!curr.isNull())
930 {
931 cvec.push_back(curr);
932 }
933 }
934 Assert(!cvec.empty());
935 retNode = utils::mkConcat(REGEXP_CONCAT, cvec);
936 if (retNode != node)
937 {
938 // handles all cases where consecutive re constants are combined or dropped
939 // as described in the loop above.
940 return returnRewrite(node, retNode, "re.concat");
941 }
942
943 // flipping adjacent star arguments
944 changed = false;
945 for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
946 {
947 if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
948 {
949 // by convention, flip the order (a*)++a ---> a++(a*)
950 std::swap(cvec[i], cvec[i+1]);
951 changed = true;
952 }
953 }
954 if (changed)
955 {
956 retNode = utils::mkConcat(REGEXP_CONCAT, cvec);
957 return returnRewrite(node, retNode, "re.concat.opt");
958 }
959 return node;
960 }
961
962 Node TheoryStringsRewriter::rewriteStarRegExp(TNode node)
963 {
964 Assert(node.getKind() == REGEXP_STAR);
965 NodeManager* nm = NodeManager::currentNM();
966 Node retNode = node;
967 if (node[0].getKind() == REGEXP_STAR)
968 {
969 // ((R)*)* ---> R*
970 return returnRewrite(node, node[0], "re-star-nested-star");
971 }
972 else if (node[0].getKind() == STRING_TO_REGEXP
973 && node[0][0].getKind() == CONST_STRING
974 && node[0][0].getConst<String>().isEmptyString())
975 {
976 // ("")* ---> ""
977 return returnRewrite(node, node[0], "re-star-empty-string");
978 }
979 else if (node[0].getKind() == REGEXP_EMPTY)
980 {
981 // (empty)* ---> ""
982 retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
983 return returnRewrite(node, retNode, "re-star-empty");
984 }
985 else if (node[0].getKind() == REGEXP_UNION)
986 {
987 // simplification of unions under star
988 if (hasEpsilonNode(node[0]))
989 {
990 bool changed = false;
991 std::vector<Node> node_vec;
992 for (const Node& nc : node[0])
993 {
994 if (nc.getKind() == STRING_TO_REGEXP && nc[0].getKind() == CONST_STRING
995 && nc[0].getConst<String>().isEmptyString())
996 {
997 // can be removed
998 changed = true;
999 }
1000 else
1001 {
1002 node_vec.push_back(nc);
1003 }
1004 }
1005 if (changed)
1006 {
1007 retNode = node_vec.size() == 1 ? node_vec[0]
1008 : nm->mkNode(REGEXP_UNION, node_vec);
1009 retNode = nm->mkNode(REGEXP_STAR, retNode);
1010 // simplification of union beneath star based on loop above
1011 // for example, ( "" | "a" )* ---> ("a")*
1012 return returnRewrite(node, retNode, "re-star-union");
1013 }
1014 }
1015 }
1016 return node;
1017 }
1018
1019 Node TheoryStringsRewriter::rewriteAndOrRegExp(TNode node)
1020 {
1021 Kind nk = node.getKind();
1022 Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
1023 Trace("strings-rewrite-debug")
1024 << "Strings::rewriteAndOrRegExp start " << node << std::endl;
1025 std::vector<Node> node_vec;
1026 for (const Node& ni : node)
1027 {
1028 if (ni.getKind() == nk)
1029 {
1030 for (const Node& nic : ni)
1031 {
1032 if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
1033 {
1034 node_vec.push_back(nic);
1035 }
1036 }
1037 }
1038 else if (ni.getKind() == REGEXP_EMPTY)
1039 {
1040 if (nk == REGEXP_INTER)
1041 {
1042 return returnRewrite(node, ni, "re.and-empty");
1043 }
1044 // otherwise, can ignore
1045 }
1046 else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
1047 {
1048 if (nk == REGEXP_UNION)
1049 {
1050 return returnRewrite(node, ni, "re.or-all");
1051 }
1052 // otherwise, can ignore
1053 }
1054 else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
1055 {
1056 node_vec.push_back(ni);
1057 }
1058 }
1059 NodeManager* nm = NodeManager::currentNM();
1060 std::vector<Node> nvec;
1061 Node retNode;
1062 if (node_vec.empty())
1063 {
1064 if (nk == REGEXP_INTER)
1065 {
1066 retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, nvec));
1067 }
1068 else
1069 {
1070 retNode = nm->mkNode(kind::REGEXP_EMPTY, nvec);
1071 }
1072 }
1073 else
1074 {
1075 retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1076 }
1077 if (retNode != node)
1078 {
1079 // flattening and removing children, based on loop above
1080 return returnRewrite(node, retNode, "re.andor-flatten");
1081 }
1082 return node;
1083 }
1084
1085 Node TheoryStringsRewriter::rewriteLoopRegExp(TNode node)
1086 {
1087 Assert(node.getKind() == REGEXP_LOOP);
1088 Node retNode = node;
1089 Node r = node[0];
1090 if (r.getKind() == REGEXP_STAR)
1091 {
1092 return returnRewrite(node, r, "re.loop-star");
1093 }
1094 TNode n1 = node[1];
1095 NodeManager* nm = NodeManager::currentNM();
1096 CVC4::Rational rMaxInt(String::maxSize());
1097 AlwaysAssert(n1.isConst()) << "re.loop contains non-constant integer (1).";
1098 AlwaysAssert(n1.getConst<Rational>().sgn() >= 0)
1099 << "Negative integer in string REGEXP_LOOP (1)";
1100 Assert(n1.getConst<Rational>() <= rMaxInt)
1101 << "Exceeded UINT32_MAX in string REGEXP_LOOP (1)";
1102 uint32_t l = n1.getConst<Rational>().getNumerator().toUnsignedInt();
1103 std::vector<Node> vec_nodes;
1104 for (unsigned i = 0; i < l; i++)
1105 {
1106 vec_nodes.push_back(r);
1107 }
1108 if (node.getNumChildren() == 3)
1109 {
1110 TNode n2 = Rewriter::rewrite(node[2]);
1111 Node n =
1112 vec_nodes.size() == 0
1113 ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1114 : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1115 AlwaysAssert(n2.isConst()) << "re.loop contains non-constant integer (2).";
1116 AlwaysAssert(n2.getConst<Rational>().sgn() >= 0)
1117 << "Negative integer in string REGEXP_LOOP (2)";
1118 Assert(n2.getConst<Rational>() <= rMaxInt)
1119 << "Exceeded UINT32_MAX in string REGEXP_LOOP (2)";
1120 uint32_t u = n2.getConst<Rational>().getNumerator().toUnsignedInt();
1121 if (u <= l)
1122 {
1123 retNode = n;
1124 }
1125 else
1126 {
1127 std::vector<Node> vec2;
1128 vec2.push_back(n);
1129 for (unsigned j = l; j < u; j++)
1130 {
1131 vec_nodes.push_back(r);
1132 n = utils::mkConcat(REGEXP_CONCAT, vec_nodes);
1133 vec2.push_back(n);
1134 }
1135 retNode = nm->mkNode(REGEXP_UNION, vec2);
1136 }
1137 }
1138 else
1139 {
1140 Node rest = nm->mkNode(REGEXP_STAR, r);
1141 retNode = vec_nodes.size() == 0
1142 ? rest
1143 : vec_nodes.size() == 1
1144 ? nm->mkNode(REGEXP_CONCAT, r, rest)
1145 : nm->mkNode(REGEXP_CONCAT,
1146 nm->mkNode(REGEXP_CONCAT, vec_nodes),
1147 rest);
1148 }
1149 Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1150 << std::endl;
1151 if (retNode != node)
1152 {
1153 return returnRewrite(node, retNode, "re.loop");
1154 }
1155 return node;
1156 }
1157
1158 bool TheoryStringsRewriter::isConstRegExp( TNode t ) {
1159 if( t.getKind()==kind::STRING_TO_REGEXP ) {
1160 return t[0].isConst();
1161 }
1162 else if (t.isVar())
1163 {
1164 return false;
1165 }else{
1166 for( unsigned i = 0; i<t.getNumChildren(); ++i ) {
1167 if( !isConstRegExp(t[i]) ){
1168 return false;
1169 }
1170 }
1171 return true;
1172 }
1173 }
1174
1175 bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned int index_start, TNode r ) {
1176 Assert(index_start <= s.size());
1177 Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " << index_start << std::endl;
1178 Assert(!r.isVar());
1179 Kind k = r.getKind();
1180 switch( k ) {
1181 case kind::STRING_TO_REGEXP: {
1182 CVC4::String s2 = s.substr( index_start, s.size() - index_start );
1183 if(r[0].getKind() == kind::CONST_STRING) {
1184 return ( s2 == r[0].getConst<String>() );
1185 } else {
1186 Assert(false) << "RegExp contains variables";
1187 return false;
1188 }
1189 }
1190 case kind::REGEXP_CONCAT: {
1191 if( s.size() != index_start ) {
1192 std::vector<int> vec_k( r.getNumChildren(), -1 );
1193 int start = 0;
1194 int left = (int) s.size() - index_start;
1195 int i=0;
1196 while( i<(int) r.getNumChildren() ) {
1197 bool flag = true;
1198 if( i == (int) r.getNumChildren() - 1 ) {
1199 if( testConstStringInRegExp( s, index_start + start, r[i] ) ) {
1200 return true;
1201 }
1202 } else if( i == -1 ) {
1203 return false;
1204 } else {
1205 for(vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) {
1206 CVC4::String t = s.substr(index_start + start, vec_k[i]);
1207 if( testConstStringInRegExp( t, 0, r[i] ) ) {
1208 start += vec_k[i]; left -= vec_k[i]; flag = false;
1209 ++i; vec_k[i] = -1;
1210 break;
1211 }
1212 }
1213 }
1214
1215 if(flag) {
1216 --i;
1217 if(i >= 0) {
1218 start -= vec_k[i]; left += vec_k[i];
1219 }
1220 }
1221 }
1222 return false;
1223 } else {
1224 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1225 if(!testConstStringInRegExp( s, index_start, r[i] )) return false;
1226 }
1227 return true;
1228 }
1229 }
1230 case kind::REGEXP_UNION: {
1231 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1232 if(testConstStringInRegExp( s, index_start, r[i] )) return true;
1233 }
1234 return false;
1235 }
1236 case kind::REGEXP_INTER: {
1237 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1238 if(!testConstStringInRegExp( s, index_start, r[i] )) return false;
1239 }
1240 return true;
1241 }
1242 case kind::REGEXP_STAR: {
1243 if( s.size() != index_start ) {
1244 for(unsigned k=s.size() - index_start; k>0; --k) {
1245 CVC4::String t = s.substr(index_start, k);
1246 if( testConstStringInRegExp( t, 0, r[0] ) ) {
1247 if( index_start + k == s.size() || testConstStringInRegExp( s, index_start + k, r ) ) {
1248 return true;
1249 }
1250 }
1251 }
1252 return false;
1253 } else {
1254 return true;
1255 }
1256 }
1257 case kind::REGEXP_EMPTY: {
1258 return false;
1259 }
1260 case kind::REGEXP_SIGMA: {
1261 if(s.size() == index_start + 1) {
1262 return true;
1263 } else {
1264 return false;
1265 }
1266 }
1267 case kind::REGEXP_RANGE: {
1268 if(s.size() == index_start + 1) {
1269 unsigned a = r[0].getConst<String>().front();
1270 a = String::convertUnsignedIntToCode(a);
1271 unsigned b = r[1].getConst<String>().front();
1272 b = String::convertUnsignedIntToCode(b);
1273 unsigned c = s.back();
1274 c = String::convertUnsignedIntToCode(c);
1275 return (a <= c && c <= b);
1276 } else {
1277 return false;
1278 }
1279 }
1280 case kind::REGEXP_LOOP: {
1281 uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
1282 if(s.size() == index_start) {
1283 return l==0? true : testConstStringInRegExp(s, index_start, r[0]);
1284 } else if(l==0 && r[1]==r[2]) {
1285 return false;
1286 } else {
1287 Assert(r.getNumChildren() == 3)
1288 << "String rewriter error: LOOP has 2 children";
1289 if(l==0) {
1290 //R{0,u}
1291 uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
1292 for(unsigned len=s.size() - index_start; len>=1; len--) {
1293 CVC4::String t = s.substr(index_start, len);
1294 if(testConstStringInRegExp(t, 0, r[0])) {
1295 if(len + index_start == s.size()) {
1296 return true;
1297 } else {
1298 Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(u - 1) );
1299 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], r[1], num2);
1300 if(testConstStringInRegExp(s, index_start+len, r2)) {
1301 return true;
1302 }
1303 }
1304 }
1305 }
1306 return false;
1307 } else {
1308 //R{l,l}
1309 Assert(r[1] == r[2])
1310 << "String rewriter error: LOOP nums are not equal";
1311 if(l>s.size() - index_start) {
1312 if(testConstStringInRegExp(s, s.size(), r[0])) {
1313 l = s.size() - index_start;
1314 } else {
1315 return false;
1316 }
1317 }
1318 for(unsigned len=1; len<=s.size() - index_start; len++) {
1319 CVC4::String t = s.substr(index_start, len);
1320 if(testConstStringInRegExp(t, 0, r[0])) {
1321 Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(l - 1) );
1322 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2);
1323 if(testConstStringInRegExp(s, index_start+len, r2)) {
1324 return true;
1325 }
1326 }
1327 }
1328 return false;
1329 }
1330 }
1331 }
1332 default: {
1333 Assert(!RegExpOpr::isRegExpKind(k));
1334 return false;
1335 }
1336 }
1337 }
1338
1339 Node TheoryStringsRewriter::rewriteMembership(TNode node) {
1340 NodeManager* nm = NodeManager::currentNM();
1341 Node retNode = node;
1342 Node x = node[0];
1343 Node r = node[1];
1344
1345 if(r.getKind() == kind::REGEXP_EMPTY) {
1346 retNode = NodeManager::currentNM()->mkConst( false );
1347 } else if(x.getKind()==kind::CONST_STRING && isConstRegExp(r)) {
1348 //test whether x in node[1]
1349 CVC4::String s = x.getConst<String>();
1350 retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, r ) );
1351 } else if(r.getKind() == kind::REGEXP_SIGMA) {
1352 Node one = nm->mkConst(Rational(1));
1353 retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1354 } else if( r.getKind() == kind::REGEXP_STAR ) {
1355 if (x.isConst())
1356 {
1357 String s = x.getConst<String>();
1358 if (s.size() == 0)
1359 {
1360 retNode = nm->mkConst(true);
1361 // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1362 return returnRewrite(node, retNode, "re-empty-in-str-star");
1363 }
1364 else if (s.size() == 1)
1365 {
1366 if (r[0].getKind() == STRING_TO_REGEXP)
1367 {
1368 retNode = r[0][0].eqNode(x);
1369 // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1370 return returnRewrite(node, retNode, "re-char-in-str-star");
1371 }
1372 }
1373 }
1374 else if (x.getKind() == STRING_CONCAT)
1375 {
1376 // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1377 // (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1378 // if the length of all strings in R is one.
1379 Node flr = getFixedLengthForRegexp(r[0]);
1380 if (!flr.isNull())
1381 {
1382 Node one = nm->mkConst(Rational(1));
1383 if (flr == one)
1384 {
1385 NodeBuilder<> nb(AND);
1386 for (const Node& xc : x)
1387 {
1388 nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1389 }
1390 Node retNode = nb.constructNode();
1391 return returnRewrite(node, retNode, "re-in-dist-char-star");
1392 }
1393 }
1394 }
1395 if (r[0].getKind() == kind::REGEXP_SIGMA)
1396 {
1397 retNode = NodeManager::currentNM()->mkConst( true );
1398 return returnRewrite(node, retNode, "re-in-sigma-star");
1399 }
1400 }else if( r.getKind() == kind::REGEXP_CONCAT ){
1401 bool allSigma = true;
1402 bool allSigmaStrict = true;
1403 unsigned allSigmaMinSize = 0;
1404 Node constStr;
1405 size_t constIdx = 0;
1406 size_t nchildren = r.getNumChildren();
1407 for (size_t i = 0; i < nchildren; i++)
1408 {
1409 Node rc = r[i];
1410 Assert(rc.getKind() != kind::REGEXP_EMPTY);
1411 if (rc.getKind() == kind::REGEXP_SIGMA)
1412 {
1413 allSigmaMinSize++;
1414 }
1415 else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1416 {
1417 allSigmaStrict = false;
1418 }
1419 else if (rc.getKind() == STRING_TO_REGEXP)
1420 {
1421 if (constStr.isNull())
1422 {
1423 constStr = rc[0];
1424 constIdx = i;
1425 }
1426 else
1427 {
1428 allSigma = false;
1429 break;
1430 }
1431 }
1432 else
1433 {
1434 allSigma = false;
1435 break;
1436 }
1437 }
1438 if (allSigma)
1439 {
1440 if (constStr.isNull())
1441 {
1442 // x in re.++(_*, _, _) ---> str.len(x) >= 2
1443 Node num = nm->mkConst(Rational(allSigmaMinSize));
1444 Node lenx = nm->mkNode(STRING_LENGTH, x);
1445 retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1446 return returnRewrite(node, retNode, "re-concat-pure-allchar");
1447 }
1448 else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1449 && constIdx != nchildren - 1)
1450 {
1451 // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1452 retNode = nm->mkNode(STRING_STRCTN, x, constStr);
1453 return returnRewrite(node, retNode, "re-concat-to-contains");
1454 }
1455 }
1456 }else if( r.getKind()==kind::REGEXP_INTER || r.getKind()==kind::REGEXP_UNION ){
1457 std::vector< Node > mvec;
1458 for( unsigned i=0; i<r.getNumChildren(); i++ ){
1459 mvec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r[i] ) );
1460 }
1461 retNode = NodeManager::currentNM()->mkNode( r.getKind()==kind::REGEXP_INTER ? kind::AND : kind::OR, mvec );
1462 }else if(r.getKind() == kind::STRING_TO_REGEXP) {
1463 retNode = x.eqNode(r[0]);
1464 }
1465 else if (r.getKind() == REGEXP_RANGE)
1466 {
1467 // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1468 Node xcode = nm->mkNode(STRING_CODE, x);
1469 retNode = nm->mkNode(AND,
1470 nm->mkNode(LEQ, nm->mkNode(STRING_CODE, r[0]), xcode),
1471 nm->mkNode(LEQ, xcode, nm->mkNode(STRING_CODE, r[1])));
1472 }else if(x != node[0] || r != node[1]) {
1473 retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r );
1474 }
1475
1476 //do simple consumes
1477 if( retNode==node ){
1478 if( r.getKind()==kind::REGEXP_STAR ){
1479 for( unsigned dir=0; dir<=1; dir++ ){
1480 std::vector< Node > mchildren;
1481 utils::getConcat(x, mchildren);
1482 bool success = true;
1483 while( success ){
1484 success = false;
1485 std::vector< Node > children;
1486 utils::getConcat(r[0], children);
1487 Node scn = simpleRegexpConsume( mchildren, children, dir );
1488 if( !scn.isNull() ){
1489 Trace("regexp-ext-rewrite") << "Regexp star : const conflict : " << node << std::endl;
1490 return scn;
1491 }else if( children.empty() ){
1492 //fully consumed one copy of the STAR
1493 if( mchildren.empty() ){
1494 Trace("regexp-ext-rewrite") << "Regexp star : full consume : " << node << std::endl;
1495 return NodeManager::currentNM()->mkConst( true );
1496 }else{
1497 retNode = nm->mkNode(STRING_IN_REGEXP,
1498 utils::mkConcat(STRING_CONCAT, mchildren),
1499 r);
1500 success = true;
1501 }
1502 }
1503 }
1504 if( retNode!=node ){
1505 Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node << " -> " << retNode << std::endl;
1506 break;
1507 }
1508 }
1509 }else{
1510 std::vector< Node > children;
1511 utils::getConcat(r, children);
1512 std::vector< Node > mchildren;
1513 utils::getConcat(x, mchildren);
1514 unsigned prevSize = children.size() + mchildren.size();
1515 Node scn = simpleRegexpConsume( mchildren, children );
1516 if( !scn.isNull() ){
1517 Trace("regexp-ext-rewrite") << "Regexp : const conflict : " << node << std::endl;
1518 return scn;
1519 }else{
1520 if( (children.size() + mchildren.size())!=prevSize ){
1521 // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1522 // above, we strip components to construct an equivalent membership:
1523 // (str.++ xi .. xj) in (re.++ rk ... rl).
1524 Node xn = utils::mkConcat(STRING_CONCAT, mchildren);
1525 Node emptyStr = nm->mkConst(String(""));
1526 if( children.empty() ){
1527 // If we stripped all components on the right, then the left is
1528 // equal to the empty string.
1529 // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1530 retNode = xn.eqNode(emptyStr);
1531 }else{
1532 // otherwise, construct the updated regular expression
1533 retNode = nm->mkNode(
1534 STRING_IN_REGEXP, xn, utils::mkConcat(REGEXP_CONCAT, children));
1535 }
1536 Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1537 return returnRewrite(node, retNode, "re-simple-consume");
1538 }
1539 }
1540 }
1541 }
1542 return retNode;
1543 }
1544
1545 RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
1546 Trace("strings-postrewrite") << "Strings::postRewrite start " << node << std::endl;
1547 NodeManager* nm = NodeManager::currentNM();
1548 Node retNode = node;
1549 Node orig = retNode;
1550 Kind nk = node.getKind();
1551 if (nk == kind::STRING_CONCAT)
1552 {
1553 retNode = rewriteConcat(node);
1554 }
1555 else if (nk == kind::EQUAL)
1556 {
1557 retNode = rewriteEquality(node);
1558 }
1559 else if (nk == kind::STRING_LENGTH)
1560 {
1561 Kind nk0 = node[0].getKind();
1562 if( node[0].isConst() ){
1563 retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( node[0].getConst<String>().size() ) );
1564 }
1565 else if (nk0 == kind::STRING_CONCAT)
1566 {
1567 Node tmpNode = node[0];
1568 if(tmpNode.isConst()) {
1569 retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode.getConst<String>().size() ) );
1570 }else if( tmpNode.getKind()==kind::STRING_CONCAT ){
1571 std::vector<Node> node_vec;
1572 for(unsigned int i=0; i<tmpNode.getNumChildren(); ++i) {
1573 if(tmpNode[i].isConst()) {
1574 node_vec.push_back( NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode[i].getConst<String>().size() ) ) );
1575 } else {
1576 node_vec.push_back( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, tmpNode[i]) );
1577 }
1578 }
1579 retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
1580 }
1581 }
1582 else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL)
1583 {
1584 Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
1585 Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
1586 if (len1 == len2)
1587 {
1588 // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
1589 retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
1590 }
1591 }
1592 else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER
1593 || nk0 == STRING_REV)
1594 {
1595 // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
1596 retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
1597 }
1598 }
1599 else if (nk == kind::STRING_CHARAT)
1600 {
1601 Node one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
1602 retNode = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, node[0], node[1], one);
1603 }
1604 else if (nk == kind::STRING_SUBSTR)
1605 {
1606 retNode = rewriteSubstr(node);
1607 }
1608 else if (nk == kind::STRING_STRCTN)
1609 {
1610 retNode = rewriteContains( node );
1611 }
1612 else if (nk == kind::STRING_LT)
1613 {
1614 // eliminate s < t ---> s != t AND s <= t
1615 retNode = nm->mkNode(AND,
1616 node[0].eqNode(node[1]).negate(),
1617 nm->mkNode(STRING_LEQ, node[0], node[1]));
1618 }
1619 else if (nk == kind::STRING_LEQ)
1620 {
1621 retNode = rewriteStringLeq(node);
1622 }
1623 else if (nk == kind::STRING_STRIDOF)
1624 {
1625 retNode = rewriteIndexof( node );
1626 }
1627 else if (nk == kind::STRING_STRREPL)
1628 {
1629 retNode = rewriteReplace( node );
1630 }
1631 else if (nk == kind::STRING_STRREPLALL)
1632 {
1633 retNode = rewriteReplaceAll(node);
1634 }
1635 else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER)
1636 {
1637 retNode = rewriteStrConvert(node);
1638 }
1639 else if (nk == STRING_REV)
1640 {
1641 retNode = rewriteStrReverse(node);
1642 }
1643 else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1644 {
1645 retNode = rewritePrefixSuffix(node);
1646 }
1647 else if (nk == kind::STRING_ITOS)
1648 {
1649 if(node[0].isConst()) {
1650 if( node[0].getConst<Rational>().sgn()==-1 ){
1651 retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
1652 }else{
1653 std::string stmp = node[0].getConst<Rational>().getNumerator().toString();
1654 Assert(stmp[0] != '-');
1655 retNode = NodeManager::currentNM()->mkConst( ::CVC4::String(stmp) );
1656 }
1657 }
1658 }
1659 else if (nk == kind::STRING_STOI)
1660 {
1661 if(node[0].isConst()) {
1662 CVC4::String s = node[0].getConst<String>();
1663 if(s.isNumber()) {
1664 retNode = nm->mkConst(s.toNumber());
1665 } else {
1666 retNode = nm->mkConst(Rational(-1));
1667 }
1668 } else if(node[0].getKind() == kind::STRING_CONCAT) {
1669 for(unsigned i=0; i<node[0].getNumChildren(); ++i) {
1670 if(node[0][i].isConst()) {
1671 CVC4::String t = node[0][i].getConst<String>();
1672 if(!t.isNumber()) {
1673 retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1));
1674 break;
1675 }
1676 }
1677 }
1678 }
1679 }
1680 else if (nk == kind::STRING_IN_REGEXP)
1681 {
1682 retNode = rewriteMembership(node);
1683 }
1684 else if (nk == STRING_CODE)
1685 {
1686 retNode = rewriteStringCode(node);
1687 }
1688 else if (nk == REGEXP_CONCAT)
1689 {
1690 retNode = rewriteConcatRegExp(node);
1691 }
1692 else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1693 {
1694 retNode = rewriteAndOrRegExp(node);
1695 }
1696 else if (nk == REGEXP_STAR)
1697 {
1698 retNode = rewriteStarRegExp(node);
1699 }
1700 else if (nk == REGEXP_PLUS)
1701 {
1702 retNode =
1703 nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1704 }
1705 else if (nk == REGEXP_OPT)
1706 {
1707 retNode = nm->mkNode(REGEXP_UNION,
1708 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1709 node[0]);
1710 }
1711 else if (nk == REGEXP_RANGE)
1712 {
1713 if (node[0] == node[1])
1714 {
1715 retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1716 }
1717 }
1718 else if (nk == REGEXP_LOOP)
1719 {
1720 retNode = rewriteLoopRegExp(node);
1721 }
1722
1723 Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl;
1724 if( orig!=retNode ){
1725 Trace("strings-rewrite-debug") << "Strings: post-rewrite " << orig << " to " << retNode << std::endl;
1726 }
1727 return RewriteResponse(orig==retNode ? REWRITE_DONE : REWRITE_AGAIN_FULL, retNode);
1728 }
1729
1730 bool TheoryStringsRewriter::hasEpsilonNode(TNode node) {
1731 for(unsigned int i=0; i<node.getNumChildren(); i++) {
1732 if(node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].getKind() == kind::CONST_STRING && node[i][0].getConst<String>().isEmptyString()) {
1733 return true;
1734 }
1735 }
1736 return false;
1737 }
1738
1739 RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
1740 return RewriteResponse(REWRITE_DONE, node);
1741 }
1742
1743 Node TheoryStringsRewriter::rewriteSubstr(Node node)
1744 {
1745 Assert(node.getKind() == kind::STRING_SUBSTR);
1746
1747 NodeManager* nm = NodeManager::currentNM();
1748 if (node[0].isConst())
1749 {
1750 if (node[0].getConst<String>().size() == 0)
1751 {
1752 Node ret = node[0];
1753 return returnRewrite(node, ret, "ss-emptystr");
1754 }
1755 // rewriting for constant arguments
1756 if (node[1].isConst() && node[2].isConst())
1757 {
1758 CVC4::String s = node[0].getConst<String>();
1759 CVC4::Rational rMaxInt(String::maxSize());
1760 uint32_t start;
1761 if (node[1].getConst<Rational>() > rMaxInt)
1762 {
1763 // start beyond the maximum size of strings
1764 // thus, it must be beyond the end point of this string
1765 Node ret = nm->mkConst(::CVC4::String(""));
1766 return returnRewrite(node, ret, "ss-const-start-max-oob");
1767 }
1768 else if (node[1].getConst<Rational>().sgn() < 0)
1769 {
1770 // start before the beginning of the string
1771 Node ret = nm->mkConst(::CVC4::String(""));
1772 return returnRewrite(node, ret, "ss-const-start-neg");
1773 }
1774 else
1775 {
1776 start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1777 if (start >= s.size())
1778 {
1779 // start beyond the end of the string
1780 Node ret = nm->mkConst(::CVC4::String(""));
1781 return returnRewrite(node, ret, "ss-const-start-oob");
1782 }
1783 }
1784 if (node[2].getConst<Rational>() > rMaxInt)
1785 {
1786 // take up to the end of the string
1787 Node ret = nm->mkConst(::CVC4::String(s.suffix(s.size() - start)));
1788 return returnRewrite(node, ret, "ss-const-len-max-oob");
1789 }
1790 else if (node[2].getConst<Rational>().sgn() <= 0)
1791 {
1792 Node ret = nm->mkConst(::CVC4::String(""));
1793 return returnRewrite(node, ret, "ss-const-len-non-pos");
1794 }
1795 else
1796 {
1797 uint32_t len =
1798 node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1799 if (start + len > s.size())
1800 {
1801 // take up to the end of the string
1802 Node ret = nm->mkConst(::CVC4::String(s.suffix(s.size() - start)));
1803 return returnRewrite(node, ret, "ss-const-end-oob");
1804 }
1805 else
1806 {
1807 // compute the substr using the constant string
1808 Node ret = nm->mkConst(::CVC4::String(s.substr(start, len)));
1809 return returnRewrite(node, ret, "ss-const-ss");
1810 }
1811 }
1812 }
1813 }
1814 Node zero = nm->mkConst(CVC4::Rational(0));
1815
1816 // if entailed non-positive length or negative start point
1817 if (checkEntailArith(zero, node[1], true))
1818 {
1819 Node ret = nm->mkConst(::CVC4::String(""));
1820 return returnRewrite(node, ret, "ss-start-neg");
1821 }
1822 else if (checkEntailArith(zero, node[2]))
1823 {
1824 Node ret = nm->mkConst(::CVC4::String(""));
1825 return returnRewrite(node, ret, "ss-len-non-pos");
1826 }
1827
1828 if (node[0].getKind() == STRING_SUBSTR)
1829 {
1830 // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1831 //
1832 // Note that this rewrite can be generalized to:
1833 //
1834 // (str.substr x a b) ---> "" if a >= (str.len x)
1835 //
1836 // This can be done when we generalize our entailment methods to
1837 // accept an optional context. Then we could conjecture that
1838 // (str.substr x a b) rewrites to "" and do a case analysis:
1839 //
1840 // - a < 0 or b < 0 (the result is trivially empty in these cases)
1841 // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1842 //
1843 // For example, for (str.substr (str.substr x a a) a a), we could
1844 // then deduce that under those assumptions, "a" is an
1845 // over-approximation of the length of (str.substr x a a), which
1846 // then allows us to reason that the result of the whole term must
1847 // be empty.
1848 if (checkEntailArith(node[1], node[0][2]))
1849 {
1850 Node ret = nm->mkConst(::CVC4::String(""));
1851 return returnRewrite(node, ret, "ss-start-geq-len");
1852 }
1853 }
1854 else if (node[0].getKind() == STRING_STRREPL)
1855 {
1856 // (str.substr (str.replace x y z) 0 n)
1857 // ---> (str.replace (str.substr x 0 n) y z)
1858 // if (str.len y) = 1 and (str.len z) = 1
1859 if (node[1] == zero)
1860 {
1861 if (checkEntailLengthOne(node[0][1], true)
1862 && checkEntailLengthOne(node[0][2], true))
1863 {
1864 Node ret = nm->mkNode(
1865 kind::STRING_STRREPL,
1866 nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1867 node[0][1],
1868 node[0][2]);
1869 return returnRewrite(node, ret, "substr-repl-swap");
1870 }
1871 }
1872 }
1873
1874 std::vector<Node> n1;
1875 utils::getConcat(node[0], n1);
1876
1877 // definite inclusion
1878 if (node[1] == zero)
1879 {
1880 Node curr = node[2];
1881 std::vector<Node> childrenr;
1882 if (stripSymbolicLength(n1, childrenr, 1, curr))
1883 {
1884 if (curr != zero && !n1.empty())
1885 {
1886 childrenr.push_back(nm->mkNode(kind::STRING_SUBSTR,
1887 utils::mkConcat(STRING_CONCAT, n1),
1888 node[1],
1889 curr));
1890 }
1891 Node ret = utils::mkConcat(STRING_CONCAT, childrenr);
1892 return returnRewrite(node, ret, "ss-len-include");
1893 }
1894 }
1895
1896 // symbolic length analysis
1897 for (unsigned r = 0; r < 2; r++)
1898 {
1899 // the amount of characters we can strip
1900 Node curr;
1901 if (r == 0)
1902 {
1903 if (node[1] != zero)
1904 {
1905 // strip up to start point off the start of the string
1906 curr = node[1];
1907 }
1908 }
1909 else if (r == 1)
1910 {
1911 Node tot_len =
1912 Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1913 Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1914 if (node[2] != tot_len)
1915 {
1916 if (checkEntailArith(node[2], tot_len))
1917 {
1918 // end point beyond end point of string, map to tot_len
1919 Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1920 return returnRewrite(node, ret, "ss-end-pt-norm");
1921 }
1922 else
1923 {
1924 // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1925 curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1926 }
1927 }
1928
1929 // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1930 Node n1_lt_tot_len =
1931 Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1932 if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false))
1933 {
1934 Node ret = nm->mkConst(::CVC4::String(""));
1935 return returnRewrite(node, ret, "ss-start-entails-zero-len");
1936 }
1937
1938 // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1939 Node non_zero_len =
1940 Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1941 if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false))
1942 {
1943 Node ret = nm->mkConst(::CVC4::String(""));
1944 return returnRewrite(node, ret, "ss-non-zero-len-entails-oob");
1945 }
1946
1947 // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1948 Node geq_zero_start =
1949 Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1950 if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false))
1951 {
1952 Node ret = nm->mkConst(String(""));
1953 return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s");
1954 }
1955
1956 // (str.substr s x x) ---> "" if (str.len s) <= 1
1957 if (node[1] == node[2] && checkEntailLengthOne(node[0]))
1958 {
1959 Node ret = nm->mkConst(::CVC4::String(""));
1960 return returnRewrite(node, ret, "ss-len-one-z-z");
1961 }
1962 }
1963 if (!curr.isNull())
1964 {
1965 // strip off components while quantity is entailed positive
1966 int dir = r == 0 ? 1 : -1;
1967 std::vector<Node> childrenr;
1968 if (stripSymbolicLength(n1, childrenr, dir, curr))
1969 {
1970 if (r == 0)
1971 {
1972 Node ret = nm->mkNode(kind::STRING_SUBSTR,
1973 utils::mkConcat(STRING_CONCAT, n1),
1974 curr,
1975 node[2]);
1976 return returnRewrite(node, ret, "ss-strip-start-pt");
1977 }
1978 else
1979 {
1980 Node ret = nm->mkNode(kind::STRING_SUBSTR,
1981 utils::mkConcat(STRING_CONCAT, n1),
1982 node[1],
1983 node[2]);
1984 return returnRewrite(node, ret, "ss-strip-end-pt");
1985 }
1986 }
1987 }
1988 }
1989 // combine substr
1990 if (node[0].getKind() == kind::STRING_SUBSTR)
1991 {
1992 Node start_inner = node[0][1];
1993 Node start_outer = node[1];
1994 if (checkEntailArith(start_outer) && checkEntailArith(start_inner))
1995 {
1996 // both are positive
1997 // thus, start point is definitely start_inner+start_outer.
1998 // We can rewrite if it for certain what the length is
1999
2000 // the length of a string from the inner substr subtracts the start point
2001 // of the outer substr
2002 Node len_from_inner =
2003 Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
2004 Node len_from_outer = node[2];
2005 Node new_len;
2006 // take quantity that is for sure smaller than the other
2007 if (len_from_inner == len_from_outer)
2008 {
2009 new_len = len_from_inner;
2010 }
2011 else if (checkEntailArith(len_from_inner, len_from_outer))
2012 {
2013 new_len = len_from_outer;
2014 }
2015 else if (checkEntailArith(len_from_outer, len_from_inner))
2016 {
2017 new_len = len_from_inner;
2018 }
2019 if (!new_len.isNull())
2020 {
2021 Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
2022 Node ret =
2023 nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
2024 return returnRewrite(node, ret, "ss-combine");
2025 }
2026 }
2027 }
2028 Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
2029 return node;
2030 }
2031
2032 Node TheoryStringsRewriter::rewriteContains( Node node ) {
2033 Assert(node.getKind() == kind::STRING_STRCTN);
2034 NodeManager* nm = NodeManager::currentNM();
2035
2036 if( node[0] == node[1] ){
2037 Node ret = NodeManager::currentNM()->mkConst(true);
2038 return returnRewrite(node, ret, "ctn-eq");
2039 }
2040 if (node[0].isConst())
2041 {
2042 CVC4::String s = node[0].getConst<String>();
2043 if (node[1].isConst())
2044 {
2045 CVC4::String t = node[1].getConst<String>();
2046 Node ret =
2047 NodeManager::currentNM()->mkConst(s.find(t) != std::string::npos);
2048 return returnRewrite(node, ret, "ctn-const");
2049 }else{
2050 Node t = node[1];
2051 if (s.size() == 0)
2052 {
2053 Node len1 =
2054 NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2055 if (checkEntailArith(len1, true))
2056 {
2057 // we handle the false case here since the rewrite for equality
2058 // uses this function, hence we want to conclude false if possible.
2059 // len(x)>0 => contains( "", x ) ---> false
2060 Node ret = NodeManager::currentNM()->mkConst(false);
2061 return returnRewrite(node, ret, "ctn-lhs-emptystr");
2062 }
2063 }
2064 else if (checkEntailLengthOne(t))
2065 {
2066 const std::vector<unsigned>& vec = s.getVec();
2067
2068 NodeBuilder<> nb(OR);
2069 nb << nm->mkConst(String("")).eqNode(t);
2070 for (unsigned c : vec)
2071 {
2072 std::vector<unsigned> sv = {c};
2073 nb << nm->mkConst(String(sv)).eqNode(t);
2074 }
2075
2076 // str.contains("ABCabc", t) --->
2077 // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2078 // if len(t) <= 1
2079 Node ret = nb;
2080 return returnRewrite(node, ret, "ctn-split");
2081 }
2082 else if (node[1].getKind() == kind::STRING_CONCAT)
2083 {
2084 int firstc, lastc;
2085 if (!canConstantContainConcat(node[0], node[1], firstc, lastc))
2086 {
2087 Node ret = NodeManager::currentNM()->mkConst(false);
2088 return returnRewrite(node, ret, "ctn-nconst-ctn-concat");
2089 }
2090 }
2091 }
2092 }
2093 if (node[1].isConst())
2094 {
2095 CVC4::String t = node[1].getConst<String>();
2096 if (t.size() == 0)
2097 {
2098 // contains( x, "" ) ---> true
2099 Node ret = NodeManager::currentNM()->mkConst(true);
2100 return returnRewrite(node, ret, "ctn-rhs-emptystr");
2101 }
2102 else if (t.size() == 1)
2103 {
2104 // The following rewrites are specific to a single character second
2105 // argument of contains, where we can reason that this character is
2106 // not split over multiple components in the first argument.
2107 if (node[0].getKind() == STRING_CONCAT)
2108 {
2109 std::vector<Node> nc1;
2110 utils::getConcat(node[0], nc1);
2111 NodeBuilder<> nb(OR);
2112 for (const Node& ncc : nc1)
2113 {
2114 nb << nm->mkNode(STRING_STRCTN, ncc, node[1]);
2115 }
2116 Node ret = nb.constructNode();
2117 // str.contains( x ++ y, "A" ) --->
2118 // str.contains( x, "A" ) OR str.contains( y, "A" )
2119 return returnRewrite(node, ret, "ctn-concat-char");
2120 }
2121 else if (node[0].getKind() == STRING_STRREPL)
2122 {
2123 Node rplDomain = checkEntailContains(node[0][1], node[1]);
2124 if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2125 {
2126 Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2127 Node d2 =
2128 nm->mkNode(AND,
2129 nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2130 nm->mkNode(STRING_STRCTN, node[0][2], node[1]));
2131 Node ret = nm->mkNode(OR, d1, d2);
2132 // If str.contains( y, "A" ) ---> false, then:
2133 // str.contains( str.replace( x, y, z ), "A" ) --->
2134 // str.contains( x, "A" ) OR
2135 // ( str.contains( x, y ) AND str.contains( z, "A" ) )
2136 return returnRewrite(node, ret, "ctn-repl-char");
2137 }
2138 }
2139 }
2140 }
2141 std::vector<Node> nc1;
2142 utils::getConcat(node[0], nc1);
2143 std::vector<Node> nc2;
2144 utils::getConcat(node[1], nc2);
2145
2146 // component-wise containment
2147 std::vector<Node> nc1rb;
2148 std::vector<Node> nc1re;
2149 if (componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2150 {
2151 Node ret = NodeManager::currentNM()->mkConst(true);
2152 return returnRewrite(node, ret, "ctn-component");
2153 }
2154
2155 // strip endpoints
2156 std::vector<Node> nb;
2157 std::vector<Node> ne;
2158 if (stripConstantEndpoints(nc1, nc2, nb, ne))
2159 {
2160 Node ret = NodeManager::currentNM()->mkNode(
2161 kind::STRING_STRCTN, utils::mkConcat(STRING_CONCAT, nc1), node[1]);
2162 return returnRewrite(node, ret, "ctn-strip-endpt");
2163 }
2164
2165 for (const Node& n : nc2)
2166 {
2167 if (n.getKind() == kind::STRING_STRREPL)
2168 {
2169 // (str.contains x (str.replace y z w)) --> false
2170 // if (str.contains x y) = false and (str.contains x w) = false
2171 //
2172 // Reasoning: (str.contains x y) checks that x does not contain y if the
2173 // replacement does not change y. (str.contains x w) checks that if the
2174 // replacement changes anything in y, the w makes it impossible for it to
2175 // occur in x.
2176 Node ctnConst = checkEntailContains(node[0], n[0]);
2177 if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2178 {
2179 Node ctnConst2 = checkEntailContains(node[0], n[2]);
2180 if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2181 {
2182 Node res = nm->mkConst(false);
2183 return returnRewrite(node, res, "ctn-rpl-non-ctn");
2184 }
2185 }
2186
2187 // (str.contains x (str.++ w (str.replace x y x) z)) --->
2188 // (and (= w "") (= x (str.replace x y x)) (= z ""))
2189 //
2190 // TODO: Remove with under-/over-approximation
2191 if (node[0] == n[0] && node[0] == n[2])
2192 {
2193 Node ret;
2194 if (nc2.size() > 1)
2195 {
2196 Node emp = nm->mkConst(CVC4::String(""));
2197 NodeBuilder<> nb(kind::AND);
2198 for (const Node& n2 : nc2)
2199 {
2200 if (n2 == n)
2201 {
2202 nb << nm->mkNode(kind::EQUAL, node[0], node[1]);
2203 }
2204 else
2205 {
2206 nb << nm->mkNode(kind::EQUAL, emp, n2);
2207 }
2208 }
2209 ret = nb.constructNode();
2210 }
2211 else
2212 {
2213 ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2214 }
2215 return returnRewrite(node, ret, "ctn-repl-self");
2216 }
2217 }
2218 }
2219
2220 // length entailment
2221 Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2222 Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2223 if (checkEntailArith(len_n2, len_n1, true))
2224 {
2225 // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2226 Node ret = NodeManager::currentNM()->mkConst(false);
2227 return returnRewrite(node, ret, "ctn-len-ineq");
2228 }
2229
2230 // multi-set reasoning
2231 // For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2232 // since the number of a's in the second argument is greater than the number
2233 // of a's in the first argument
2234 if (checkEntailMultisetSubset(node[0], node[1]))
2235 {
2236 Node ret = nm->mkConst(false);
2237 return returnRewrite(node, ret, "ctn-mset-nss");
2238 }
2239
2240 if (checkEntailArith(len_n2, len_n1, false))
2241 {
2242 // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2243 Node ret = node[0].eqNode(node[1]);
2244 return returnRewrite(node, ret, "ctn-len-ineq-nstrict");
2245 }
2246
2247 // splitting
2248 if (node[0].getKind() == kind::STRING_CONCAT)
2249 {
2250 if( node[1].isConst() ){
2251 CVC4::String t = node[1].getConst<String>();
2252 // Below, we are looking for a constant component of node[0]
2253 // has no overlap with node[1], which means we can split.
2254 // Notice that if the first or last components had no
2255 // overlap, these would have been removed by strip
2256 // constant endpoints above.
2257 // Hence, we consider only the inner children.
2258 for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2259 {
2260 //constant contains
2261 if( node[0][i].isConst() ){
2262 CVC4::String s = node[0][i].getConst<String>();
2263 // if no overlap, we can split into disjunction
2264 if (s.noOverlapWith(t))
2265 {
2266 std::vector<Node> nc0;
2267 utils::getConcat(node[0], nc0);
2268 std::vector<Node> spl[2];
2269 spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2270 Assert(i < nc0.size() - 1);
2271 spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2272 Node ret = NodeManager::currentNM()->mkNode(
2273 kind::OR,
2274 NodeManager::currentNM()->mkNode(
2275 kind::STRING_STRCTN,
2276 utils::mkConcat(STRING_CONCAT, spl[0]),
2277 node[1]),
2278 NodeManager::currentNM()->mkNode(
2279 kind::STRING_STRCTN,
2280 utils::mkConcat(STRING_CONCAT, spl[1]),
2281 node[1]));
2282 return returnRewrite(node, ret, "ctn-split");
2283 }
2284 }
2285 }
2286 }
2287 }
2288 else if (node[0].getKind() == kind::STRING_SUBSTR)
2289 {
2290 // (str.contains (str.substr x n (str.len y)) y) --->
2291 // (= (str.substr x n (str.len y)) y)
2292 //
2293 // TODO: Remove with under-/over-approximation
2294 if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2295 {
2296 Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2297 return returnRewrite(node, ret, "ctn-substr");
2298 }
2299 }
2300 else if (node[0].getKind() == kind::STRING_STRREPL)
2301 {
2302 if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2303 {
2304 String c = node[1].getConst<String>();
2305 if (c.noOverlapWith(node[0][1].getConst<String>())
2306 && c.noOverlapWith(node[0][2].getConst<String>()))
2307 {
2308 // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2309 // if there is no overlap between c1 and c3 and none between c2 and c3
2310 Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2311 return returnRewrite(node, ret, "ctn-repl-cnsts-to-ctn");
2312 }
2313 }
2314
2315 if (node[0][0] == node[0][2])
2316 {
2317 // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2318 if (node[0][1] == node[1])
2319 {
2320 Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2321 return returnRewrite(node, ret, "ctn-repl-to-ctn");
2322 }
2323
2324 // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2325 // if (str.len z) <= 1
2326 if (checkEntailLengthOne(node[1]))
2327 {
2328 Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2329 return returnRewrite(node, ret, "ctn-repl-len-one-to-ctn");
2330 }
2331 }
2332
2333 // (str.contains (str.replace x y z) z) --->
2334 // (or (str.contains x y) (str.contains x z))
2335 if (node[0][2] == node[1])
2336 {
2337 Node ret = nm->mkNode(OR,
2338 nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2339 nm->mkNode(STRING_STRCTN, node[0][0], node[0][2]));
2340 return returnRewrite(node, ret, "ctn-repl-to-ctn-disj");
2341 }
2342
2343 // (str.contains (str.replace x y z) w) --->
2344 // (str.contains (str.replace x y "") w)
2345 // if (str.contains z w) ---> false and (str.len w) = 1
2346 if (checkEntailLengthOne(node[1]))
2347 {
2348 Node ctn = checkEntailContains(node[1], node[0][2]);
2349 if (!ctn.isNull() && !ctn.getConst<bool>())
2350 {
2351 Node empty = nm->mkConst(String(""));
2352 Node ret = nm->mkNode(
2353 kind::STRING_STRCTN,
2354 nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty),
2355 node[1]);
2356 return returnRewrite(node, ret, "ctn-repl-simp-repl");
2357 }
2358 }
2359 }
2360
2361 if (node[1].getKind() == kind::STRING_STRREPL)
2362 {
2363 // (str.contains x (str.replace y x y)) --->
2364 // (str.contains x y)
2365 if (node[0] == node[1][1] && node[1][0] == node[1][2])
2366 {
2367 Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]);
2368 return returnRewrite(node, ret, "ctn-repl");
2369 }
2370
2371 // (str.contains x (str.replace "" x y)) --->
2372 // (= "" (str.replace "" x y))
2373 //
2374 // Note: Length-based reasoning is not sufficient to get this rewrite. We
2375 // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2376 // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2377 Node emp = nm->mkConst(CVC4::String(""));
2378 if (node[0] == node[1][1] && node[1][0] == emp)
2379 {
2380 Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2381 return returnRewrite(node, ret, "ctn-repl-empty");
2382 }
2383 }
2384
2385 Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
2386 return node;
2387 }
2388
2389 Node TheoryStringsRewriter::rewriteIndexof( Node node ) {
2390 Assert(node.getKind() == kind::STRING_STRIDOF);
2391 NodeManager* nm = NodeManager::currentNM();
2392
2393 if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2394 {
2395 // z<0 implies str.indexof( x, y, z ) --> -1
2396 Node negone = nm->mkConst(Rational(-1));
2397 return returnRewrite(node, negone, "idof-neg");
2398 }
2399
2400 // evaluation and simple cases
2401 std::vector<Node> children0;
2402 utils::getConcat(node[0], children0);
2403 if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2404 {
2405 CVC4::Rational rMaxInt(CVC4::String::maxSize());
2406 if (node[2].getConst<Rational>() > rMaxInt)
2407 {
2408 // We know that, due to limitations on the size of string constants
2409 // in our implementation, that accessing a position greater than
2410 // rMaxInt is guaranteed to be out of bounds.
2411 Node negone = nm->mkConst(Rational(-1));
2412 return returnRewrite(node, negone, "idof-max");
2413 }
2414 Assert(node[2].getConst<Rational>().sgn() >= 0);
2415 uint32_t start =
2416 node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2417 CVC4::String s = children0[0].getConst<String>();
2418 CVC4::String t = node[1].getConst<String>();
2419 std::size_t ret = s.find(t, start);
2420 if (ret != std::string::npos)
2421 {
2422 Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2423 return returnRewrite(node, retv, "idof-find");
2424 }
2425 else if (children0.size() == 1)
2426 {
2427 Node negone = nm->mkConst(Rational(-1));
2428 return returnRewrite(node, negone, "idof-nfind");
2429 }
2430 }
2431
2432 if (node[0] == node[1])
2433 {
2434 if (node[2].isConst())
2435 {
2436 if (node[2].getConst<Rational>().sgn() == 0)
2437 {
2438 // indexof( x, x, 0 ) --> 0
2439 Node zero = nm->mkConst(Rational(0));
2440 return returnRewrite(node, zero, "idof-eq-cst-start");
2441 }
2442 }
2443 if (checkEntailArith(node[2], true))
2444 {
2445 // y>0 implies indexof( x, x, y ) --> -1
2446 Node negone = nm->mkConst(Rational(-1));
2447 return returnRewrite(node, negone, "idof-eq-nstart");
2448 }
2449 Node emp = nm->mkConst(CVC4::String(""));
2450 if (node[0] != emp)
2451 {
2452 // indexof( x, x, z ) ---> indexof( "", "", z )
2453 Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]);
2454 return returnRewrite(node, ret, "idof-eq-norm");
2455 }
2456 }
2457
2458 Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2459 Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2460 Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2461
2462 if (node[1].isConst())
2463 {
2464 CVC4::String t = node[1].getConst<String>();
2465 if (t.size() == 0)
2466 {
2467 if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2]))
2468 {
2469 // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2470 return returnRewrite(node, node[2], "idof-emp-idof");
2471 }
2472 }
2473 }
2474
2475 if (checkEntailArith(len1, len0m2, true))
2476 {
2477 // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1
2478 Node negone = nm->mkConst(Rational(-1));
2479 return returnRewrite(node, negone, "idof-len");
2480 }
2481
2482 Node fstr = node[0];
2483 if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2484 {
2485 fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2486 fstr = Rewriter::rewrite(fstr);
2487 }
2488
2489 Node cmp_conr = checkEntailContains(fstr, node[1]);
2490 Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2491 << fstr << ", " << node[1] << ")" << std::endl;
2492 Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2493 std::vector<Node> children1;
2494 utils::getConcat(node[1], children1);
2495 if (!cmp_conr.isNull())
2496 {
2497 if (cmp_conr.getConst<bool>())
2498 {
2499 if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2500 {
2501 // past the first position in node[0] that contains node[1], we can drop
2502 std::vector<Node> nb;
2503 std::vector<Node> ne;
2504 int cc = componentContains(children0, children1, nb, ne, true, 1);
2505 if (cc != -1 && !ne.empty())
2506 {
2507 // For example:
2508 // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2509 Node nn = utils::mkConcat(STRING_CONCAT, children0);
2510 Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2511 return returnRewrite(node, ret, "idof-def-ctn");
2512 }
2513
2514 // Strip components from the beginning that are guaranteed not to match
2515 if (stripConstantEndpoints(children0, children1, nb, ne, 1))
2516 {
2517 // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2518 // 2 + str.indexof(str.++(x, "C"), "C", 0)
2519 Node ret =
2520 nm->mkNode(kind::PLUS,
2521 nm->mkNode(kind::STRING_LENGTH,
2522 utils::mkConcat(STRING_CONCAT, nb)),
2523 nm->mkNode(kind::STRING_STRIDOF,
2524 utils::mkConcat(STRING_CONCAT, children0),
2525 node[1],
2526 node[2]));
2527 return returnRewrite(node, ret, "idof-strip-cnst-endpts");
2528 }
2529 }
2530
2531 // strip symbolic length
2532 Node new_len = node[2];
2533 std::vector<Node> nr;
2534 if (stripSymbolicLength(children0, nr, 1, new_len))
2535 {
2536 // For example:
2537 // z>str.len( x1 ) and str.contains( x2, y )-->true
2538 // implies
2539 // str.indexof( str.++( x1, x2 ), y, z ) --->
2540 // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2541 Node nn = utils::mkConcat(STRING_CONCAT, children0);
2542 Node ret =
2543 nm->mkNode(kind::PLUS,
2544 nm->mkNode(kind::MINUS, node[2], new_len),
2545 nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len));
2546 return returnRewrite(node, ret, "idof-strip-sym-len");
2547 }
2548 }
2549 else
2550 {
2551 // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1
2552 Node negone = nm->mkConst(Rational(-1));
2553 return returnRewrite(node, negone, "idof-nctn");
2554 }
2555 }
2556 else
2557 {
2558 Node new_len = node[2];
2559 std::vector<Node> nr;
2560 if (stripSymbolicLength(children0, nr, 1, new_len))
2561 {
2562 // Normalize the string before the start index.
2563 //
2564 // For example:
2565 // str.indexof(str.++("ABCD", x), y, 3) --->
2566 // str.indexof(str.++("AAAD", x), y, 3)
2567 Node nodeNr = utils::mkConcat(STRING_CONCAT, nr);
2568 Node normNr = lengthPreserveRewrite(nodeNr);
2569 if (normNr != nodeNr)
2570 {
2571 std::vector<Node> normNrChildren;
2572 utils::getConcat(normNr, normNrChildren);
2573 std::vector<Node> children(normNrChildren);
2574 children.insert(children.end(), children0.begin(), children0.end());
2575 Node nn = utils::mkConcat(STRING_CONCAT, children);
2576 Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2577 return returnRewrite(node, res, "idof-norm-prefix");
2578 }
2579 }
2580 }
2581
2582 if (node[2].isConst() && node[2].getConst<Rational>().sgn()==0)
2583 {
2584 std::vector<Node> cb;
2585 std::vector<Node> ce;
2586 if (stripConstantEndpoints(children0, children1, cb, ce, -1))
2587 {
2588 Node ret = utils::mkConcat(STRING_CONCAT, children0);
2589 ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]);
2590 // For example:
2591 // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2592 return returnRewrite(node, ret, "rpl-pull-endpt");
2593 }
2594 }
2595
2596 Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
2597 return node;
2598 }
2599
2600 Node TheoryStringsRewriter::rewriteReplace( Node node ) {
2601 Assert(node.getKind() == kind::STRING_STRREPL);
2602 NodeManager* nm = NodeManager::currentNM();
2603
2604 if (node[1].isConst() && node[1].getConst<String>().isEmptyString())
2605 {
2606 Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2607 return returnRewrite(node, ret, "rpl-rpl-empty");
2608 }
2609
2610 std::vector<Node> children0;
2611 utils::getConcat(node[0], children0);
2612
2613 if (node[1].isConst() && children0[0].isConst())
2614 {
2615 CVC4::String s = children0[0].getConst<String>();
2616 CVC4::String t = node[1].getConst<String>();
2617 std::size_t p = s.find(t);
2618 if (p == std::string::npos)
2619 {
2620 if (children0.size() == 1)
2621 {
2622 return returnRewrite(node, node[0], "rpl-const-nfind");
2623 }
2624 }
2625 else
2626 {
2627 String s1 = s.substr(0, (int)p);
2628 String s3 = s.substr((int)p + (int)t.size());
2629 std::vector<Node> children;
2630 if (s1.size() > 0)
2631 {
2632 Node ns1 = nm->mkConst(String(s1));
2633 children.push_back(ns1);
2634 }
2635 children.push_back(node[2]);
2636 if (s3.size() > 0)
2637 {
2638 Node ns3 = nm->mkConst(String(s3));
2639 children.push_back(ns3);
2640 }
2641 children.insert(children.end(), children0.begin() + 1, children0.end());
2642 Node ret = utils::mkConcat(STRING_CONCAT, children);
2643 return returnRewrite(node, ret, "rpl-const-find");
2644 }
2645 }
2646
2647 // rewrites that apply to both replace and replaceall
2648 Node rri = rewriteReplaceInternal(node);
2649 if (!rri.isNull())
2650 {
2651 // printing of the rewrite managed by the call above
2652 return rri;
2653 }
2654
2655 if (node[0] == node[2])
2656 {
2657 // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2658 Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2659 Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2660 if (checkEntailArith(l1, l0))
2661 {
2662 return returnRewrite(node, node[0], "rpl-rpl-len-id");
2663 }
2664
2665 // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2666 // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2667 if (checkEntailLengthOne(node[0]))
2668 {
2669 Node empty = nm->mkConst(String(""));
2670 Node rn1 = Rewriter::rewrite(
2671 rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2672 if (rn1 != node[1])
2673 {
2674 std::vector<Node> emptyNodes;
2675 bool allEmptyEqs;
2676 std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1);
2677
2678 if (allEmptyEqs)
2679 {
2680 Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodes);
2681 if (node[1] != nn1)
2682 {
2683 Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]);
2684 return returnRewrite(node, ret, "rpl-x-y-x-simp");
2685 }
2686 }
2687 }
2688 }
2689 }
2690
2691 std::vector<Node> children1;
2692 utils::getConcat(node[1], children1);
2693
2694 // check if contains definitely does (or does not) hold
2695 Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]);
2696 Node cmp_conr = Rewriter::rewrite(cmp_con);
2697 if (!checkEntailContains(node[0], node[1]).isNull())
2698 {
2699 if (cmp_conr.getConst<bool>())
2700 {
2701 // component-wise containment
2702 std::vector<Node> cb;
2703 std::vector<Node> ce;
2704 int cc = componentContains(children0, children1, cb, ce, true, 1);
2705 if (cc != -1)
2706 {
2707 if (cc == 0 && children0[0] == children1[0])
2708 {
2709 // definitely a prefix, can do the replace
2710 // for example,
2711 // str.replace( str.++( x, "ab" ), str.++( x, "a" ), y ) --->
2712 // str.++( y, "b" )
2713 std::vector<Node> cres;
2714 cres.push_back(node[2]);
2715 cres.insert(cres.end(), ce.begin(), ce.end());
2716 Node ret = utils::mkConcat(STRING_CONCAT, cres);
2717 return returnRewrite(node, ret, "rpl-cctn-rpl");
2718 }
2719 else if (!ce.empty())
2720 {
2721 // we can pull remainder past first definite containment
2722 // for example,
2723 // str.replace( str.++( x, "ab" ), "a", y ) --->
2724 // str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2725 // this is independent of whether the second argument may be empty
2726 std::vector<Node> cc;
2727 cc.push_back(NodeManager::currentNM()->mkNode(
2728 kind::STRING_STRREPL,
2729 utils::mkConcat(STRING_CONCAT, children0),
2730 node[1],
2731 node[2]));
2732 cc.insert(cc.end(), ce.begin(), ce.end());
2733 Node ret = utils::mkConcat(STRING_CONCAT, cc);
2734 return returnRewrite(node, ret, "rpl-cctn");
2735 }
2736 }
2737 }
2738 else
2739 {
2740 // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2741 return returnRewrite(node, node[0], "rpl-nctn");
2742 }
2743 }
2744 else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2745 {
2746 // Rewriting the str.contains may return equalities of the form (= x "").
2747 // In that case, we can substitute the variables appearing in those
2748 // equalities with the empty string in the third argument of the
2749 // str.replace. For example:
2750 //
2751 // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2752 //
2753 // This can be done because str.replace changes x iff (str.++ x y) is in x
2754 // but that means that y must be empty in that case. Thus, we can
2755 // substitute y with "" in the third argument. Note that the third argument
2756 // does not matter when the str.replace does not apply.
2757 //
2758 Node empty = nm->mkConst(::CVC4::String(""));
2759
2760 std::vector<Node> emptyNodes;
2761 bool allEmptyEqs;
2762 std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr);
2763
2764 if (emptyNodes.size() > 0)
2765 {
2766 // Perform the substitutions
2767 std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2768 Node nn2 = node[2].substitute(
2769 emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2770
2771 // If the contains rewrites to a conjunction of empty-string equalities
2772 // and we are doing the replacement in an empty string, we can rewrite
2773 // the string-to-replace with a concatenation of all the terms that must
2774 // be empty:
2775 //
2776 // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn) z)
2777 // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2778 if (node[0] == empty && allEmptyEqs)
2779 {
2780 std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2781 Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodesList);
2782 if (nn1 != node[1] || nn2 != node[2])
2783 {
2784 Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2);
2785 return returnRewrite(node, res, "rpl-emp-cnts-substs");
2786 }
2787 }
2788
2789 if (nn2 != node[2])
2790 {
2791 Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2);
2792 return returnRewrite(node, res, "rpl-cnts-substs");
2793 }
2794 }
2795 }
2796
2797 if (cmp_conr != cmp_con)
2798 {
2799 if (checkEntailNonEmpty(node[1]))
2800 {
2801 // pull endpoints that can be stripped
2802 // for example,
2803 // str.replace( str.++( "b", x, "b" ), "a", y ) --->
2804 // str.++( "b", str.replace( x, "a", y ), "b" )
2805 std::vector<Node> cb;
2806 std::vector<Node> ce;
2807 if (stripConstantEndpoints(children0, children1, cb, ce))
2808 {
2809 std::vector<Node> cc;
2810 cc.insert(cc.end(), cb.begin(), cb.end());
2811 cc.push_back(NodeManager::currentNM()->mkNode(
2812 kind::STRING_STRREPL,
2813 utils::mkConcat(STRING_CONCAT, children0),
2814 node[1],
2815 node[2]));
2816 cc.insert(cc.end(), ce.begin(), ce.end());
2817 Node ret = utils::mkConcat(STRING_CONCAT, cc);
2818 return returnRewrite(node, ret, "rpl-pull-endpt");
2819 }
2820 }
2821 }
2822
2823 children1.clear();
2824 utils::getConcat(node[1], children1);
2825 Node lastChild1 = children1[children1.size() - 1];
2826 if (lastChild1.getKind() == kind::STRING_SUBSTR)
2827 {
2828 // (str.replace x (str.++ t (str.substr y i j)) z) --->
2829 // (str.replace x (str.++ t
2830 // (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2831 // if j > len(x)
2832 //
2833 // Reasoning: If the string to be replaced is longer than x, then it does
2834 // not matter how much longer it is, the result is always x. Thus, it is
2835 // fine to only look at the prefix of length len(x) + 1 - len(t).
2836
2837 children1.pop_back();
2838 // Length of the non-substr components in the second argument
2839 Node partLen1 = nm->mkNode(kind::STRING_LENGTH,
2840 utils::mkConcat(STRING_CONCAT, children1));
2841 Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2842
2843 Node zero = nm->mkConst(Rational(0));
2844 Node one = nm->mkConst(Rational(1));
2845 Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2846 Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2847 // Check len(t) + j > len(x) + 1
2848 if (checkEntailArith(maxLen1, len0_1, true))
2849 {
2850 children1.push_back(nm->mkNode(
2851 kind::STRING_SUBSTR,
2852 lastChild1[0],
2853 lastChild1[1],
2854 nm->mkNode(
2855 kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2856 Node res = nm->mkNode(kind::STRING_STRREPL,
2857 node[0],
2858 utils::mkConcat(STRING_CONCAT, children1),
2859 node[2]);
2860 return returnRewrite(node, res, "repl-subst-idx");
2861 }
2862 }
2863
2864 if (node[0].getKind() == STRING_STRREPL)
2865 {
2866 Node x = node[0];
2867 Node y = node[1];
2868 Node z = node[2];
2869 if (x[0] == x[2] && x[0] == y)
2870 {
2871 // (str.replace (str.replace y w y) y z) -->
2872 // (str.replace (str.replace y w z) y z)
2873 // if (str.len w) >= (str.len z) and w != z
2874 //
2875 // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2876 // does appear in y.
2877 //
2878 // Case (1): In this case, the reasoning is trivial. The
2879 // inner replace does not do anything, so we can just replace its third
2880 // argument with any string.
2881 //
2882 // Case (2): After the inner replace, we are guaranteed to have a string
2883 // that contains y at the index of w in the original string y. The outer
2884 // replace then replaces that y with z, so we can short-circuit that
2885 // replace by directly replacing w with z in the inner replace. We can
2886 // only do that if the result of the new inner replace does not contain
2887 // y, otherwise we end up doing two replaces that are different from the
2888 // original expression. We enforce that by requiring that the length of w
2889 // has to be greater or equal to the length of z and that w and z have to
2890 // be different. This makes sure that an inner replace changes a string
2891 // to a string that is shorter than y, making it impossible for the outer
2892 // replace to match.
2893 Node w = x[1];
2894
2895 // (str.len w) >= (str.len z)
2896 Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2897 Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2898 if (checkEntailArith(wlen, zlen))
2899 {
2900 // w != z
2901 Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2902 if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2903 {
2904 Node ret = nm->mkNode(kind::STRING_STRREPL,
2905 nm->mkNode(kind::STRING_STRREPL, y, w, z),
2906 y,
2907 z);
2908 return returnRewrite(node, ret, "repl-repl-short-circuit");
2909 }
2910 }
2911 }
2912 }
2913
2914 if (node[1].getKind() == STRING_STRREPL)
2915 {
2916 if (node[1][0] == node[0])
2917 {
2918 if (node[1][0] == node[1][2] && node[1][0] == node[2])
2919 {
2920 // str.replace( x, str.replace( x, y, x ), x ) ---> x
2921 return returnRewrite(node, node[0], "repl-repl2-inv-id");
2922 }
2923 bool dualReplIteSuccess = false;
2924 Node cmp_con = checkEntailContains(node[1][0], node[1][2]);
2925 if (!cmp_con.isNull() && !cmp_con.getConst<bool>())
2926 {
2927 // str.contains( x, z ) ---> false
2928 // implies
2929 // str.replace( x, str.replace( x, y, z ), w ) --->
2930 // ite( str.contains( x, y ), x, w )
2931 dualReplIteSuccess = true;
2932 }
2933 else
2934 {
2935 // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2936 // implies
2937 // str.replace( x, str.replace( x, y, z ), w ) --->
2938 // ite( str.contains( x, y ), x, w )
2939 cmp_con = checkEntailContains(node[1][1], node[1][2]);
2940 if (!cmp_con.isNull() && !cmp_con.getConst<bool>())
2941 {
2942 cmp_con = checkEntailContains(node[1][2], node[1][1]);
2943 if (!cmp_con.isNull() && !cmp_con.getConst<bool>())
2944 {
2945 dualReplIteSuccess = true;
2946 }
2947 }
2948 }
2949 if (dualReplIteSuccess)
2950 {
2951 Node res = nm->mkNode(ITE,
2952 nm->mkNode(STRING_STRCTN, node[0], node[1][1]),
2953 node[0],
2954 node[2]);
2955 return returnRewrite(node, res, "repl-dual-repl-ite");
2956 }
2957 }
2958
2959 bool invSuccess = false;
2960 if (node[1][1] == node[0])
2961 {
2962 if (node[1][0] == node[1][2])
2963 {
2964 // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2965 invSuccess = true;
2966 }
2967 else if (node[1][1] == node[2] || node[1][0] == node[2])
2968 {
2969 // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2970 // implies
2971 // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2972 Node cmp_con = checkEntailContains(node[1][0], node[1][2]);
2973 invSuccess = !cmp_con.isNull() && !cmp_con.getConst<bool>();
2974 }
2975 }
2976 else
2977 {
2978 // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2979 // implies
2980 // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2981 Node cmp_con = checkEntailContains(node[0], node[1][1]);
2982 if (!cmp_con.isNull() && !cmp_con.getConst<bool>())
2983 {
2984 cmp_con = checkEntailContains(node[0], node[1][2]);
2985 invSuccess = !cmp_con.isNull() && !cmp_con.getConst<bool>();
2986 }
2987 }
2988 if (invSuccess)
2989 {
2990 Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]);
2991 return returnRewrite(node, res, "repl-repl2-inv");
2992 }
2993 }
2994 if (node[2].getKind() == STRING_STRREPL)
2995 {
2996 if (node[2][1] == node[0])
2997 {
2998 // str.contains( z, w ) ----> false implies
2999 // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3000 Node cmp_con = checkEntailContains(node[1], node[2][0]);
3001 if (!cmp_con.isNull() && !cmp_con.getConst<bool>())
3002 {
3003 Node res =
3004 nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]);
3005 return returnRewrite(node, res, "repl-repl3-inv");
3006 }
3007 }
3008 if (node[2][0] == node[1])
3009 {
3010 bool success = false;
3011 if (node[2][0] == node[2][2] && node[2][1] == node[0])
3012 {
3013 // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3014 success = true;
3015 }
3016 else
3017 {
3018 // str.contains( x, z ) ----> false implies
3019 // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3020 cmp_con = checkEntailContains(node[0], node[2][1]);
3021 success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3022 }
3023 if (success)
3024 {
3025 return returnRewrite(node, node[0], "repl-repl3-inv-id");
3026 }
3027 }
3028 }
3029 // miniscope based on components that do not contribute to contains
3030 // for example,
3031 // str.replace( x ++ y ++ x ++ y, "A", z ) -->
3032 // str.replace( x ++ y, "A", z ) ++ x ++ y
3033 // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3034 if (checkEntailLengthOne(node[1]))
3035 {
3036 Node lastLhs;
3037 unsigned lastCheckIndex = 0;
3038 for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3039 {
3040 unsigned checkIndex = children0.size() - i;
3041 std::vector<Node> checkLhs;
3042 checkLhs.insert(
3043 checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3044 Node lhs = utils::mkConcat(STRING_CONCAT, checkLhs);
3045 Node rhs = children0[checkIndex];
3046 Node ctn = checkEntailContains(lhs, rhs);
3047 if (!ctn.isNull() && ctn.getConst<bool>())
3048 {
3049 lastLhs = lhs;
3050 lastCheckIndex = checkIndex;
3051 }
3052 else
3053 {
3054 break;
3055 }
3056 }
3057 if (!lastLhs.isNull())
3058 {
3059 std::vector<Node> remc(children0.begin() + lastCheckIndex,
3060 children0.end());
3061 Node rem = utils::mkConcat(STRING_CONCAT, remc);
3062 Node ret =
3063 nm->mkNode(STRING_CONCAT,
3064 nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]),
3065 rem);
3066 // for example:
3067 // str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3068 // Since we know that the first occurrence of "A" cannot be in the
3069 // second occurrence of x. Notice this is specific to single characters
3070 // due to complications with finds that span multiple components for
3071 // non-characters.
3072 return returnRewrite(node, ret, "repl-char-ncontrib-find");
3073 }
3074 }
3075
3076 // TODO (#1180) incorporate these?
3077 // contains( t, s ) =>
3078 // replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3079 // contains( t, s ) =>
3080 // contains( replace( t, s, r ), r ) ----> true
3081
3082 Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
3083 return node;
3084 }
3085
3086 Node TheoryStringsRewriter::rewriteReplaceAll(Node node)
3087 {
3088 Assert(node.getKind() == STRING_STRREPLALL);
3089 NodeManager* nm = NodeManager::currentNM();
3090
3091 if (node[0].isConst() && node[1].isConst())
3092 {
3093 std::vector<Node> children;
3094 String s = node[0].getConst<String>();
3095 String t = node[1].getConst<String>();
3096 if (s.isEmptyString() || t.isEmptyString())
3097 {
3098 return returnRewrite(node, node[0], "replall-empty-find");
3099 }
3100 std::size_t index = 0;
3101 std::size_t curr = 0;
3102 do
3103 {
3104 curr = s.find(t, index);
3105 if (curr != std::string::npos)
3106 {
3107 if (curr > index)
3108 {
3109 children.push_back(nm->mkConst(s.substr(index, curr - index)));
3110 }
3111 children.push_back(node[2]);
3112 index = curr + t.size();
3113 }
3114 else
3115 {
3116 children.push_back(nm->mkConst(s.substr(index, s.size() - index)));
3117 }
3118 } while (curr != std::string::npos && curr < s.size());
3119 // constant evaluation
3120 Node res = utils::mkConcat(STRING_CONCAT, children);
3121 return returnRewrite(node, res, "replall-const");
3122 }
3123
3124 // rewrites that apply to both replace and replaceall
3125 Node rri = rewriteReplaceInternal(node);
3126 if (!rri.isNull())
3127 {
3128 // printing of the rewrite managed by the call above
3129 return rri;
3130 }
3131
3132 Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
3133 return node;
3134 }
3135
3136 Node TheoryStringsRewriter::rewriteReplaceInternal(Node node)
3137 {
3138 Kind nk = node.getKind();
3139 Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL);
3140
3141 if (node[1] == node[2])
3142 {
3143 return returnRewrite(node, node[0], "rpl-id");
3144 }
3145
3146 if (node[0] == node[1])
3147 {
3148 // only holds for replaceall if non-empty
3149 if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1]))
3150 {
3151 return returnRewrite(node, node[2], "rpl-replace");
3152 }
3153 }
3154
3155 return Node::null();
3156 }
3157
3158 Node TheoryStringsRewriter::rewriteStrConvert(Node node)
3159 {
3160 Kind nk = node.getKind();
3161 Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER);
3162 NodeManager* nm = NodeManager::currentNM();
3163 if (node[0].isConst())
3164 {
3165 std::vector<unsigned> nvec = node[0].getConst<String>().getVec();
3166 for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++)
3167 {
3168 unsigned newChar = CVC4::String::convertUnsignedIntToCode(nvec[i]);
3169 // transform it
3170 // upper 65 ... 90
3171 // lower 97 ... 122
3172 if (nk == STRING_TOUPPER)
3173 {
3174 if (newChar >= 97 && newChar <= 122)
3175 {
3176 newChar = newChar - 32;
3177 }
3178 }
3179 else if (nk == STRING_TOLOWER)
3180 {
3181 if (newChar >= 65 && newChar <= 90)
3182 {
3183 newChar = newChar + 32;
3184 }
3185 }
3186 newChar = CVC4::String::convertCodeToUnsignedInt(newChar);
3187 nvec[i] = newChar;
3188 }
3189 Node retNode = nm->mkConst(String(nvec));
3190 return returnRewrite(node, retNode, "str-conv-const");
3191 }
3192 else if (node[0].getKind() == STRING_CONCAT)
3193 {
3194 NodeBuilder<> concatBuilder(STRING_CONCAT);
3195 for (const Node& nc : node[0])
3196 {
3197 concatBuilder << nm->mkNode(nk, nc);
3198 }
3199 // tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 )
3200 Node retNode = concatBuilder.constructNode();
3201 return returnRewrite(node, retNode, "str-conv-minscope-concat");
3202 }
3203 else if (node[0].getKind() == STRING_TOLOWER
3204 || node[0].getKind() == STRING_TOUPPER)
3205 {
3206 // tolower( tolower( x ) ) --> tolower( x )
3207 // tolower( toupper( x ) ) --> tolower( x )
3208 Node retNode = nm->mkNode(nk, node[0][0]);
3209 return returnRewrite(node, retNode, "str-conv-idem");
3210 }
3211 else if (node[0].getKind() == STRING_ITOS)
3212 {
3213 // tolower( str.from.int( x ) ) --> str.from.int( x )
3214 return returnRewrite(node, node[0], "str-conv-itos");
3215 }
3216 return node;
3217 }
3218
3219 Node TheoryStringsRewriter::rewriteStrReverse(Node node)
3220 {
3221 Assert(node.getKind() == STRING_REV);
3222 NodeManager* nm = NodeManager::currentNM();
3223 Node x = node[0];
3224 if (x.isConst())
3225 {
3226 std::vector<unsigned> nvec = node[0].getConst<String>().getVec();
3227 std::reverse(nvec.begin(), nvec.end());
3228 Node retNode = nm->mkConst(String(nvec));
3229 return returnRewrite(node, retNode, "str-conv-const");
3230 }
3231 else if (x.getKind() == STRING_CONCAT)
3232 {
3233 std::vector<Node> children;
3234 for (const Node& nc : x)
3235 {
3236 children.push_back(nm->mkNode(STRING_REV, nc));
3237 }
3238 std::reverse(children.begin(), children.end());
3239 // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3240 Node retNode = nm->mkNode(STRING_CONCAT, children);
3241 return returnRewrite(node, retNode, "str-rev-minscope-concat");
3242 }
3243 else if (x.getKind() == STRING_REV)
3244 {
3245 // rev( rev( x ) ) --> x
3246 Node retNode = x[0];
3247 return returnRewrite(node, retNode, "str-rev-idem");
3248 }
3249 return node;
3250 }
3251
3252 Node TheoryStringsRewriter::rewriteStringLeq(Node n)
3253 {
3254 Assert(n.getKind() == kind::STRING_LEQ);
3255 NodeManager* nm = NodeManager::currentNM();
3256 if (n[0] == n[1])
3257 {
3258 Node ret = nm->mkConst(true);
3259 return returnRewrite(n, ret, "str-leq-id");
3260 }
3261 if (n[0].isConst() && n[1].isConst())
3262 {
3263 String s = n[0].getConst<String>();
3264 String t = n[1].getConst<String>();
3265 Node ret = nm->mkConst(s.isLeq(t));
3266 return returnRewrite(n, ret, "str-leq-eval");
3267 }
3268 // empty strings
3269 for (unsigned i = 0; i < 2; i++)
3270 {
3271 if (n[i].isConst() && n[i].getConst<String>().isEmptyString())
3272 {
3273 Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]);
3274 return returnRewrite(n, ret, "str-leq-empty");
3275 }
3276 }
3277
3278 std::vector<Node> n1;
3279 utils::getConcat(n[0], n1);
3280 std::vector<Node> n2;
3281 utils::getConcat(n[1], n2);
3282 Assert(!n1.empty() && !n2.empty());
3283
3284 // constant prefixes
3285 if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0])
3286 {
3287 String s = n1[0].getConst<String>();
3288 String t = n2[0].getConst<String>();
3289 // only need to truncate if s is longer
3290 if (s.size() > t.size())
3291 {
3292 s = s.prefix(t.size());
3293 }
3294 // if prefix is not leq, then entire string is not leq
3295 if (!s.isLeq(t))
3296 {
3297 Node ret = nm->mkConst(false);
3298 return returnRewrite(n, ret, "str-leq-cprefix");
3299 }
3300 }
3301
3302 Trace("strings-rewrite-nf") << "No rewrites for : " << n << std::endl;
3303 return n;
3304 }
3305
3306 Node TheoryStringsRewriter::rewritePrefixSuffix(Node n)
3307 {
3308 Assert(n.getKind() == kind::STRING_PREFIX
3309 || n.getKind() == kind::STRING_SUFFIX);
3310 bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3311 if (n[0] == n[1])
3312 {
3313 Node ret = NodeManager::currentNM()->mkConst(true);
3314 return returnRewrite(n, ret, "suf/prefix-eq");
3315 }
3316 if (n[0].isConst())
3317 {
3318 CVC4::String t = n[0].getConst<String>();
3319 if (t.isEmptyString())
3320 {
3321 Node ret = NodeManager::currentNM()->mkConst(true);
3322 return returnRewrite(n, ret, "suf/prefix-empty-const");
3323 }
3324 }
3325 if (n[1].isConst())
3326 {
3327 CVC4::String s = n[1].getConst<String>();
3328 if (n[0].isConst())
3329 {
3330 Node ret = NodeManager::currentNM()->mkConst(false);
3331 CVC4::String t = n[0].getConst<String>();
3332 if (s.size() >= t.size())
3333 {
3334 if ((isPrefix && t == s.prefix(t.size()))
3335 || (!isPrefix && t == s.suffix(t.size())))
3336 {
3337 ret = NodeManager::currentNM()->mkConst(true);
3338 }
3339 }
3340 return returnRewrite(n, ret, "suf/prefix-const");
3341 }
3342 else if (s.isEmptyString())
3343 {
3344 Node ret = n[0].eqNode(n[1]);
3345 return returnRewrite(n, ret, "suf/prefix-empty");
3346 }
3347 else if (s.size() == 1)
3348 {
3349 // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3350 // (str.contains "A" x )
3351 Node ret =
3352 NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]);
3353 return returnRewrite(n, ret, "suf/prefix-ctn");
3354 }
3355 }
3356 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3357 Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3358 Node val;
3359 if (isPrefix)
3360 {
3361 val = NodeManager::currentNM()->mkConst(::CVC4::Rational(0));
3362 }
3363 else
3364 {
3365 val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3366 }
3367
3368 // Check if we can turn the prefix/suffix into equalities by showing that the
3369 // prefix/suffix is at least as long as the string
3370 Node eqs = inferEqsFromContains(n[1], n[0]);
3371 if (!eqs.isNull())
3372 {
3373 return returnRewrite(n, eqs, "suf/prefix-to-eqs");
3374 }
3375
3376 // general reduction to equality + substr
3377 Node retNode = n[0].eqNode(
3378 NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3379
3380 return retNode;
3381 }
3382
3383 Node TheoryStringsRewriter::rewriteStringCode(Node n)
3384 {
3385 Assert(n.getKind() == kind::STRING_CODE);
3386 if (n[0].isConst())
3387 {
3388 CVC4::String s = n[0].getConst<String>();
3389 Node ret;
3390 if (s.size() == 1)
3391 {
3392 std::vector<unsigned> vec = s.getVec();
3393 Assert(vec.size() == 1);
3394 ret = NodeManager::currentNM()->mkConst(
3395 Rational(CVC4::String::convertUnsignedIntToCode(vec[0])));
3396 }
3397 else
3398 {
3399 ret = NodeManager::currentNM()->mkConst(Rational(-1));
3400 }
3401 return returnRewrite(n, ret, "code-eval");
3402 }
3403
3404 return n;
3405 }
3406
3407 Node TheoryStringsRewriter::splitConstant( Node a, Node b, int& index, bool isRev ) {
3408 Assert(a.isConst() && b.isConst());
3409 index = a.getConst<String>().size() <= b.getConst<String>().size() ? 1 : 0;
3410 unsigned len_short = index==1 ? a.getConst<String>().size() : b.getConst<String>().size();
3411 bool cmp = isRev ? a.getConst<String>().rstrncmp(b.getConst<String>(), len_short): a.getConst<String>().strncmp(b.getConst<String>(), len_short);
3412 if( cmp ) {
3413 Node l = index==0 ? a : b;
3414 if( isRev ){
3415 int new_len = l.getConst<String>().size() - len_short;
3416 return NodeManager::currentNM()->mkConst(l.getConst<String>().substr( 0, new_len ));
3417 }else{
3418 return NodeManager::currentNM()->mkConst(l.getConst<String>().substr( len_short ));
3419 }
3420 }else{
3421 //not the same prefix/suffix
3422 return Node::null();
3423 }
3424 }
3425
3426 bool TheoryStringsRewriter::canConstantContainConcat( Node c, Node n, int& firstc, int& lastc ) {
3427 Assert(c.isConst());
3428 CVC4::String t = c.getConst<String>();
3429 const std::vector<unsigned>& tvec = t.getVec();
3430 Assert(n.getKind() == kind::STRING_CONCAT);
3431 //must find constant components in order
3432 size_t pos = 0;
3433 firstc = -1;
3434 lastc = -1;
3435 for(unsigned i=0; i<n.getNumChildren(); i++) {
3436 if( n[i].isConst() ){
3437 firstc = firstc==-1 ? i : firstc;
3438 lastc = i;
3439 CVC4::String s = n[i].getConst<String>();
3440 size_t new_pos = t.find(s,pos);
3441 if( new_pos==std::string::npos ) {
3442 return false;
3443 }else{
3444 pos = new_pos + s.size();
3445 }
3446 }
3447 else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0]))
3448 {
3449 // find the first occurrence of a digit starting at pos
3450 while (pos < tvec.size() && !String::isDigit(tvec[pos]))
3451 {
3452 pos++;
3453 }
3454 if (pos == tvec.size())
3455 {
3456 return false;
3457 }
3458 // must consume at least one digit here
3459 pos++;
3460 }
3461 }
3462 return true;
3463 }
3464
3465 bool TheoryStringsRewriter::canConstantContainList( Node c, std::vector< Node >& l, int& firstc, int& lastc ) {
3466 Assert(c.isConst());
3467 CVC4::String t = c.getConst<String>();
3468 //must find constant components in order
3469 size_t pos = 0;
3470 firstc = -1;
3471 lastc = -1;
3472 for(unsigned i=0; i<l.size(); i++) {
3473 if( l[i].isConst() ){
3474 firstc = firstc==-1 ? i : firstc;
3475 lastc = i;
3476 CVC4::String s = l[i].getConst<String>();
3477 size_t new_pos = t.find(s,pos);
3478 if( new_pos==std::string::npos ) {
3479 return false;
3480 }else{
3481 pos = new_pos + s.size();
3482 }
3483 }
3484 }
3485 return true;
3486 }
3487
3488 bool TheoryStringsRewriter::stripSymbolicLength(std::vector<Node>& n1,
3489 std::vector<Node>& nr,
3490 int dir,
3491 Node& curr)
3492 {
3493 Assert(dir == 1 || dir == -1);
3494 Assert(nr.empty());
3495 Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
3496 bool ret = false;
3497 bool success;
3498 unsigned sindex = 0;
3499 do
3500 {
3501 Assert(!curr.isNull());
3502 success = false;
3503 if (curr != zero && sindex < n1.size())
3504 {
3505 unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
3506 if (n1[sindex_use].isConst())
3507 {
3508 // could strip part of a constant
3509 Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr));
3510 if (!lowerBound.isNull())
3511 {
3512 Assert(lowerBound.isConst());
3513 Rational lbr = lowerBound.getConst<Rational>();
3514 if (lbr.sgn() > 0)
3515 {
3516 Assert(checkEntailArith(curr, true));
3517 CVC4::String s = n1[sindex_use].getConst<String>();
3518 Node ncl =
3519 NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
3520 Node next_s =
3521 NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl);
3522 next_s = Rewriter::rewrite(next_s);
3523 Assert(next_s.isConst());
3524 // we can remove the entire constant
3525 if (next_s.getConst<Rational>().sgn() >= 0)
3526 {
3527 curr = Rewriter::rewrite(
3528 NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl));
3529 success = true;
3530 sindex++;
3531 }
3532 else
3533 {
3534 // we can remove part of the constant
3535 // lower bound minus the length of a concrete string is negative,
3536 // hence lowerBound cannot be larger than long max
3537 Assert(lbr < Rational(String::maxSize()));
3538 curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
3539 kind::MINUS, curr, lowerBound));
3540 uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
3541 Assert(lbsize < s.size());
3542 if (dir == 1)
3543 {
3544 // strip partially from the front
3545 nr.push_back(
3546 NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
3547 n1[sindex_use] = NodeManager::currentNM()->mkConst(
3548 s.suffix(s.size() - lbsize));
3549 }
3550 else
3551 {
3552 // strip partially from the back
3553 nr.push_back(
3554 NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
3555 n1[sindex_use] = NodeManager::currentNM()->mkConst(
3556 s.prefix(s.size() - lbsize));
3557 }
3558 ret = true;
3559 }
3560 Assert(checkEntailArith(curr));
3561 }
3562 else
3563 {
3564 // we cannot remove the constant
3565 }
3566 }
3567 }
3568 else
3569 {
3570 Node next_s = NodeManager::currentNM()->mkNode(
3571 kind::MINUS,
3572 curr,
3573 NodeManager::currentNM()->mkNode(kind::STRING_LENGTH,
3574 n1[sindex_use]));
3575 next_s = Rewriter::rewrite(next_s);
3576 if (checkEntailArith(next_s))
3577 {
3578 success = true;
3579 curr = next_s;
3580 sindex++;
3581 }
3582 }
3583 }
3584 } while (success);
3585 if (sindex > 0)
3586 {
3587 if (dir == 1)
3588 {
3589 nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
3590 n1.erase(n1.begin(), n1.begin() + sindex);
3591 }
3592 else
3593 {
3594 nr.insert(nr.end(), n1.end() - sindex, n1.end());
3595 n1.erase(n1.end() - sindex, n1.end());
3596 }
3597 ret = true;
3598 }
3599 return ret;
3600 }
3601
3602 int TheoryStringsRewriter::componentContains(std::vector<Node>& n1,
3603 std::vector<Node>& n2,
3604 std::vector<Node>& nb,
3605 std::vector<Node>& ne,
3606 bool computeRemainder,
3607 int remainderDir)
3608 {
3609 Assert(nb.empty());
3610 Assert(ne.empty());
3611 // if n2 is a singleton, we can do optimized version here
3612 if (n2.size() == 1)
3613 {
3614 for (unsigned i = 0; i < n1.size(); i++)
3615 {
3616 Node n1rb;
3617 Node n1re;
3618 if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
3619 {
3620 if (computeRemainder)
3621 {
3622 n1[i] = n2[0];
3623 if (remainderDir != -1)
3624 {
3625 if (!n1re.isNull())
3626 {
3627 ne.push_back(n1re);
3628 }
3629 ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
3630 n1.erase(n1.begin() + i + 1, n1.end());
3631 }
3632 else if (!n1re.isNull())
3633 {
3634 n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
3635 kind::STRING_CONCAT, n1[i], n1re));
3636 }
3637 if (remainderDir != 1)
3638 {
3639 nb.insert(nb.end(), n1.begin(), n1.begin() + i);
3640 n1.erase(n1.begin(), n1.begin() + i);
3641 if (!n1rb.isNull())
3642 {
3643 nb.push_back(n1rb);
3644 }
3645 }
3646 else if (!n1rb.isNull())
3647 {
3648 n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode(
3649 kind::STRING_CONCAT, n1rb, n1[i]));
3650 }
3651 }
3652 return i;
3653 }
3654 }
3655 }
3656 else if (n1.size() >= n2.size())
3657 {
3658 unsigned diff = n1.size() - n2.size();
3659 for (unsigned i = 0; i <= diff; i++)
3660 {
3661 Node n1rb_first;
3662 Node n1re_first;
3663 // first component of n2 must be a suffix
3664 if (componentContainsBase(n1[i],
3665 n2[0],
3666 n1rb_first,
3667 n1re_first,
3668 1,
3669 computeRemainder && remainderDir != 1))
3670 {
3671 Assert(n1re_first.isNull());
3672 for (unsigned j = 1; j < n2.size(); j++)
3673 {
3674 // are we in the last component?
3675 if (j + 1 == n2.size())
3676 {
3677 Node n1rb_last;
3678 Node n1re_last;
3679 // last component of n2 must be a prefix
3680 if (componentContainsBase(n1[i + j],
3681 n2[j],
3682 n1rb_last,
3683 n1re_last,
3684 -1,
3685 computeRemainder && remainderDir != -1))
3686 {
3687 Assert(n1rb_last.isNull());
3688 if (computeRemainder)
3689 {
3690 if (remainderDir != -1)
3691 {
3692 if (!n1re_last.isNull())
3693 {
3694 ne.push_back(n1re_last);
3695 }
3696 ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
3697 n1.erase(n1.begin() + i + j + 1, n1.end());
3698 n1[i + j] = n2[j];
3699 }
3700 if (remainderDir != 1)
3701 {
3702 n1[i] = n2[0];
3703 nb.insert(nb.end(), n1.begin(), n1.begin() + i);
3704 n1.erase(n1.begin(), n1.begin() + i);
3705 if (!n1rb_first.isNull())
3706 {
3707 nb.push_back(n1rb_first);
3708 }
3709 }
3710 }
3711 return i;
3712 }
3713 else
3714 {
3715 break;
3716 }
3717 }
3718 else if (n1[i + j] != n2[j])
3719 {
3720 break;
3721 }
3722 }
3723 }
3724 }
3725 }
3726 return -1;
3727 }
3728
3729 bool TheoryStringsRewriter::componentContainsBase(
3730 Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
3731 {
3732 Assert(n1rb.isNull());
3733 Assert(n1re.isNull());
3734
3735 NodeManager* nm = NodeManager::currentNM();
3736
3737 if (n1 == n2)
3738 {
3739 return true;
3740 }
3741 else
3742 {
3743 if (n1.isConst() && n2.isConst())
3744 {
3745 CVC4::String s = n1.getConst<String>();
3746 CVC4::String t = n2.getConst<String>();
3747 if (t.size() < s.size())
3748 {
3749 if (dir == 1)
3750 {
3751 if (s.suffix(t.size()) == t)
3752 {
3753 if (computeRemainder)
3754 {
3755 n1rb = nm->mkConst(::CVC4::String(s.prefix(s.size() - t.size())));
3756 }
3757 return true;
3758 }
3759 }
3760 else if (dir == -1)
3761 {
3762 if (s.prefix(t.size()) == t)
3763 {
3764 if (computeRemainder)
3765 {
3766 n1re = nm->mkConst(::CVC4::String(s.suffix(s.size() - t.size())));
3767 }
3768 return true;
3769 }
3770 }
3771 else
3772 {
3773 size_t f = s.find(t);
3774 if (f != std::string::npos)
3775 {
3776 if (computeRemainder)
3777 {
3778 if (f > 0)
3779 {
3780 n1rb = nm->mkConst(::CVC4::String(s.prefix(f)));
3781 }
3782 if (s.size() > f + t.size())
3783 {
3784 n1re = nm->mkConst(
3785 ::CVC4::String(s.suffix(s.size() - (f + t.size()))));
3786 }
3787 }
3788 return true;
3789 }
3790 }
3791 }
3792 }
3793 else
3794 {
3795 // cases for:
3796 // n1 = x containing n2 = substr( x, n2[1], n2[2] )
3797 if (n2.getKind() == kind::STRING_SUBSTR)
3798 {
3799 if (n2[0] == n1)
3800 {
3801 bool success = true;
3802 Node start_pos = n2[1];
3803 Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]);
3804 Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]);
3805 if (dir == 1)
3806 {
3807 // To be a suffix, start + length must be greater than
3808 // or equal to the length of the string.
3809 success = checkEntailArith(end_pos, len_n2s);
3810 }
3811 else if (dir == -1)
3812 {
3813 // To be a prefix, must literally start at 0, since
3814 // if we knew it started at <0, it should be rewritten to "",
3815 // if we knew it started at 0, then n2[1] should be rewritten to
3816 // 0.
3817 success = start_pos.isConst()
3818 && start_pos.getConst<Rational>().sgn() == 0;
3819 }
3820 if (success)
3821 {
3822 if (computeRemainder)
3823 {
3824 // we can only compute the remainder if start_pos and end_pos
3825 // are known to be non-negative.
3826 if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos))
3827 {
3828 return false;
3829 }
3830 if (dir != 1)
3831 {
3832 n1rb = nm->mkNode(kind::STRING_SUBSTR,
3833 n2[0],
3834 nm->mkConst(Rational(0)),
3835 start_pos);
3836 }
3837 if (dir != -1)
3838 {
3839 n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s);
3840 }
3841 }
3842 return true;
3843 }
3844 }
3845 }
3846
3847 if (!computeRemainder && dir == 0)
3848 {
3849 if (n1.getKind() == STRING_STRREPL)
3850 {
3851 // (str.contains (str.replace x y z) w) ---> true
3852 // if (str.contains x w) --> true and (str.contains z w) ---> true
3853 Node xCtnW = checkEntailContains(n1[0], n2);
3854 if (!xCtnW.isNull() && xCtnW.getConst<bool>())
3855 {
3856 Node zCtnW = checkEntailContains(n1[2], n2);
3857 if (!zCtnW.isNull() && zCtnW.getConst<bool>())
3858 {
3859 return true;
3860 }
3861 }
3862 }
3863 }
3864 }
3865 }
3866 return false;
3867 }
3868
3869 bool TheoryStringsRewriter::stripConstantEndpoints(std::vector<Node>& n1,
3870 std::vector<Node>& n2,
3871 std::vector<Node>& nb,
3872 std::vector<Node>& ne,
3873 int dir)
3874 {
3875 Assert(nb.empty());
3876 Assert(ne.empty());
3877
3878 NodeManager* nm = NodeManager::currentNM();
3879 bool changed = false;
3880 // for ( forwards, backwards ) direction
3881 for (unsigned r = 0; r < 2; r++)
3882 {
3883 if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
3884 {
3885 unsigned index0 = r == 0 ? 0 : n1.size() - 1;
3886 unsigned index1 = r == 0 ? 0 : n2.size() - 1;
3887 bool removeComponent = false;
3888 Node n1cmp = n1[index0];
3889
3890 if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
3891 {
3892 return false;
3893 }
3894
3895 std::vector<Node> sss;
3896 std::vector<Node> sls;
3897 n1cmp = decomposeSubstrChain(n1cmp, sss, sls);
3898 Trace("strings-rewrite-debug2")
3899 << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
3900 << ", dir = " << dir << std::endl;
3901 if (n1cmp.isConst())
3902 {
3903 CVC4::String s = n1cmp.getConst<String>();
3904 // overlap is an overapproximation of the number of characters
3905 // n2[index1] can match in s
3906 unsigned overlap = s.size();
3907 if (n2[index1].isConst())
3908 {
3909 CVC4::String t = n2[index1].getConst<String>();
3910 std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
3911 if (ret == std::string::npos)
3912 {
3913 if (n1.size() == 1)
3914 {
3915 // can remove everything
3916 // e.g. str.contains( "abc", str.++( "ba", x ) ) -->
3917 // str.contains( "", str.++( "ba", x ) )
3918 removeComponent = true;
3919 }
3920 else if (sss.empty()) // only if not substr
3921 {
3922 // check how much overlap there is
3923 // This is used to partially strip off the endpoint
3924 // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
3925 // str.contains( str.++( "c", x ), str.++( "cd", y ) )
3926 overlap = r == 0 ? s.overlap(t) : t.overlap(s);
3927 }
3928 else
3929 {
3930 // if we are looking at a substring, we can remove the component
3931 // if there is no overlap
3932 // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
3933 // --> str.contains( x, "a" )
3934 removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
3935 }
3936 }
3937 else if (sss.empty()) // only if not substr
3938 {
3939 Assert(ret < s.size());
3940 // can strip off up to the find position, e.g.
3941 // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
3942 // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
3943 // and
3944 // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
3945 // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
3946 overlap = s.size() - ret;
3947 }
3948 }
3949 else
3950 {
3951 // inconclusive
3952 }
3953 // process the overlap
3954 if (overlap < s.size())
3955 {
3956 changed = true;
3957 if (overlap == 0)
3958 {
3959 removeComponent = true;
3960 }
3961 else
3962 {
3963 // can drop the prefix (resp. suffix) from the first (resp. last)
3964 // component
3965 if (r == 0)
3966 {
3967 nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
3968 n1[index0] = nm->mkConst(s.suffix(overlap));
3969 }
3970 else
3971 {
3972 ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
3973 n1[index0] = nm->mkConst(s.prefix(overlap));
3974 }
3975 }
3976 }
3977 }
3978 else if (n1cmp.getKind() == kind::STRING_ITOS)
3979 {
3980 if (n2[index1].isConst())
3981 {
3982 CVC4::String t = n2[index1].getConst<String>();
3983
3984 if (n1.size() == 1)
3985 {
3986 // if n1.size()==1, then if n2[index1] is not a number, we can drop
3987 // the entire component
3988 // e.g. str.contains( int.to.str(x), "123a45") --> false
3989 if (!t.isNumber())
3990 {
3991 removeComponent = true;
3992 }
3993 }
3994 else
3995 {
3996 const std::vector<unsigned>& tvec = t.getVec();
3997 Assert(tvec.size() > 0);
3998
3999 // if n1.size()>1, then if the first (resp. last) character of
4000 // n2[index1]
4001 // is not a digit, we can drop the entire component, e.g.:
4002 // str.contains( str.++( int.to.str(x), y ), "a12") -->
4003 // str.contains( y, "a12" )
4004 // str.contains( str.++( y, int.to.str(x) ), "a0b") -->
4005 // str.contains( y, "a0b" )
4006 unsigned i = r == 0 ? 0 : (tvec.size() - 1);
4007 if (!String::isDigit(tvec[i]))
4008 {
4009 removeComponent = true;
4010 }
4011 }
4012 }
4013 }
4014 if (removeComponent)
4015 {
4016 // can drop entire first (resp. last) component
4017 if (r == 0)
4018 {
4019 nb.push_back(n1[index0]);
4020 n1.erase(n1.begin(), n1.begin() + 1);
4021 }
4022 else
4023 {
4024 ne.push_back(n1[index0]);
4025 n1.pop_back();
4026 }
4027 if (n1.empty())
4028 {
4029 // if we've removed everything, just return (we will rewrite to false)
4030 return true;
4031 }
4032 else
4033 {
4034 changed = true;
4035 }
4036 }
4037 }
4038 }
4039 // TODO (#1180) : computing the maximal overlap in this function may be
4040 // important.
4041 // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
4042 // false
4043 // ...since str.to.int(x) can contain at most 1 character from "2aaaa",
4044 // leaving 4 characters
4045 // which is larger that the upper bound for length of str.substr(y,0,3),
4046 // which is 3.
4047 return changed;
4048 }
4049
4050 Node TheoryStringsRewriter::canonicalStrForSymbolicLength(Node len)
4051 {
4052 NodeManager* nm = NodeManager::currentNM();
4053
4054 Node res;
4055 if (len.getKind() == kind::CONST_RATIONAL)
4056 {
4057 // c -> "A" repeated c times
4058 Rational ratLen = len.getConst<Rational>();
4059 Assert(ratLen.getDenominator() == 1);
4060 Integer intLen = ratLen.getNumerator();
4061 res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A')));
4062 }
4063 else if (len.getKind() == kind::PLUS)
4064 {
4065 // x + y -> norm(x) + norm(y)
4066 NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
4067 for (const auto& n : len)
4068 {
4069 Node sn = canonicalStrForSymbolicLength(n);
4070 if (sn.isNull())
4071 {
4072 return Node::null();
4073 }
4074 std::vector<Node> snChildren;
4075 utils::getConcat(sn, snChildren);
4076 concatBuilder.append(snChildren);
4077 }
4078 res = concatBuilder.constructNode();
4079 }
4080 else if (len.getKind() == kind::MULT && len.getNumChildren() == 2
4081 && len[0].isConst())
4082 {
4083 // c * x -> norm(x) repeated c times
4084 Rational ratReps = len[0].getConst<Rational>();
4085 Assert(ratReps.getDenominator() == 1);
4086 Integer intReps = ratReps.getNumerator();
4087
4088 Node nRep = canonicalStrForSymbolicLength(len[1]);
4089 std::vector<Node> nRepChildren;
4090 utils::getConcat(nRep, nRepChildren);
4091 NodeBuilder<> concatBuilder(kind::STRING_CONCAT);
4092 for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
4093 {
4094 concatBuilder.append(nRepChildren);
4095 }
4096 res = concatBuilder.constructNode();
4097 }
4098 else if (len.getKind() == kind::STRING_LENGTH)
4099 {
4100 // len(x) -> x
4101 res = len[0];
4102 }
4103 return res;
4104 }
4105
4106 Node TheoryStringsRewriter::lengthPreserveRewrite(Node n)
4107 {
4108 NodeManager* nm = NodeManager::currentNM();
4109 Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
4110 Node res = canonicalStrForSymbolicLength(len);
4111 return res.isNull() ? n : res;
4112 }
4113
4114 Node TheoryStringsRewriter::checkEntailContains(Node a,
4115 Node b,
4116 bool fullRewriter)
4117 {
4118 NodeManager* nm = NodeManager::currentNM();
4119 Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b);
4120
4121 if (fullRewriter)
4122 {
4123 ctn = Rewriter::rewrite(ctn);
4124 }
4125 else
4126 {
4127 Node prev;
4128 do
4129 {
4130 prev = ctn;
4131 ctn = rewriteContains(ctn);
4132 } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN);
4133 }
4134
4135 Assert(ctn.getType().isBoolean());
4136 return ctn.isConst() ? ctn : Node::null();
4137 }
4138
4139 bool TheoryStringsRewriter::checkEntailNonEmpty(Node a)
4140 {
4141 Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
4142 len = Rewriter::rewrite(len);
4143 return checkEntailArith(len, true);
4144 }
4145
4146 bool TheoryStringsRewriter::checkEntailLengthOne(Node s, bool strict)
4147 {
4148 NodeManager* nm = NodeManager::currentNM();
4149 Node one = nm->mkConst(Rational(1));
4150 Node len = nm->mkNode(STRING_LENGTH, s);
4151 len = Rewriter::rewrite(len);
4152 return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true));
4153 }
4154
4155 bool TheoryStringsRewriter::checkEntailArithEq(Node a, Node b)
4156 {
4157 if (a == b)
4158 {
4159 return true;
4160 }
4161 else
4162 {
4163 Node ar = Rewriter::rewrite(a);
4164 Node br = Rewriter::rewrite(b);
4165 return ar == br;
4166 }
4167 }
4168
4169 bool TheoryStringsRewriter::checkEntailArith(Node a, Node b, bool strict)
4170 {
4171 if (a == b)
4172 {
4173 return !strict;
4174 }
4175 else
4176 {
4177 Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b);
4178 return checkEntailArith(diff, strict);
4179 }
4180 }
4181
4182 struct StrCheckEntailArithTag
4183 {
4184 };
4185 struct StrCheckEntailArithComputedTag
4186 {
4187 };
4188 /** Attribute true for expressions for which checkEntailArith returned true */
4189 typedef expr::Attribute<StrCheckEntailArithTag, bool> StrCheckEntailArithAttr;
4190 typedef expr::Attribute<StrCheckEntailArithComputedTag, bool>
4191 StrCheckEntailArithComputedAttr;
4192
4193 bool TheoryStringsRewriter::checkEntailArith(Node a, bool strict)
4194 {
4195 if (a.isConst())
4196 {
4197 return a.getConst<Rational>().sgn() >= (strict ? 1 : 0);
4198 }
4199
4200 Node ar =
4201 strict
4202 ? NodeManager::currentNM()->mkNode(
4203 kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1)))
4204 : a;
4205 ar = Rewriter::rewrite(ar);
4206
4207 if (ar.getAttribute(StrCheckEntailArithComputedAttr()))
4208 {
4209 return ar.getAttribute(StrCheckEntailArithAttr());
4210 }
4211
4212 bool ret = checkEntailArithInternal(ar);
4213 if (!ret)
4214 {
4215 // try with approximations
4216 ret = checkEntailArithApprox(ar);
4217 }
4218 // cache the result
4219 ar.setAttribute(StrCheckEntailArithAttr(), ret);
4220 ar.setAttribute(StrCheckEntailArithComputedAttr(), true);
4221 return ret;
4222 }
4223
4224 bool TheoryStringsRewriter::checkEntailArithApprox(Node ar)
4225 {
4226 Assert(Rewriter::rewrite(ar) == ar);
4227 NodeManager* nm = NodeManager::currentNM();
4228 std::map<Node, Node> msum;
4229 Trace("strings-ent-approx-debug")
4230 << "Setup arithmetic approximations for " << ar << std::endl;
4231 if (!ArithMSum::getMonomialSum(ar, msum))
4232 {
4233 Trace("strings-ent-approx-debug")
4234 << "...failed to get monomial sum!" << std::endl;
4235 return false;
4236 }
4237 // for each monomial v*c, mApprox[v] a list of
4238 // possibilities for how the term can be soundly approximated, that is,
4239 // if mApprox[v] contains av, then v*c > av*c. Notice that if c
4240 // is positive, then v > av, otherwise if c is negative, then v < av.
4241 // In other words, av is an under-approximation if c is positive, and an
4242 // over-approximation if c is negative.
4243 bool changed = false;
4244 std::map<Node, std::vector<Node> > mApprox;
4245 // map from approximations to their monomial sums
4246 std::map<Node, std::map<Node, Node> > approxMsums;
4247 // aarSum stores each monomial that does not have multiple approximations
4248 std::vector<Node> aarSum;
4249 for (std::pair<const Node, Node>& m : msum)
4250 {
4251 Node v = m.first;
4252 Node c = m.second;
4253 Trace("strings-ent-approx-debug")
4254 << "Get approximations " << v << "..." << std::endl;
4255 if (v.isNull())
4256 {
4257 Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c;
4258 aarSum.push_back(mn);
4259 }
4260 else
4261 {
4262 // c.isNull() means c = 1
4263 bool isOverApprox = !c.isNull() && c.getConst<Rational>().sgn() == -1;
4264 std::vector<Node>& approx = mApprox[v];
4265 std::unordered_set<Node, NodeHashFunction> visited;
4266 std::vector<Node> toProcess;
4267 toProcess.push_back(v);
4268 do
4269 {
4270 Node curr = toProcess.back();
4271 Trace("strings-ent-approx-debug") << " process " << curr << std::endl;
4272 curr = Rewriter::rewrite(curr);
4273 toProcess.pop_back();
4274 if (visited.find(curr) == visited.end())
4275 {
4276 visited.insert(curr);
4277 std::vector<Node> currApprox;
4278 getArithApproximations(curr, currApprox, isOverApprox);
4279 if (currApprox.empty())
4280 {
4281 Trace("strings-ent-approx-debug")
4282 << "...approximation: " << curr << std::endl;
4283 // no approximations, thus curr is a possibility
4284 approx.push_back(curr);
4285 }
4286 else
4287 {
4288 toProcess.insert(
4289 toProcess.end(), currApprox.begin(), currApprox.end());
4290 }
4291 }
4292 } while (!toProcess.empty());
4293 Assert(!approx.empty());
4294 // if we have only one approximation, move it to final
4295 if (approx.size() == 1)
4296 {
4297 changed = v != approx[0];
4298 Node mn = ArithMSum::mkCoeffTerm(c, approx[0]);
4299 aarSum.push_back(mn);
4300 mApprox.erase(v);
4301 }
4302 else
4303 {
4304 // compute monomial sum form for each approximation, used below
4305 for (const Node& aa : approx)
4306 {
4307 if (approxMsums.find(aa) == approxMsums.end())
4308 {
4309 CVC4_UNUSED bool ret =
4310 ArithMSum::getMonomialSum(aa, approxMsums[aa]);
4311 Assert(ret);
4312 }
4313 }
4314 changed = true;
4315 }
4316 }
4317 }
4318 if (!changed)
4319 {
4320 // approximations had no effect, return
4321 Trace("strings-ent-approx-debug") << "...no approximations" << std::endl;
4322 return false;
4323 }
4324 // get the current "fixed" sum for the abstraction of ar
4325 Node aar = aarSum.empty()
4326 ? nm->mkConst(Rational(0))
4327 : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum));
4328 aar = Rewriter::rewrite(aar);
4329 Trace("strings-ent-approx-debug")
4330 << "...processed fixed sum " << aar << " with " << mApprox.size()
4331 << " approximated monomials." << std::endl;
4332 // if we have a choice of how to approximate
4333 if (!mApprox.empty())
4334 {
4335 // convert aar back to monomial sum
4336 std::map<Node, Node> msumAar;
4337 if (!ArithMSum::getMonomialSum(aar, msumAar))
4338 {
4339 return false;
4340 }
4341 if (Trace.isOn("strings-ent-approx"))
4342 {
4343 Trace("strings-ent-approx")
4344 << "---- Check arithmetic entailment by under-approximation " << ar
4345 << " >= 0" << std::endl;
4346 Trace("strings-ent-approx") << "FIXED:" << std::endl;
4347 ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx");
4348 Trace("strings-ent-approx") << "APPROX:" << std::endl;
4349 for (std::pair<const Node, std::vector<Node> >& a : mApprox)
4350 {
4351 Node c = msum[a.first];
4352 Trace("strings-ent-approx") << " ";
4353 if (!c.isNull())
4354 {
4355 Trace("strings-ent-approx") << c << " * ";
4356 }
4357 Trace("strings-ent-approx")
4358 << a.second << " ...from " << a.first << std::endl;
4359 }
4360 Trace("strings-ent-approx") << std::endl;
4361 }
4362 Rational one(1);
4363 // incorporate monomials one at a time that have a choice of approximations
4364 while (!mApprox.empty())
4365 {
4366 Node v;
4367 Node vapprox;
4368 int maxScore = -1;
4369 // Look at each approximation, take the one with the best score.
4370 // Notice that we are in the process of trying to prove
4371 // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0,
4372 // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar)
4373 // and approx_1 ... approx_m are possible approximations. The
4374 // intution here is that we want coefficients c1...cn to be positive.
4375 // This is because arithmetic string terms t1...tn (which may be
4376 // applications of len, indexof, str.to.int) are never entailed to be
4377 // negative. Hence, we add the approx_i that contributes the "most"
4378 // towards making all constants c1...cn positive and cancelling negative
4379 // monomials in approx_i itself.
4380 for (std::pair<const Node, std::vector<Node> >& nam : mApprox)
4381 {
4382 Node cr = msum[nam.first];
4383 for (const Node& aa : nam.second)
4384 {
4385 unsigned helpsCancelCount = 0;
4386 unsigned addsObligationCount = 0;
4387 std::map<Node, Node>::iterator it;
4388 // we are processing an approximation cr*( c1*t1 + ... + cn*tn )
4389 for (std::pair<const Node, Node>& aam : approxMsums[aa])
4390 {
4391 // Say aar is of the form t + c*ti, and aam is the monomial ci*ti
4392 // where ci != 0. We say aam:
4393 // (1) helps cancel if c != 0 and c>0 != ci>0
4394 // (2) adds obligation if c>=0 and c+ci<0
4395 Node ti = aam.first;
4396 Node ci = aam.second;
4397 if (!cr.isNull())
4398 {
4399 ci = ci.isNull() ? cr
4400 : Rewriter::rewrite(nm->mkNode(MULT, ci, cr));
4401 }
4402 Trace("strings-ent-approx-debug") << ci << "*" << ti << " ";
4403 int ciSgn = ci.isNull() ? 1 : ci.getConst<Rational>().sgn();
4404 it = msumAar.find(ti);
4405 if (it != msumAar.end())
4406 {
4407 Node c = it->second;
4408 int cSgn = c.isNull() ? 1 : c.getConst<Rational>().sgn();
4409 if (cSgn == 0)
4410 {
4411 addsObligationCount += (ciSgn == -1 ? 1 : 0);
4412 }
4413 else if (cSgn != ciSgn)
4414 {
4415 helpsCancelCount++;
4416 Rational r1 = c.isNull() ? one : c.getConst<Rational>();
4417 Rational r2 = ci.isNull() ? one : ci.getConst<Rational>();
4418 Rational r12 = r1 + r2;
4419 if (r12.sgn() == -1)
4420 {
4421 addsObligationCount++;
4422 }
4423 }
4424 }
4425 else
4426 {
4427 addsObligationCount += (ciSgn == -1 ? 1 : 0);
4428 }
4429 }
4430 Trace("strings-ent-approx-debug")
4431 << "counts=" << helpsCancelCount << "," << addsObligationCount
4432 << " for " << aa << " into " << aar << std::endl;
4433 int score = (addsObligationCount > 0 ? 0 : 2)
4434 + (helpsCancelCount > 0 ? 1 : 0);
4435 // if its the best, update v and vapprox
4436 if (v.isNull() || score > maxScore)
4437 {
4438 v = nam.first;
4439 vapprox = aa;
4440 maxScore = score;
4441 }
4442 }
4443 if (!v.isNull())
4444 {
4445 break;
4446 }
4447 }
4448 Trace("strings-ent-approx")
4449 << "- Decide " << v << " = " << vapprox << std::endl;
4450 // we incorporate v approximated by vapprox into the overall approximation
4451 // for ar
4452 Assert(!v.isNull() && !vapprox.isNull());
4453 Assert(msum.find(v) != msum.end());
4454 Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox);
4455 aar = nm->mkNode(PLUS, aar, mn);
4456 // update the msumAar map
4457 aar = Rewriter::rewrite(aar);
4458 msumAar.clear();
4459 if (!ArithMSum::getMonomialSum(aar, msumAar))
4460 {
4461 Assert(false);
4462 Trace("strings-ent-approx")
4463 << "...failed to get monomial sum!" << std::endl;
4464 return false;
4465 }
4466 // we have processed the approximation for v
4467 mApprox.erase(v);
4468 }
4469 Trace("strings-ent-approx") << "-----------------" << std::endl;
4470 }
4471 if (aar == ar)
4472 {
4473 Trace("strings-ent-approx-debug")
4474 << "...approximation had no effect" << std::endl;
4475 // this should never happen, but we avoid the infinite loop for sanity here
4476 Assert(false);
4477 return false;
4478 }
4479 // Check entailment on the approximation of ar.
4480 // Notice that this may trigger further reasoning by approximation. For
4481 // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be
4482 // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on
4483 // this call, where in the recursive call we may over-approximate
4484 // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer
4485 // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two
4486 // steps.
4487 if (checkEntailArith(aar))
4488 {
4489 Trace("strings-ent-approx")
4490 << "*** StrArithApprox: showed " << ar
4491 << " >= 0 using under-approximation!" << std::endl;
4492 Trace("strings-ent-approx")
4493 << "*** StrArithApprox: under-approximation was " << aar << std::endl;
4494 return true;
4495 }
4496 return false;
4497 }
4498
4499 void TheoryStringsRewriter::getArithApproximations(Node a,
4500 std::vector<Node>& approx,
4501 bool isOverApprox)
4502 {
4503 NodeManager* nm = NodeManager::currentNM();
4504 // We do not handle PLUS here since this leads to exponential behavior.
4505 // Instead, this is managed, e.g. during checkEntailArithApprox, where
4506 // PLUS terms are expanded "on-demand" during the reasoning.
4507 Trace("strings-ent-approx-debug")
4508 << "Get arith approximations " << a << std::endl;
4509 Kind ak = a.getKind();
4510 if (ak == MULT)
4511 {
4512 Node c;
4513 Node v;
4514 if (ArithMSum::getMonomial(a, c, v))
4515 {
4516 bool isNeg = c.getConst<Rational>().sgn() == -1;
4517 getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox);
4518 for (unsigned i = 0, size = approx.size(); i < size; i++)
4519 {
4520 approx[i] = nm->mkNode(MULT, c, approx[i]);
4521 }
4522 }
4523 }
4524 else if (ak == STRING_LENGTH)
4525 {
4526 Kind aak = a[0].getKind();
4527 if (aak == STRING_SUBSTR)
4528 {
4529 // over,under-approximations for len( substr( x, n, m ) )
4530 Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
4531 if (isOverApprox)
4532 {
4533 // m >= 0 implies
4534 // m >= len( substr( x, n, m ) )
4535 if (checkEntailArith(a[0][2]))
4536 {
4537 approx.push_back(a[0][2]);
4538 }
4539 if (checkEntailArith(lenx, a[0][1]))
4540 {
4541 // n <= len( x ) implies
4542 // len( x ) - n >= len( substr( x, n, m ) )
4543 approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
4544 }
4545 else
4546 {
4547 // len( x ) >= len( substr( x, n, m ) )
4548 approx.push_back(lenx);
4549 }
4550 }
4551 else
4552 {
4553 // 0 <= n and n+m <= len( x ) implies
4554 // m <= len( substr( x, n, m ) )
4555 Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]);
4556 if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm))
4557 {
4558 approx.push_back(a[0][2]);
4559 }
4560 // 0 <= n and n+m >= len( x ) implies
4561 // len(x)-n <= len( substr( x, n, m ) )
4562 if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx))
4563 {
4564 approx.push_back(nm->mkNode(MINUS, lenx, a[0][1]));
4565 }
4566 }
4567 }
4568 else if (aak == STRING_STRREPL)
4569 {
4570 // over,under-approximations for len( replace( x, y, z ) )
4571 // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) )
4572 Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]);
4573 Node leny = nm->mkNode(STRING_LENGTH, a[0][1]);
4574 Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]);
4575 if (isOverApprox)
4576 {
4577 if (checkEntailArith(leny, lenz))
4578 {
4579 // len( y ) >= len( z ) implies
4580 // len( x ) >= len( replace( x, y, z ) )
4581 approx.push_back(lenx);
4582 }
4583 else
4584 {
4585 // len( x ) + len( z ) >= len( replace( x, y, z ) )
4586 approx.push_back(nm->mkNode(PLUS, lenx, lenz));
4587 }
4588 }
4589 else
4590 {
4591 if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx))
4592 {
4593 // len( y ) <= len( z ) or len( x ) <= len( z ) implies
4594 // len( x ) <= len( replace( x, y, z ) )
4595 approx.push_back(lenx);
4596 }
4597 else
4598 {
4599 // len( x ) - len( y ) <= len( replace( x, y, z ) )
4600 approx.push_back(nm->mkNode(MINUS, lenx, leny));
4601 }
4602 }
4603 }
4604 else if (aak == STRING_ITOS)
4605 {
4606 // over,under-approximations for len( int.to.str( x ) )
4607 if (isOverApprox)
4608 {
4609 if (checkEntailArith(a[0][0], false))
4610 {
4611 if (checkEntailArith(a[0][0], true))
4612 {
4613 // x > 0 implies
4614 // x >= len( int.to.str( x ) )
4615 approx.push_back(a[0][0]);
4616 }
4617 else
4618 {
4619 // x >= 0 implies
4620 // x+1 >= len( int.to.str( x ) )
4621 approx.push_back(
4622 nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0]));
4623 }
4624 }
4625 }
4626 else
4627 {
4628 if (checkEntailArith(a[0][0]))
4629 {
4630 // x >= 0 implies
4631 // len( int.to.str( x ) ) >= 1
4632 approx.push_back(nm->mkConst(Rational(1)));
4633 }
4634 // other crazy things are possible here, e.g.
4635 // len( int.to.str( len( y ) + 10 ) ) >= 2
4636 }
4637 }
4638 }
4639 else if (ak == STRING_STRIDOF)
4640 {
4641 // over,under-approximations for indexof( x, y, n )
4642 if (isOverApprox)
4643 {
4644 Node lenx = nm->mkNode(STRING_LENGTH, a[0]);
4645 Node leny = nm->mkNode(STRING_LENGTH, a[1]);
4646 if (checkEntailArith(lenx, leny))
4647 {
4648 // len( x ) >= len( y ) implies
4649 // len( x ) - len( y ) >= indexof( x, y, n )
4650 approx.push_back(nm->mkNode(MINUS, lenx, leny));
4651 }
4652 else
4653 {
4654 // len( x ) >= indexof( x, y, n )
4655 approx.push_back(lenx);
4656 }
4657 }
4658 else
4659 {
4660 // TODO?:
4661 // contains( substr( x, n, len( x ) ), y ) implies
4662 // n <= indexof( x, y, n )
4663 // ...hard to test, runs risk of non-termination
4664
4665 // -1 <= indexof( x, y, n )
4666 approx.push_back(nm->mkConst(Rational(-1)));
4667 }
4668 }
4669 else if (ak == STRING_STOI)
4670 {
4671 // over,under-approximations for str.to.int( x )
4672 if (isOverApprox)
4673 {
4674 // TODO?:
4675 // y >= 0 implies
4676 // y >= str.to.int( int.to.str( y ) )
4677 }
4678 else
4679 {
4680 // -1 <= str.to.int( x )
4681 approx.push_back(nm->mkConst(Rational(-1)));
4682 }
4683 }
4684 Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl;
4685 }
4686
4687 bool TheoryStringsRewriter::checkEntailMultisetSubset(Node a, Node b)
4688 {
4689 NodeManager* nm = NodeManager::currentNM();
4690
4691 std::vector<Node> avec;
4692 utils::getConcat(getMultisetApproximation(a), avec);
4693 std::vector<Node> bvec;
4694 utils::getConcat(b, bvec);
4695
4696 std::map<Node, unsigned> num_nconst[2];
4697 std::map<Node, unsigned> num_const[2];
4698 for (unsigned j = 0; j < 2; j++)
4699 {
4700 std::vector<Node>& jvec = j == 0 ? avec : bvec;
4701 for (const Node& cc : jvec)
4702 {
4703 if (cc.isConst())
4704 {
4705 num_const[j][cc]++;
4706 }
4707 else
4708 {
4709 num_nconst[j][cc]++;
4710 }
4711 }
4712 }
4713 bool ms_success = true;
4714 for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
4715 {
4716 if (nncp.second > num_nconst[1][nncp.first])
4717 {
4718 ms_success = false;
4719 break;
4720 }
4721 }
4722 if (ms_success)
4723 {
4724 // count the number of constant characters in the first argument
4725 std::map<Node, unsigned> count_const[2];
4726 std::vector<Node> chars;
4727 for (unsigned j = 0; j < 2; j++)
4728 {
4729 for (std::pair<const Node, unsigned>& ncp : num_const[j])
4730 {
4731 Node cn = ncp.first;
4732 Assert(cn.isConst());
4733 std::vector<unsigned> cc_vec;
4734 const std::vector<unsigned>& cvec = cn.getConst<String>().getVec();
4735 for (unsigned i = 0, size = cvec.size(); i < size; i++)
4736 {
4737 // make the character
4738 cc_vec.clear();
4739 cc_vec.insert(cc_vec.end(), cvec.begin() + i, cvec.begin() + i + 1);
4740 Node ch = nm->mkConst(String(cc_vec));
4741 count_const[j][ch] += ncp.second;
4742 if (std::find(chars.begin(), chars.end(), ch) == chars.end())
4743 {
4744 chars.push_back(ch);
4745 }
4746 }
4747 }
4748 }
4749 Trace("strings-entail-ms-ss")
4750 << "For " << a << " and " << b << " : " << std::endl;
4751 for (const Node& ch : chars)
4752 {
4753 Trace("strings-entail-ms-ss") << " # occurrences of substring ";
4754 Trace("strings-entail-ms-ss") << ch << " in arguments is ";
4755 Trace("strings-entail-ms-ss")
4756 << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
4757 if (count_const[0][ch] < count_const[1][ch])
4758 {
4759 return true;
4760 }
4761 }
4762
4763 // TODO (#1180): count the number of 2,3,4,.. character substrings
4764 // for example:
4765 // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
4766 // since the second argument contains more occurrences of "bb".
4767 // note this is orthogonal reasoning to inductive reasoning
4768 // via regular membership reduction in Liang et al CAV 2015.
4769 }
4770 return false;
4771 }
4772
4773 Node TheoryStringsRewriter::checkEntailHomogeneousString(Node a)
4774 {
4775 NodeManager* nm = NodeManager::currentNM();
4776
4777 std::vector<Node> avec;
4778 utils::getConcat(getMultisetApproximation(a), avec);
4779
4780 bool cValid = false;
4781 unsigned c = 0;
4782 for (const Node& ac : avec)
4783 {
4784 if (ac.getKind() == CONST_STRING)
4785 {
4786 std::vector<unsigned> acv = ac.getConst<String>().getVec();
4787 for (unsigned cc : acv)
4788 {
4789 if (!cValid)
4790 {
4791 cValid = true;
4792 c = cc;
4793 }
4794 else if (c != cc)
4795 {
4796 // Found a different character
4797 return Node::null();
4798 }
4799 }
4800 }
4801 else
4802 {
4803 // Could produce a different character
4804 return Node::null();
4805 }
4806 }
4807
4808 if (!cValid)
4809 {
4810 return nm->mkConst(String(""));
4811 }
4812
4813 std::vector<unsigned> cv = {c};
4814 return nm->mkConst(String(cv));
4815 }
4816
4817 Node TheoryStringsRewriter::getMultisetApproximation(Node a)
4818 {
4819 NodeManager* nm = NodeManager::currentNM();
4820 if (a.getKind() == STRING_SUBSTR)
4821 {
4822 return a[0];
4823 }
4824 else if (a.getKind() == STRING_STRREPL)
4825 {
4826 return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
4827 }
4828 else if (a.getKind() == STRING_CONCAT)
4829 {
4830 NodeBuilder<> nb(STRING_CONCAT);
4831 for (const Node& ac : a)
4832 {
4833 nb << getMultisetApproximation(ac);
4834 }
4835 return nb.constructNode();
4836 }
4837 else
4838 {
4839 return a;
4840 }
4841 }
4842
4843 bool TheoryStringsRewriter::checkEntailArithWithEqAssumption(Node assumption,
4844 Node a,
4845 bool strict)
4846 {
4847 Assert(assumption.getKind() == kind::EQUAL);
4848 Assert(Rewriter::rewrite(assumption) == assumption);
4849
4850 // Find candidates variables to compute substitutions for
4851 std::unordered_set<Node, NodeHashFunction> candVars;
4852 std::vector<Node> toVisit = {assumption};
4853 while (!toVisit.empty())
4854 {
4855 Node curr = toVisit.back();
4856 toVisit.pop_back();
4857
4858 if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT
4859 || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL)
4860 {
4861 for (const auto& currChild : curr)
4862 {
4863 toVisit.push_back(currChild);
4864 }
4865 }
4866 else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH)
4867 {
4868 candVars.insert(curr);
4869 }
4870 else if (curr.getKind() == kind::STRING_LENGTH)
4871 {
4872 candVars.insert(curr);
4873 }
4874 }
4875
4876 // Check if any of the candidate variables are in n
4877 Node v;
4878 Assert(toVisit.empty());
4879 toVisit.push_back(a);
4880 while (!toVisit.empty())
4881 {
4882 Node curr = toVisit.back();
4883 toVisit.pop_back();
4884
4885 for (const auto& currChild : curr)
4886 {
4887 toVisit.push_back(currChild);
4888 }
4889
4890 if (candVars.find(curr) != candVars.end())
4891 {
4892 v = curr;
4893 break;
4894 }
4895 }
4896
4897 if (v.isNull())
4898 {
4899 // No suitable candidate found
4900 return false;
4901 }
4902
4903 Node solution = ArithMSum::solveEqualityFor(assumption, v);
4904 if (solution.isNull())
4905 {
4906 // Could not solve for v
4907 return false;
4908 }
4909
4910 a = a.substitute(TNode(v), TNode(solution));
4911 return checkEntailArith(a, strict);
4912 }
4913
4914 bool TheoryStringsRewriter::checkEntailArithWithAssumption(Node assumption,
4915 Node a,
4916 Node b,
4917 bool strict)
4918 {
4919 Assert(Rewriter::rewrite(assumption) == assumption);
4920
4921 NodeManager* nm = NodeManager::currentNM();
4922
4923 if (!assumption.isConst() && assumption.getKind() != kind::EQUAL)
4924 {
4925 // We rewrite inequality assumptions from x <= y to x + (str.len s) = y
4926 // where s is some fresh string variable. We use (str.len s) because
4927 // (str.len s) must be non-negative for the equation to hold.
4928 Node x, y;
4929 if (assumption.getKind() == kind::GEQ)
4930 {
4931 x = assumption[0];
4932 y = assumption[1];
4933 }
4934 else
4935 {
4936 // (not (>= s t)) --> (>= (t - 1) s)
4937 Assert(assumption.getKind() == kind::NOT
4938 && assumption[0].getKind() == kind::GEQ);
4939 x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1)));
4940 y = assumption[0][0];
4941 }
4942
4943 Node s = nm->mkBoundVar("slackVal", nm->stringType());
4944 Node slen = nm->mkNode(kind::STRING_LENGTH, s);
4945 assumption = Rewriter::rewrite(
4946 nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen)));
4947 }
4948
4949 Node diff = nm->mkNode(kind::MINUS, a, b);
4950 bool res = false;
4951 if (assumption.isConst())
4952 {
4953 bool assumptionBool = assumption.getConst<bool>();
4954 if (assumptionBool)
4955 {
4956 res = checkEntailArith(diff, strict);
4957 }
4958 else
4959 {
4960 res = true;
4961 }
4962 }
4963 else
4964 {
4965 res = checkEntailArithWithEqAssumption(assumption, diff, strict);
4966 }
4967 return res;
4968 }
4969
4970 bool TheoryStringsRewriter::checkEntailArithWithAssumptions(
4971 std::vector<Node> assumptions, Node a, Node b, bool strict)
4972 {
4973 // TODO: We currently try to show the entailment with each assumption
4974 // independently. In the future, we should make better use of multiple
4975 // assumptions.
4976 bool res = false;
4977 for (const auto& assumption : assumptions)
4978 {
4979 Assert(Rewriter::rewrite(assumption) == assumption);
4980
4981 if (checkEntailArithWithAssumption(assumption, a, b, strict))
4982 {
4983 res = true;
4984 break;
4985 }
4986 }
4987 return res;
4988 }
4989
4990 Node TheoryStringsRewriter::getConstantArithBound(Node a, bool isLower)
4991 {
4992 Assert(Rewriter::rewrite(a) == a);
4993 Node ret;
4994 if (a.isConst())
4995 {
4996 ret = a;
4997 }
4998 else if (a.getKind() == kind::STRING_LENGTH)
4999 {
5000 if (isLower)
5001 {
5002 ret = NodeManager::currentNM()->mkConst(Rational(0));
5003 }
5004 }
5005 else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
5006 {
5007 std::vector<Node> children;
5008 bool success = true;
5009 for (unsigned i = 0; i < a.getNumChildren(); i++)
5010 {
5011 Node ac = getConstantArithBound(a[i], isLower);
5012 if (ac.isNull())
5013 {
5014 ret = ac;
5015 success = false;
5016 break;
5017 }
5018 else
5019 {
5020 if (ac.getConst<Rational>().sgn() == 0)
5021 {
5022 if (a.getKind() == kind::MULT)
5023 {
5024 ret = ac;
5025 success = false;
5026 break;
5027 }
5028 }
5029 else
5030 {
5031 if (a.getKind() == kind::MULT)
5032 {
5033 if ((ac.getConst<Rational>().sgn() > 0) != isLower)
5034 {
5035 ret = Node::null();
5036 success = false;
5037 break;
5038 }
5039 }
5040 children.push_back(ac);
5041 }
5042 }
5043 }
5044 if (success)
5045 {
5046 if (children.empty())
5047 {
5048 ret = NodeManager::currentNM()->mkConst(Rational(0));
5049 }
5050 else if (children.size() == 1)
5051 {
5052 ret = children[0];
5053 }
5054 else
5055 {
5056 ret = NodeManager::currentNM()->mkNode(a.getKind(), children);
5057 ret = Rewriter::rewrite(ret);
5058 }
5059 }
5060 }
5061 Trace("strings-rewrite-cbound")
5062 << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a
5063 << " is " << ret << std::endl;
5064 Assert(ret.isNull() || ret.isConst());
5065 // entailment check should be at least as powerful as computing a lower bound
5066 Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() < 0
5067 || checkEntailArith(a, false));
5068 Assert(!isLower || ret.isNull() || ret.getConst<Rational>().sgn() <= 0
5069 || checkEntailArith(a, true));
5070 return ret;
5071 }
5072
5073 Node TheoryStringsRewriter::getFixedLengthForRegexp(Node n)
5074 {
5075 NodeManager* nm = NodeManager::currentNM();
5076 if (n.getKind() == STRING_TO_REGEXP)
5077 {
5078 Node ret = nm->mkNode(STRING_LENGTH, n[0]);
5079 ret = Rewriter::rewrite(ret);
5080 if (ret.isConst())
5081 {
5082 return ret;
5083 }
5084 }
5085 else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
5086 {
5087 return nm->mkConst(Rational(1));
5088 }
5089 else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
5090 {
5091 Node ret;
5092 for (const Node& nc : n)
5093 {
5094 Node flc = getFixedLengthForRegexp(nc);
5095 if (flc.isNull() || (!ret.isNull() && ret != flc))
5096 {
5097 return Node::null();
5098 }
5099 else if (ret.isNull())
5100 {
5101 // first time
5102 ret = flc;
5103 }
5104 }
5105 return ret;
5106 }
5107 else if (n.getKind() == REGEXP_CONCAT)
5108 {
5109 NodeBuilder<> nb(PLUS);
5110 for (const Node& nc : n)
5111 {
5112 Node flc = getFixedLengthForRegexp(nc);
5113 if (flc.isNull())
5114 {
5115 return flc;
5116 }
5117 nb << flc;
5118 }
5119 Node ret = nb.constructNode();
5120 ret = Rewriter::rewrite(ret);
5121 return ret;
5122 }
5123 return Node::null();
5124 }
5125
5126 bool TheoryStringsRewriter::checkEntailArithInternal(Node a)
5127 {
5128 Assert(Rewriter::rewrite(a) == a);
5129 // check whether a >= 0
5130 if (a.isConst())
5131 {
5132 return a.getConst<Rational>().sgn() >= 0;
5133 }
5134 else if (a.getKind() == kind::STRING_LENGTH)
5135 {
5136 // str.len( t ) >= 0
5137 return true;
5138 }
5139 else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT)
5140 {
5141 for (unsigned i = 0; i < a.getNumChildren(); i++)
5142 {
5143 if (!checkEntailArithInternal(a[i]))
5144 {
5145 return false;
5146 }
5147 }
5148 // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0
5149 return true;
5150 }
5151
5152 return false;
5153 }
5154
5155 Node TheoryStringsRewriter::decomposeSubstrChain(Node s,
5156 std::vector<Node>& ss,
5157 std::vector<Node>& ls)
5158 {
5159 Assert(ss.empty());
5160 Assert(ls.empty());
5161 while (s.getKind() == STRING_SUBSTR)
5162 {
5163 ss.push_back(s[1]);
5164 ls.push_back(s[2]);
5165 s = s[0];
5166 }
5167 std::reverse(ss.begin(), ss.end());
5168 std::reverse(ls.begin(), ls.end());
5169 return s;
5170 }
5171
5172 Node TheoryStringsRewriter::mkSubstrChain(Node base,
5173 const std::vector<Node>& ss,
5174 const std::vector<Node>& ls)
5175 {
5176 NodeManager* nm = NodeManager::currentNM();
5177 for (unsigned i = 0, size = ss.size(); i < size; i++)
5178 {
5179 base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
5180 }
5181 return base;
5182 }
5183
5184 Node TheoryStringsRewriter::getStringOrEmpty(Node n)
5185 {
5186 NodeManager* nm = NodeManager::currentNM();
5187 Node res;
5188 while (res.isNull())
5189 {
5190 switch (n.getKind())
5191 {
5192 case kind::STRING_STRREPL:
5193 {
5194 Node empty = nm->mkConst(::CVC4::String(""));
5195 if (n[0] == empty)
5196 {
5197 // (str.replace "" x y) --> y
5198 n = n[2];
5199 break;
5200 }
5201
5202 if (checkEntailLengthOne(n[0]) && n[2] == empty)
5203 {
5204 // (str.replace "A" x "") --> "A"
5205 res = n[0];
5206 break;
5207 }
5208
5209 res = n;
5210 break;
5211 }
5212 case kind::STRING_SUBSTR:
5213 {
5214 if (checkEntailLengthOne(n[0]))
5215 {
5216 // (str.substr "A" x y) --> "A"
5217 res = n[0];
5218 break;
5219 }
5220 res = n;
5221 break;
5222 }
5223 default:
5224 {
5225 res = n;
5226 break;
5227 }
5228 }
5229 }
5230 return res;
5231 }
5232
5233 bool TheoryStringsRewriter::inferZerosInSumGeq(Node x,
5234 std::vector<Node>& ys,
5235 std::vector<Node>& zeroYs)
5236 {
5237 Assert(zeroYs.empty());
5238
5239 NodeManager* nm = NodeManager::currentNM();
5240
5241 // Check if we can show that y1 + ... + yn >= x
5242 Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0];
5243 if (!checkEntailArith(sum, x))
5244 {
5245 return false;
5246 }
5247
5248 // Try to remove yi one-by-one and check if we can still show:
5249 //
5250 // y1 + ... + yi-1 + yi+1 + ... + yn >= x
5251 //
5252 // If that's the case, we know that yi can be zero and the inequality still
5253 // holds.
5254 size_t i = 0;
5255 while (i < ys.size())
5256 {
5257 Node yi = ys[i];
5258 std::vector<Node>::iterator pos = ys.erase(ys.begin() + i);
5259 if (ys.size() > 1)
5260 {
5261 sum = nm->mkNode(PLUS, ys);
5262 }
5263 else
5264 {
5265 sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0));
5266 }
5267
5268 if (checkEntailArith(sum, x))
5269 {
5270 zeroYs.push_back(yi);
5271 }
5272 else
5273 {
5274 ys.insert(pos, yi);
5275 i++;
5276 }
5277 }
5278 return true;
5279 }
5280
5281 Node TheoryStringsRewriter::inferEqsFromContains(Node x, Node y)
5282 {
5283 NodeManager* nm = NodeManager::currentNM();
5284 Node emp = nm->mkConst(String(""));
5285
5286 Node xLen = nm->mkNode(STRING_LENGTH, x);
5287 std::vector<Node> yLens;
5288 if (y.getKind() != STRING_CONCAT)
5289 {
5290 yLens.push_back(nm->mkNode(STRING_LENGTH, y));
5291 }
5292 else
5293 {
5294 for (const Node& yi : y)
5295 {
5296 yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
5297 }
5298 }
5299
5300 std::vector<Node> zeroLens;
5301 if (x == emp)
5302 {
5303 // If x is the empty string, then all ys must be empty, too, and we can
5304 // skip the expensive checks. Note that this is just a performance
5305 // optimization.
5306 zeroLens.swap(yLens);
5307 }
5308 else
5309 {
5310 // Check if we can infer that str.len(x) <= str.len(y). If that is the
5311 // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
5312 // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
5313 // true. The terms that can have length zero without making the inequality
5314 // false must be all be empty if (str.contains x y) is true.
5315 if (!inferZerosInSumGeq(xLen, yLens, zeroLens))
5316 {
5317 // We could not prove that the inequality holds
5318 return Node::null();
5319 }
5320 else if (yLens.size() == y.getNumChildren())
5321 {
5322 // We could only prove that the inequality holds but not that any of the
5323 // ys must be empty
5324 return nm->mkNode(EQUAL, x, y);
5325 }
5326 }
5327
5328 if (y.getKind() != STRING_CONCAT)
5329 {
5330 if (zeroLens.size() == 1)
5331 {
5332 // y is not a concatenation and we found that it must be empty, so just
5333 // return (= y "")
5334 Assert(zeroLens[0][0] == y);
5335 return nm->mkNode(EQUAL, y, emp);
5336 }
5337 else
5338 {
5339 Assert(yLens.size() == 1 && yLens[0][0] == y);
5340 return nm->mkNode(EQUAL, x, y);
5341 }
5342 }
5343
5344 std::vector<Node> cs;
5345 for (const Node& yiLen : yLens)
5346 {
5347 Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
5348 cs.push_back(yiLen[0]);
5349 }
5350
5351 NodeBuilder<> nb(AND);
5352 // (= x (str.++ y1' ... ym'))
5353 if (!cs.empty())
5354 {
5355 nb << nm->mkNode(EQUAL, x, utils::mkConcat(STRING_CONCAT, cs));
5356 }
5357 // (= y1'' "") ... (= yk'' "")
5358 for (const Node& zeroLen : zeroLens)
5359 {
5360 Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
5361 nb << nm->mkNode(EQUAL, zeroLen[0], emp);
5362 }
5363
5364 // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
5365 return nb.constructNode();
5366 }
5367
5368 std::pair<bool, std::vector<Node> > TheoryStringsRewriter::collectEmptyEqs(
5369 Node x)
5370 {
5371 NodeManager* nm = NodeManager::currentNM();
5372 Node empty = nm->mkConst(::CVC4::String(""));
5373
5374 // Collect the equalities of the form (= x "") (sorted)
5375 std::set<TNode> emptyNodes;
5376 bool allEmptyEqs = true;
5377 if (x.getKind() == kind::EQUAL)
5378 {
5379 if (x[0] == empty)
5380 {
5381 emptyNodes.insert(x[1]);
5382 }
5383 else if (x[1] == empty)
5384 {
5385 emptyNodes.insert(x[0]);
5386 }
5387 else
5388 {
5389 allEmptyEqs = false;
5390 }
5391 }
5392 else if (x.getKind() == kind::AND)
5393 {
5394 for (const Node& c : x)
5395 {
5396 if (c.getKind() == kind::EQUAL)
5397 {
5398 if (c[0] == empty)
5399 {
5400 emptyNodes.insert(c[1]);
5401 }
5402 else if (c[1] == empty)
5403 {
5404 emptyNodes.insert(c[0]);
5405 }
5406 }
5407 else
5408 {
5409 allEmptyEqs = false;
5410 }
5411 }
5412 }
5413
5414 if (emptyNodes.size() == 0)
5415 {
5416 allEmptyEqs = false;
5417 }
5418
5419 return std::make_pair(
5420 allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
5421 }
5422
5423 Node TheoryStringsRewriter::returnRewrite(Node node, Node ret, const char* c)
5424 {
5425 Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << c
5426 << "." << std::endl;
5427
5428 NodeManager* nm = NodeManager::currentNM();
5429
5430 // standard post-processing
5431 // We rewrite (string) equalities immediately here. This allows us to forego
5432 // the standard invariant on equality rewrites (that s=t must rewrite to one
5433 // of { s=t, t=s, true, false } ).
5434 Kind retk = ret.getKind();
5435 if (retk == OR || retk == AND)
5436 {
5437 std::vector<Node> children;
5438 bool childChanged = false;
5439 for (const Node& cret : ret)
5440 {
5441 Node creter = cret;
5442 if (cret.getKind() == EQUAL)
5443 {
5444 creter = rewriteEqualityExt(cret);
5445 }
5446 else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
5447 {
5448 creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
5449 }
5450 childChanged = childChanged || cret != creter;
5451 children.push_back(creter);
5452 }
5453 if (childChanged)
5454 {
5455 ret = nm->mkNode(retk, children);
5456 }
5457 }
5458 else if (retk == NOT && ret[0].getKind() == EQUAL)
5459 {
5460 ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
5461 }
5462 else if (retk == EQUAL && node.getKind() != EQUAL)
5463 {
5464 Trace("strings-rewrite")
5465 << "Apply extended equality rewrite on " << ret << std::endl;
5466 ret = rewriteEqualityExt(ret);
5467 }
5468 return ret;
5469 }