Convert more uses of strings to words (#4527)
[cvc5.git] / src / theory / strings / strings_entail.cpp
1 /********************* */
2 /*! \file strings_entail.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of entailment tests involving strings.
13 **/
14
15 #include "theory/strings/strings_entail.h"
16
17 #include "expr/node_builder.h"
18 #include "theory/rewriter.h"
19 #include "theory/strings/arith_entail.h"
20 #include "theory/strings/sequences_rewriter.h"
21 #include "theory/strings/theory_strings_utils.h"
22 #include "theory/strings/word.h"
23
24 using namespace CVC4::kind;
25
26 namespace CVC4 {
27 namespace theory {
28 namespace strings {
29
30 StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
31 {
32 }
33
34 bool StringsEntail::canConstantContainConcat(Node c,
35 Node n,
36 int& firstc,
37 int& lastc)
38 {
39 Assert(c.isConst());
40 CVC4::String t = c.getConst<String>();
41 const std::vector<unsigned>& tvec = t.getVec();
42 Assert(n.getKind() == STRING_CONCAT);
43 // must find constant components in order
44 size_t pos = 0;
45 firstc = -1;
46 lastc = -1;
47 for (unsigned i = 0; i < n.getNumChildren(); i++)
48 {
49 if (n[i].isConst())
50 {
51 firstc = firstc == -1 ? i : firstc;
52 lastc = i;
53 CVC4::String s = n[i].getConst<String>();
54 size_t new_pos = t.find(s, pos);
55 if (new_pos == std::string::npos)
56 {
57 return false;
58 }
59 else
60 {
61 pos = new_pos + s.size();
62 }
63 }
64 else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
65 {
66 // find the first occurrence of a digit starting at pos
67 while (pos < tvec.size() && !String::isDigit(tvec[pos]))
68 {
69 pos++;
70 }
71 if (pos == tvec.size())
72 {
73 return false;
74 }
75 // must consume at least one digit here
76 pos++;
77 }
78 }
79 return true;
80 }
81
82 bool StringsEntail::canConstantContainList(Node c,
83 std::vector<Node>& l,
84 int& firstc,
85 int& lastc)
86 {
87 Assert(c.isConst());
88 // must find constant components in order
89 size_t pos = 0;
90 firstc = -1;
91 lastc = -1;
92 for (unsigned i = 0; i < l.size(); i++)
93 {
94 if (l[i].isConst())
95 {
96 firstc = firstc == -1 ? i : firstc;
97 lastc = i;
98 size_t new_pos = Word::find(c, l[i], pos);
99 if (new_pos == std::string::npos)
100 {
101 return false;
102 }
103 else
104 {
105 pos = new_pos + Word::getLength(l[i]);
106 }
107 }
108 }
109 return true;
110 }
111
112 bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
113 std::vector<Node>& nr,
114 int dir,
115 Node& curr)
116 {
117 Assert(dir == 1 || dir == -1);
118 Assert(nr.empty());
119 Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
120 bool ret = false;
121 bool success;
122 unsigned sindex = 0;
123 do
124 {
125 Assert(!curr.isNull());
126 success = false;
127 if (curr != zero && sindex < n1.size())
128 {
129 unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
130 if (n1[sindex_use].isConst())
131 {
132 // could strip part of a constant
133 Node lowerBound =
134 ArithEntail::getConstantBound(Rewriter::rewrite(curr));
135 if (!lowerBound.isNull())
136 {
137 Assert(lowerBound.isConst());
138 Rational lbr = lowerBound.getConst<Rational>();
139 if (lbr.sgn() > 0)
140 {
141 Assert(ArithEntail::check(curr, true));
142 CVC4::String s = n1[sindex_use].getConst<String>();
143 Node ncl =
144 NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
145 Node next_s =
146 NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
147 next_s = Rewriter::rewrite(next_s);
148 Assert(next_s.isConst());
149 // we can remove the entire constant
150 if (next_s.getConst<Rational>().sgn() >= 0)
151 {
152 curr = Rewriter::rewrite(
153 NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
154 success = true;
155 sindex++;
156 }
157 else
158 {
159 // we can remove part of the constant
160 // lower bound minus the length of a concrete string is negative,
161 // hence lowerBound cannot be larger than long max
162 Assert(lbr < Rational(String::maxSize()));
163 curr = Rewriter::rewrite(
164 NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
165 uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
166 Assert(lbsize < s.size());
167 if (dir == 1)
168 {
169 // strip partially from the front
170 nr.push_back(
171 NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
172 n1[sindex_use] = NodeManager::currentNM()->mkConst(
173 s.suffix(s.size() - lbsize));
174 }
175 else
176 {
177 // strip partially from the back
178 nr.push_back(
179 NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
180 n1[sindex_use] = NodeManager::currentNM()->mkConst(
181 s.prefix(s.size() - lbsize));
182 }
183 ret = true;
184 }
185 Assert(ArithEntail::check(curr));
186 }
187 else
188 {
189 // we cannot remove the constant
190 }
191 }
192 }
193 else
194 {
195 Node next_s = NodeManager::currentNM()->mkNode(
196 MINUS,
197 curr,
198 NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
199 next_s = Rewriter::rewrite(next_s);
200 if (ArithEntail::check(next_s))
201 {
202 success = true;
203 curr = next_s;
204 sindex++;
205 }
206 }
207 }
208 } while (success);
209 if (sindex > 0)
210 {
211 if (dir == 1)
212 {
213 nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
214 n1.erase(n1.begin(), n1.begin() + sindex);
215 }
216 else
217 {
218 nr.insert(nr.end(), n1.end() - sindex, n1.end());
219 n1.erase(n1.end() - sindex, n1.end());
220 }
221 ret = true;
222 }
223 return ret;
224 }
225
226 int StringsEntail::componentContains(std::vector<Node>& n1,
227 std::vector<Node>& n2,
228 std::vector<Node>& nb,
229 std::vector<Node>& ne,
230 bool computeRemainder,
231 int remainderDir)
232 {
233 Assert(nb.empty());
234 Assert(ne.empty());
235 // if n2 is a singleton, we can do optimized version here
236 if (n2.size() == 1)
237 {
238 for (unsigned i = 0; i < n1.size(); i++)
239 {
240 Node n1rb;
241 Node n1re;
242 if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
243 {
244 if (computeRemainder)
245 {
246 n1[i] = n2[0];
247 if (remainderDir != -1)
248 {
249 if (!n1re.isNull())
250 {
251 ne.push_back(n1re);
252 }
253 ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
254 n1.erase(n1.begin() + i + 1, n1.end());
255 }
256 else if (!n1re.isNull())
257 {
258 n1[i] = Rewriter::rewrite(
259 NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
260 }
261 if (remainderDir != 1)
262 {
263 nb.insert(nb.end(), n1.begin(), n1.begin() + i);
264 n1.erase(n1.begin(), n1.begin() + i);
265 if (!n1rb.isNull())
266 {
267 nb.push_back(n1rb);
268 }
269 }
270 else if (!n1rb.isNull())
271 {
272 n1[i] = Rewriter::rewrite(
273 NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
274 }
275 }
276 return i;
277 }
278 }
279 }
280 else if (n1.size() >= n2.size())
281 {
282 unsigned diff = n1.size() - n2.size();
283 for (unsigned i = 0; i <= diff; i++)
284 {
285 Node n1rb_first;
286 Node n1re_first;
287 // first component of n2 must be a suffix
288 if (componentContainsBase(n1[i],
289 n2[0],
290 n1rb_first,
291 n1re_first,
292 1,
293 computeRemainder && remainderDir != 1))
294 {
295 Assert(n1re_first.isNull());
296 for (unsigned j = 1; j < n2.size(); j++)
297 {
298 // are we in the last component?
299 if (j + 1 == n2.size())
300 {
301 Node n1rb_last;
302 Node n1re_last;
303 // last component of n2 must be a prefix
304 if (componentContainsBase(n1[i + j],
305 n2[j],
306 n1rb_last,
307 n1re_last,
308 -1,
309 computeRemainder && remainderDir != -1))
310 {
311 Assert(n1rb_last.isNull());
312 if (computeRemainder)
313 {
314 if (remainderDir != -1)
315 {
316 if (!n1re_last.isNull())
317 {
318 ne.push_back(n1re_last);
319 }
320 ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
321 n1.erase(n1.begin() + i + j + 1, n1.end());
322 n1[i + j] = n2[j];
323 }
324 if (remainderDir != 1)
325 {
326 n1[i] = n2[0];
327 nb.insert(nb.end(), n1.begin(), n1.begin() + i);
328 n1.erase(n1.begin(), n1.begin() + i);
329 if (!n1rb_first.isNull())
330 {
331 nb.push_back(n1rb_first);
332 }
333 }
334 }
335 return i;
336 }
337 else
338 {
339 break;
340 }
341 }
342 else if (n1[i + j] != n2[j])
343 {
344 break;
345 }
346 }
347 }
348 }
349 }
350 return -1;
351 }
352
353 bool StringsEntail::componentContainsBase(
354 Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
355 {
356 Assert(n1rb.isNull());
357 Assert(n1re.isNull());
358
359 NodeManager* nm = NodeManager::currentNM();
360
361 if (n1 == n2)
362 {
363 return true;
364 }
365 else
366 {
367 if (n1.isConst() && n2.isConst())
368 {
369 size_t len1 = Word::getLength(n1);
370 size_t len2 = Word::getLength(n2);
371 if (len2 < len1)
372 {
373 if (dir == 1)
374 {
375 if (Word::suffix(n1, len2) == n2)
376 {
377 if (computeRemainder)
378 {
379 n1rb = Word::prefix(n1, len1 - len2);
380 }
381 return true;
382 }
383 }
384 else if (dir == -1)
385 {
386 if (Word::prefix(n1, len2) == n2)
387 {
388 if (computeRemainder)
389 {
390 n1re = Word::suffix(n1, len1 - len2);
391 }
392 return true;
393 }
394 }
395 else
396 {
397 size_t f = Word::find(n1, n2);
398 if (f != std::string::npos)
399 {
400 if (computeRemainder)
401 {
402 if (f > 0)
403 {
404 n1rb = Word::prefix(n1, f);
405 }
406 if (len1 > f + len2)
407 {
408 n1re = Word::suffix(n1, len1 - (f + len2));
409 }
410 }
411 return true;
412 }
413 }
414 }
415 }
416 else
417 {
418 // cases for:
419 // n1 = x containing n2 = substr( x, n2[1], n2[2] )
420 if (n2.getKind() == STRING_SUBSTR)
421 {
422 if (n2[0] == n1)
423 {
424 bool success = true;
425 Node start_pos = n2[1];
426 Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
427 Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
428 if (dir == 1)
429 {
430 // To be a suffix, start + length must be greater than
431 // or equal to the length of the string.
432 success = ArithEntail::check(end_pos, len_n2s);
433 }
434 else if (dir == -1)
435 {
436 // To be a prefix, must literally start at 0, since
437 // if we knew it started at <0, it should be rewritten to "",
438 // if we knew it started at 0, then n2[1] should be rewritten to
439 // 0.
440 success = start_pos.isConst()
441 && start_pos.getConst<Rational>().sgn() == 0;
442 }
443 if (success)
444 {
445 if (computeRemainder)
446 {
447 // we can only compute the remainder if start_pos and end_pos
448 // are known to be non-negative.
449 if (!ArithEntail::check(start_pos)
450 || !ArithEntail::check(end_pos))
451 {
452 return false;
453 }
454 if (dir != 1)
455 {
456 n1rb = nm->mkNode(
457 STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
458 }
459 if (dir != -1)
460 {
461 n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
462 }
463 }
464 return true;
465 }
466 }
467 }
468
469 if (!computeRemainder && dir == 0)
470 {
471 if (n1.getKind() == STRING_STRREPL)
472 {
473 // (str.contains (str.replace x y z) w) ---> true
474 // if (str.contains x w) --> true and (str.contains z w) ---> true
475 Node xCtnW = checkContains(n1[0], n2);
476 if (!xCtnW.isNull() && xCtnW.getConst<bool>())
477 {
478 Node zCtnW = checkContains(n1[2], n2);
479 if (!zCtnW.isNull() && zCtnW.getConst<bool>())
480 {
481 return true;
482 }
483 }
484 }
485 }
486 }
487 }
488 return false;
489 }
490
491 bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
492 std::vector<Node>& n2,
493 std::vector<Node>& nb,
494 std::vector<Node>& ne,
495 int dir)
496 {
497 Assert(nb.empty());
498 Assert(ne.empty());
499
500 NodeManager* nm = NodeManager::currentNM();
501 bool changed = false;
502 // for ( forwards, backwards ) direction
503 for (unsigned r = 0; r < 2; r++)
504 {
505 if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
506 {
507 unsigned index0 = r == 0 ? 0 : n1.size() - 1;
508 unsigned index1 = r == 0 ? 0 : n2.size() - 1;
509 bool removeComponent = false;
510 Node n1cmp = n1[index0];
511
512 if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
513 {
514 return false;
515 }
516
517 std::vector<Node> sss;
518 std::vector<Node> sls;
519 n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
520 Trace("strings-rewrite-debug2")
521 << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
522 << ", dir = " << dir << std::endl;
523 if (n1cmp.isConst())
524 {
525 CVC4::String s = n1cmp.getConst<String>();
526 // overlap is an overapproximation of the number of characters
527 // n2[index1] can match in s
528 unsigned overlap = s.size();
529 if (n2[index1].isConst())
530 {
531 CVC4::String t = n2[index1].getConst<String>();
532 std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
533 if (ret == std::string::npos)
534 {
535 if (n1.size() == 1)
536 {
537 // can remove everything
538 // e.g. str.contains( "abc", str.++( "ba", x ) ) -->
539 // str.contains( "", str.++( "ba", x ) )
540 removeComponent = true;
541 }
542 else if (sss.empty()) // only if not substr
543 {
544 // check how much overlap there is
545 // This is used to partially strip off the endpoint
546 // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
547 // str.contains( str.++( "c", x ), str.++( "cd", y ) )
548 overlap = r == 0 ? s.overlap(t) : t.overlap(s);
549 }
550 else
551 {
552 // if we are looking at a substring, we can remove the component
553 // if there is no overlap
554 // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
555 // --> str.contains( x, "a" )
556 removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
557 }
558 }
559 else if (sss.empty()) // only if not substr
560 {
561 Assert(ret < s.size());
562 // can strip off up to the find position, e.g.
563 // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
564 // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
565 // and
566 // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
567 // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
568 overlap = s.size() - ret;
569 }
570 }
571 else
572 {
573 // inconclusive
574 }
575 // process the overlap
576 if (overlap < s.size())
577 {
578 changed = true;
579 if (overlap == 0)
580 {
581 removeComponent = true;
582 }
583 else
584 {
585 // can drop the prefix (resp. suffix) from the first (resp. last)
586 // component
587 if (r == 0)
588 {
589 nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
590 n1[index0] = nm->mkConst(s.suffix(overlap));
591 }
592 else
593 {
594 ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
595 n1[index0] = nm->mkConst(s.prefix(overlap));
596 }
597 }
598 }
599 }
600 else if (n1cmp.getKind() == STRING_ITOS)
601 {
602 if (n2[index1].isConst())
603 {
604 CVC4::String t = n2[index1].getConst<String>();
605
606 if (n1.size() == 1)
607 {
608 // if n1.size()==1, then if n2[index1] is not a number, we can drop
609 // the entire component
610 // e.g. str.contains( int.to.str(x), "123a45") --> false
611 if (!t.isNumber())
612 {
613 removeComponent = true;
614 }
615 }
616 else
617 {
618 const std::vector<unsigned>& tvec = t.getVec();
619 Assert(tvec.size() > 0);
620
621 // if n1.size()>1, then if the first (resp. last) character of
622 // n2[index1]
623 // is not a digit, we can drop the entire component, e.g.:
624 // str.contains( str.++( int.to.str(x), y ), "a12") -->
625 // str.contains( y, "a12" )
626 // str.contains( str.++( y, int.to.str(x) ), "a0b") -->
627 // str.contains( y, "a0b" )
628 unsigned i = r == 0 ? 0 : (tvec.size() - 1);
629 if (!String::isDigit(tvec[i]))
630 {
631 removeComponent = true;
632 }
633 }
634 }
635 }
636 if (removeComponent)
637 {
638 // can drop entire first (resp. last) component
639 if (r == 0)
640 {
641 nb.push_back(n1[index0]);
642 n1.erase(n1.begin(), n1.begin() + 1);
643 }
644 else
645 {
646 ne.push_back(n1[index0]);
647 n1.pop_back();
648 }
649 if (n1.empty())
650 {
651 // if we've removed everything, just return (we will rewrite to false)
652 return true;
653 }
654 else
655 {
656 changed = true;
657 }
658 }
659 }
660 }
661 // TODO (#1180) : computing the maximal overlap in this function may be
662 // important.
663 // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
664 // false
665 // ...since str.to.int(x) can contain at most 1 character from "2aaaa",
666 // leaving 4 characters
667 // which is larger that the upper bound for length of str.substr(y,0,3),
668 // which is 3.
669 return changed;
670 }
671
672 Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
673 {
674 NodeManager* nm = NodeManager::currentNM();
675 Node ctn = nm->mkNode(STRING_STRCTN, a, b);
676
677 if (fullRewriter)
678 {
679 ctn = Rewriter::rewrite(ctn);
680 }
681 else
682 {
683 Node prev;
684 do
685 {
686 prev = ctn;
687 ctn = d_rewriter.rewriteContains(ctn);
688 } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
689 }
690
691 Assert(ctn.getType().isBoolean());
692 return ctn.isConst() ? ctn : Node::null();
693 }
694
695 bool StringsEntail::checkNonEmpty(Node a)
696 {
697 Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
698 len = Rewriter::rewrite(len);
699 return ArithEntail::check(len, true);
700 }
701
702 bool StringsEntail::checkLengthOne(Node s, bool strict)
703 {
704 NodeManager* nm = NodeManager::currentNM();
705 Node one = nm->mkConst(Rational(1));
706 Node len = nm->mkNode(STRING_LENGTH, s);
707 len = Rewriter::rewrite(len);
708 return ArithEntail::check(one, len)
709 && (!strict || ArithEntail::check(len, true));
710 }
711
712 bool StringsEntail::checkMultisetSubset(Node a, Node b)
713 {
714 std::vector<Node> avec;
715 utils::getConcat(getMultisetApproximation(a), avec);
716 std::vector<Node> bvec;
717 utils::getConcat(b, bvec);
718
719 std::map<Node, unsigned> num_nconst[2];
720 std::map<Node, unsigned> num_const[2];
721 for (unsigned j = 0; j < 2; j++)
722 {
723 std::vector<Node>& jvec = j == 0 ? avec : bvec;
724 for (const Node& cc : jvec)
725 {
726 if (cc.isConst())
727 {
728 num_const[j][cc]++;
729 }
730 else
731 {
732 num_nconst[j][cc]++;
733 }
734 }
735 }
736 bool ms_success = true;
737 for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
738 {
739 if (nncp.second > num_nconst[1][nncp.first])
740 {
741 ms_success = false;
742 break;
743 }
744 }
745 if (ms_success)
746 {
747 // count the number of constant characters in the first argument
748 std::map<Node, unsigned> count_const[2];
749 std::vector<Node> chars;
750 for (unsigned j = 0; j < 2; j++)
751 {
752 for (std::pair<const Node, unsigned>& ncp : num_const[j])
753 {
754 Node cn = ncp.first;
755 Assert(cn.isConst());
756 std::vector<Node> cnChars = Word::getChars(cn);
757 for (const Node& ch : cnChars)
758 {
759 count_const[j][ch] += ncp.second;
760 if (std::find(chars.begin(), chars.end(), ch) == chars.end())
761 {
762 chars.push_back(ch);
763 }
764 }
765 }
766 }
767 Trace("strings-entail-ms-ss")
768 << "For " << a << " and " << b << " : " << std::endl;
769 for (const Node& ch : chars)
770 {
771 Trace("strings-entail-ms-ss") << " # occurrences of substring ";
772 Trace("strings-entail-ms-ss") << ch << " in arguments is ";
773 Trace("strings-entail-ms-ss")
774 << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
775 if (count_const[0][ch] < count_const[1][ch])
776 {
777 return true;
778 }
779 }
780
781 // TODO (#1180): count the number of 2,3,4,.. character substrings
782 // for example:
783 // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
784 // since the second argument contains more occurrences of "bb".
785 // note this is orthogonal reasoning to inductive reasoning
786 // via regular membership reduction in Liang et al CAV 2015.
787 }
788 return false;
789 }
790
791 Node StringsEntail::checkHomogeneousString(Node a)
792 {
793 std::vector<Node> avec;
794 utils::getConcat(getMultisetApproximation(a), avec);
795
796 bool cValid = false;
797 Node c;
798 for (const Node& ac : avec)
799 {
800 if (ac.isConst())
801 {
802 std::vector<Node> acv = Word::getChars(ac);
803 for (const Node& cc : acv)
804 {
805 if (!cValid)
806 {
807 cValid = true;
808 c = cc;
809 }
810 else if (c != cc)
811 {
812 // Found a different character
813 return Node::null();
814 }
815 }
816 }
817 else
818 {
819 // Could produce a different character
820 return Node::null();
821 }
822 }
823
824 if (!cValid)
825 {
826 return Word::mkEmptyWord(a.getType());
827 }
828
829 return c;
830 }
831
832 Node StringsEntail::getMultisetApproximation(Node a)
833 {
834 NodeManager* nm = NodeManager::currentNM();
835 if (a.getKind() == STRING_SUBSTR)
836 {
837 return a[0];
838 }
839 else if (a.getKind() == STRING_STRREPL)
840 {
841 return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
842 }
843 else if (a.getKind() == STRING_CONCAT)
844 {
845 NodeBuilder<> nb(STRING_CONCAT);
846 for (const Node& ac : a)
847 {
848 nb << getMultisetApproximation(ac);
849 }
850 return nb.constructNode();
851 }
852 else
853 {
854 return a;
855 }
856 }
857
858 Node StringsEntail::getStringOrEmpty(Node n)
859 {
860 Node res;
861 while (res.isNull())
862 {
863 switch (n.getKind())
864 {
865 case STRING_STRREPL:
866 {
867 if (Word::isEmpty(n[0]))
868 {
869 // (str.replace "" x y) --> y
870 n = n[2];
871 break;
872 }
873
874 if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
875 {
876 // (str.replace "A" x "") --> "A"
877 res = n[0];
878 break;
879 }
880
881 res = n;
882 break;
883 }
884 case STRING_SUBSTR:
885 {
886 if (checkLengthOne(n[0]))
887 {
888 // (str.substr "A" x y) --> "A"
889 res = n[0];
890 break;
891 }
892 res = n;
893 break;
894 }
895 default:
896 {
897 res = n;
898 break;
899 }
900 }
901 }
902 return res;
903 }
904
905 Node StringsEntail::inferEqsFromContains(Node x, Node y)
906 {
907 NodeManager* nm = NodeManager::currentNM();
908 Node emp = Word::mkEmptyWord(x.getType());
909 Assert(x.getType() == y.getType());
910 TypeNode stype = x.getType();
911
912 Node xLen = nm->mkNode(STRING_LENGTH, x);
913 std::vector<Node> yLens;
914 if (y.getKind() != STRING_CONCAT)
915 {
916 yLens.push_back(nm->mkNode(STRING_LENGTH, y));
917 }
918 else
919 {
920 for (const Node& yi : y)
921 {
922 yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
923 }
924 }
925
926 std::vector<Node> zeroLens;
927 if (x == emp)
928 {
929 // If x is the empty string, then all ys must be empty, too, and we can
930 // skip the expensive checks. Note that this is just a performance
931 // optimization.
932 zeroLens.swap(yLens);
933 }
934 else
935 {
936 // Check if we can infer that str.len(x) <= str.len(y). If that is the
937 // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
938 // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
939 // true. The terms that can have length zero without making the inequality
940 // false must be all be empty if (str.contains x y) is true.
941 if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
942 {
943 // We could not prove that the inequality holds
944 return Node::null();
945 }
946 else if (yLens.size() == y.getNumChildren())
947 {
948 // We could only prove that the inequality holds but not that any of the
949 // ys must be empty
950 return nm->mkNode(EQUAL, x, y);
951 }
952 }
953
954 if (y.getKind() != STRING_CONCAT)
955 {
956 if (zeroLens.size() == 1)
957 {
958 // y is not a concatenation and we found that it must be empty, so just
959 // return (= y "")
960 Assert(zeroLens[0][0] == y);
961 return nm->mkNode(EQUAL, y, emp);
962 }
963 else
964 {
965 Assert(yLens.size() == 1 && yLens[0][0] == y);
966 return nm->mkNode(EQUAL, x, y);
967 }
968 }
969
970 std::vector<Node> cs;
971 for (const Node& yiLen : yLens)
972 {
973 Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
974 cs.push_back(yiLen[0]);
975 }
976
977 NodeBuilder<> nb(AND);
978 // (= x (str.++ y1' ... ym'))
979 if (!cs.empty())
980 {
981 nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
982 }
983 // (= y1'' "") ... (= yk'' "")
984 for (const Node& zeroLen : zeroLens)
985 {
986 Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
987 nb << nm->mkNode(EQUAL, zeroLen[0], emp);
988 }
989
990 // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
991 return nb.constructNode();
992 }
993
994 } // namespace strings
995 } // namespace theory
996 } // namespace CVC4