Update copyright header script to support CMake and Python files (#5067)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /********************* */
2 /*! \file term_registry.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
8 ** in the top-level source directory and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of term registry for the theory of strings.
13 **/
14
15 #include "theory/strings/term_registry.h"
16
17 #include "expr/attribute.h"
18 #include "options/smt_options.h"
19 #include "options/strings_options.h"
20 #include "smt/logic_exception.h"
21 #include "theory/rewriter.h"
22 #include "theory/strings/theory_strings_utils.h"
23 #include "theory/strings/word.h"
24
25 using namespace std;
26 using namespace CVC4::context;
27 using namespace CVC4::kind;
28
29 namespace CVC4 {
30 namespace theory {
31 namespace strings {
32
33 struct StringsProxyVarAttributeId
34 {
35 };
36 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
37 StringsProxyVarAttribute;
38
39 TermRegistry::TermRegistry(SolverState& s,
40 OutputChannel& out,
41 SequencesStatistics& statistics,
42 ProofNodeManager* pnm)
43 : d_state(s),
44 d_out(out),
45 d_statistics(statistics),
46 d_hasStrCode(false),
47 d_functionsTerms(s.getSatContext()),
48 d_inputVars(s.getUserContext()),
49 d_preregisteredTerms(s.getUserContext()),
50 d_registeredTerms(s.getUserContext()),
51 d_registeredTypes(s.getUserContext()),
52 d_proxyVar(s.getUserContext()),
53 d_proxyVarToLength(s.getUserContext()),
54 d_lengthLemmaTermsCache(s.getUserContext()),
55 d_epg(pnm ? new EagerProofGenerator(
56 pnm,
57 s.getUserContext(),
58 "strings::TermRegistry::EagerProofGenerator")
59 : nullptr)
60 {
61 NodeManager* nm = NodeManager::currentNM();
62 d_zero = nm->mkConst(Rational(0));
63 d_one = nm->mkConst(Rational(1));
64 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
65 d_cardSize = utils::getAlphabetCardinality();
66 }
67
68 TermRegistry::~TermRegistry() {}
69
70 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
71 {
72 NodeManager* nm = NodeManager::currentNM();
73 Node lemma;
74 Kind tk = t.getKind();
75 if (tk == STRING_TO_CODE)
76 {
77 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
78 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
79 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
80 Node code_range = nm->mkNode(
81 AND,
82 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
83 nm->mkNode(
84 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
85 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
86 }
87 else if (tk == STRING_STRIDOF)
88 {
89 // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
90 // x)))
91 Node l = utils::mkNLength(t[0]);
92 lemma = nm->mkNode(AND,
93 nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
94 nm->mkNode(LEQ, t, l));
95 }
96 else if (tk == STRING_STOI)
97 {
98 // (>= (str.to_int x) (- 1))
99 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
100 }
101 else if (tk == STRING_STRCTN)
102 {
103 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
104 Node sk1 =
105 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
106 Node sk2 =
107 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
108 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
109 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
110 }
111 return lemma;
112 }
113
114 Node TermRegistry::lengthPositive(Node t)
115 {
116 NodeManager* nm = NodeManager::currentNM();
117 Node zero = nm->mkConst(Rational(0));
118 Node emp = Word::mkEmptyWord(t.getType());
119 Node tlen = nm->mkNode(STRING_LENGTH, t);
120 Node tlenEqZero = tlen.eqNode(zero);
121 Node tEqEmp = t.eqNode(emp);
122 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
123 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
124 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
125 return nm->mkNode(OR, caseEmpty, caseNEmpty);
126 }
127
128 void TermRegistry::preRegisterTerm(TNode n)
129 {
130 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
131 {
132 return;
133 }
134 eq::EqualityEngine* ee = d_state.getEqualityEngine();
135 d_preregisteredTerms.insert(n);
136 Trace("strings-preregister")
137 << "TheoryString::preregister : " << n << std::endl;
138 // check for logic exceptions
139 Kind k = n.getKind();
140 if (!options::stringExp())
141 {
142 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
143 || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
144 || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
145 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
146 || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
147 {
148 std::stringstream ss;
149 ss << "Term of kind " << k
150 << " not supported in default mode, try --strings-exp";
151 throw LogicException(ss.str());
152 }
153 }
154 if (k == EQUAL)
155 {
156 if (n[0].getType().isRegExp())
157 {
158 std::stringstream ss;
159 ss << "Equality between regular expressions is not supported";
160 throw LogicException(ss.str());
161 }
162 ee->addTriggerPredicate(n);
163 return;
164 }
165 else if (k == STRING_IN_REGEXP)
166 {
167 d_out.requirePhase(n, true);
168 ee->addTriggerPredicate(n);
169 ee->addTerm(n[0]);
170 ee->addTerm(n[1]);
171 return;
172 }
173 else if (k == STRING_TO_CODE)
174 {
175 d_hasStrCode = true;
176 }
177 registerTerm(n, 0);
178 TypeNode tn = n.getType();
179 if (tn.isRegExp() && n.isVar())
180 {
181 std::stringstream ss;
182 ss << "Regular expression variables are not supported.";
183 throw LogicException(ss.str());
184 }
185 if (tn.isString()) // string-only
186 {
187 // all characters of constants should fall in the alphabet
188 if (n.isConst())
189 {
190 std::vector<unsigned> vec = n.getConst<String>().getVec();
191 for (unsigned u : vec)
192 {
193 if (u >= d_cardSize)
194 {
195 std::stringstream ss;
196 ss << "Characters in string \"" << n
197 << "\" are outside of the given alphabet.";
198 throw LogicException(ss.str());
199 }
200 }
201 }
202 ee->addTerm(n);
203 }
204 else if (tn.isBoolean())
205 {
206 // Get triggered for both equal and dis-equal
207 ee->addTriggerPredicate(n);
208 }
209 else
210 {
211 // Function applications/predicates
212 ee->addTerm(n);
213 }
214 // Set d_functionsTerms stores all function applications that are
215 // relevant to theory combination. Notice that this is a subset of
216 // the applications whose kinds are function kinds in the equality
217 // engine. This means it does not include applications of operators
218 // like re.++, which is not a function kind in the equality engine.
219 // Concatenation terms do not need to be considered here because
220 // their arguments have string type and do not introduce any shared
221 // terms.
222 if (n.hasOperator() && ee->isFunctionKind(k) && k != STRING_CONCAT)
223 {
224 d_functionsTerms.push_back(n);
225 }
226 if (options::stringFMF())
227 {
228 if (tn.isStringLike())
229 {
230 // Our decision strategy will minimize the length of this term if it is a
231 // variable but not an internally generated Skolem, or a term that does
232 // not belong to this theory.
233 if (n.isVar() ? !d_skCache.isSkolem(n)
234 : kindToTheoryId(k) != THEORY_STRINGS)
235 {
236 d_inputVars.insert(n);
237 Trace("strings-preregister") << "input variable: " << n << std::endl;
238 }
239 }
240 }
241 }
242
243 void TermRegistry::registerTerm(Node n, int effort)
244 {
245 TypeNode tn = n.getType();
246 bool do_register = true;
247 if (!tn.isStringLike())
248 {
249 if (options::stringEagerLen())
250 {
251 do_register = effort == 0;
252 }
253 else
254 {
255 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
256 }
257 }
258 if (!do_register)
259 {
260 return;
261 }
262 if (d_registeredTerms.find(n) != d_registeredTerms.end())
263 {
264 return;
265 }
266 d_registeredTerms.insert(n);
267 // ensure the type is registered
268 registerType(tn);
269 Debug("strings-register") << "TheoryStrings::registerTerm() " << n
270 << ", effort = " << effort << std::endl;
271 TrustNode regTermLem;
272 if (tn.isStringLike())
273 {
274 // register length information:
275 // for variables, split on empty vs positive length
276 // for concat/const/replace, introduce proxy var and state length relation
277 regTermLem = getRegisterTermLemma(n);
278 }
279 else if (n.getKind() != STRING_STRCTN)
280 {
281 // we don't send out eager reduction lemma for str.contains currently
282 Node eagerRedLemma = eagerReduce(n, &d_skCache);
283 if (!eagerRedLemma.isNull())
284 {
285 // if there was an eager reduction, we make the trust node for it
286 if (d_epg != nullptr)
287 {
288 regTermLem = d_epg->mkTrustNode(
289 eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n});
290 }
291 else
292 {
293 regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr);
294 }
295 }
296 }
297 if (!regTermLem.isNull())
298 {
299 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
300 << std::endl;
301 Trace("strings-assert")
302 << "(assert " << regTermLem.getNode() << ")" << std::endl;
303 ++(d_statistics.d_lemmasRegisterTerm);
304 d_out.trustedLemma(regTermLem);
305 }
306 }
307
308 void TermRegistry::registerType(TypeNode tn)
309 {
310 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
311 {
312 return;
313 }
314 d_registeredTypes.insert(tn);
315 if (tn.isStringLike())
316 {
317 // preregister the empty word for the type
318 Node emp = Word::mkEmptyWord(tn);
319 if (!d_state.hasTerm(emp))
320 {
321 preRegisterTerm(emp);
322 }
323 }
324 }
325
326 TrustNode TermRegistry::getRegisterTermLemma(Node n)
327 {
328 Assert(n.getType().isStringLike());
329 NodeManager* nm = NodeManager::currentNM();
330 // register length information:
331 // for variables, split on empty vs positive length
332 // for concat/const/replace, introduce proxy var and state length relation
333 Node lsum;
334 if (n.getKind() != STRING_CONCAT && !n.isConst())
335 {
336 Node lsumb = nm->mkNode(STRING_LENGTH, n);
337 lsum = Rewriter::rewrite(lsumb);
338 // can register length term if it does not rewrite
339 if (lsum == lsumb)
340 {
341 registerTermAtomic(n, LENGTH_SPLIT);
342 return TrustNode::null();
343 }
344 }
345 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
346 StringsProxyVarAttribute spva;
347 sk.setAttribute(spva, true);
348 Node eq = Rewriter::rewrite(sk.eqNode(n));
349 d_proxyVar[n] = sk;
350 // If we are introducing a proxy for a constant or concat term, we do not
351 // need to send lemmas about its length, since its length is already
352 // implied.
353 if (n.isConst() || n.getKind() == STRING_CONCAT)
354 {
355 // do not send length lemma for sk.
356 registerTermAtomic(sk, LENGTH_IGNORE);
357 }
358 Node skl = nm->mkNode(STRING_LENGTH, sk);
359 if (n.getKind() == STRING_CONCAT)
360 {
361 std::vector<Node> nodeVec;
362 for (const Node& nc : n)
363 {
364 if (nc.getAttribute(StringsProxyVarAttribute()))
365 {
366 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
367 nodeVec.push_back(d_proxyVarToLength[nc]);
368 }
369 else
370 {
371 Node lni = nm->mkNode(STRING_LENGTH, nc);
372 nodeVec.push_back(lni);
373 }
374 }
375 lsum = nm->mkNode(PLUS, nodeVec);
376 lsum = Rewriter::rewrite(lsum);
377 }
378 else if (n.isConst())
379 {
380 lsum = nm->mkConst(Rational(Word::getLength(n)));
381 }
382 Assert(!lsum.isNull());
383 d_proxyVarToLength[sk] = lsum;
384 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
385
386 Node ret = nm->mkNode(AND, eq, ceq);
387
388 // it is a simple rewrite to justify this
389 if (d_epg != nullptr)
390 {
391 return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret});
392 }
393 return TrustNode::mkTrustLemma(ret, nullptr);
394 }
395
396 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
397 {
398 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
399 {
400 return;
401 }
402 d_lengthLemmaTermsCache.insert(n);
403
404 if (s == LENGTH_IGNORE)
405 {
406 // ignore it
407 return;
408 }
409 std::map<Node, bool> reqPhase;
410 TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
411 if (!lenLem.isNull())
412 {
413 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
414 << std::endl;
415 Trace("strings-assert")
416 << "(assert " << lenLem.getNode() << ")" << std::endl;
417 ++(d_statistics.d_lemmasRegisterTermAtomic);
418 d_out.trustedLemma(lenLem);
419 }
420 for (const std::pair<const Node, bool>& rp : reqPhase)
421 {
422 d_out.requirePhase(rp.first, rp.second);
423 }
424 }
425
426 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
427
428 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
429 {
430 return d_functionsTerms;
431 }
432
433 const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
434 const
435 {
436 return d_inputVars;
437 }
438
439 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
440
441 TrustNode TermRegistry::getRegisterTermAtomicLemma(
442 Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
443 {
444 if (n.isConst())
445 {
446 // No need to send length for constant terms. This case may be triggered
447 // for cases where the skolem cache automatically replaces a skolem by
448 // a constant.
449 return TrustNode::null();
450 }
451 Assert(n.getType().isStringLike());
452 NodeManager* nm = NodeManager::currentNM();
453 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
454 Node emp = Word::mkEmptyWord(n.getType());
455 if (s == LENGTH_GEQ_ONE)
456 {
457 Node neq_empty = n.eqNode(emp).negate();
458 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
459 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
460 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
461 << std::endl;
462 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
463 if (options::proofNewPedantic() > 0)
464 {
465 Unhandled() << "Unhandled lemma Strings::Lemma SK-GEQ-ONE : "
466 << len_geq_one << std::endl;
467 }
468 return TrustNode::mkTrustLemma(len_geq_one, nullptr);
469 }
470
471 if (s == LENGTH_ONE)
472 {
473 Node len_one = n_len.eqNode(d_one);
474 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
475 << std::endl;
476 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
477 if (options::proofNewPedantic() > 0)
478 {
479 Unhandled() << "Unhandled lemma Strings::Lemma SK-ONE : " << len_one
480 << std::endl;
481 }
482 return TrustNode::mkTrustLemma(len_one, nullptr);
483 }
484 Assert(s == LENGTH_SPLIT);
485
486 // get the positive length lemma
487 Node lenLemma = lengthPositive(n);
488 // split whether the string is empty
489 Node n_len_eq_z = n_len.eqNode(d_zero);
490 Node n_len_eq_z_2 = n.eqNode(emp);
491 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
492 Node case_emptyr = Rewriter::rewrite(case_empty);
493 if (!case_emptyr.isConst())
494 {
495 // prefer trying the empty case first
496 // notice that requirePhase must only be called on rewritten literals that
497 // occur in the CNF stream.
498 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
499 Assert(!n_len_eq_z.isConst());
500 reqPhase[n_len_eq_z] = true;
501 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
502 Assert(!n_len_eq_z_2.isConst());
503 reqPhase[n_len_eq_z_2] = true;
504 }
505 else
506 {
507 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
508 // n ---> "". Since this method is only called on non-constants n, it must
509 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
510 Assert(!case_emptyr.getConst<bool>());
511 }
512
513 if (d_epg != nullptr)
514 {
515 return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n});
516 }
517 return TrustNode::mkTrustLemma(lenLemma, nullptr);
518 }
519
520 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
521 {
522 if (n.getNumChildren() == 0)
523 {
524 Node pn = getProxyVariableFor(n);
525 if (pn.isNull())
526 {
527 return Node::null();
528 }
529 Node eq = n.eqNode(pn);
530 eq = Rewriter::rewrite(eq);
531 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
532 {
533 exp.push_back(eq);
534 }
535 return pn;
536 }
537 std::vector<Node> children;
538 if (n.getMetaKind() == metakind::PARAMETERIZED)
539 {
540 children.push_back(n.getOperator());
541 }
542 for (const Node& nc : n)
543 {
544 if (n.getType().isRegExp())
545 {
546 children.push_back(nc);
547 }
548 else
549 {
550 Node ns = getSymbolicDefinition(nc, exp);
551 if (ns.isNull())
552 {
553 return Node::null();
554 }
555 else
556 {
557 children.push_back(ns);
558 }
559 }
560 }
561 return NodeManager::currentNM()->mkNode(n.getKind(), children);
562 }
563
564 Node TermRegistry::getProxyVariableFor(Node n) const
565 {
566 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
567 if (it != d_proxyVar.end())
568 {
569 return (*it).second;
570 }
571 return Node::null();
572 }
573
574 Node TermRegistry::ensureProxyVariableFor(Node n)
575 {
576 Node proxy = getProxyVariableFor(n);
577 if (proxy.isNull())
578 {
579 registerTerm(n, 0);
580 proxy = getProxyVariableFor(n);
581 }
582 Assert(!proxy.isNull());
583 return proxy;
584 }
585
586 void TermRegistry::inferSubstitutionProxyVars(Node n,
587 std::vector<Node>& vars,
588 std::vector<Node>& subs,
589 std::vector<Node>& unproc) const
590 {
591 if (n.getKind() == AND)
592 {
593 for (const Node& nc : n)
594 {
595 inferSubstitutionProxyVars(nc, vars, subs, unproc);
596 }
597 return;
598 }
599 if (n.getKind() == EQUAL)
600 {
601 Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
602 ns = Rewriter::rewrite(ns);
603 if (ns.getKind() == EQUAL)
604 {
605 Node s;
606 Node v;
607 for (unsigned i = 0; i < 2; i++)
608 {
609 Node ss;
610 // determine whether this side has a proxy variable
611 if (ns[i].getAttribute(StringsProxyVarAttribute()))
612 {
613 // it is a proxy variable
614 ss = ns[i];
615 }
616 else if (ns[i].isConst())
617 {
618 ss = getProxyVariableFor(ns[i]);
619 }
620 if (!ss.isNull())
621 {
622 v = ns[1 - i];
623 // if the other side is a constant or variable
624 if (v.getNumChildren() == 0)
625 {
626 if (s.isNull())
627 {
628 s = ss;
629 }
630 else
631 {
632 // both sides of the equality correspond to a proxy variable
633 if (ss == s)
634 {
635 // it is a trivial equality, e.g. between a proxy variable
636 // and its definition
637 return;
638 }
639 else
640 {
641 // equality between proxy variables, non-trivial
642 s = Node::null();
643 }
644 }
645 }
646 }
647 }
648 if (!s.isNull())
649 {
650 // the equality can be turned into a substitution
651 subs.push_back(s);
652 vars.push_back(v);
653 return;
654 }
655 }
656 else
657 {
658 n = ns;
659 }
660 }
661 if (!n.isConst() || !n.getConst<bool>())
662 {
663 unproc.push_back(n);
664 }
665 }
666
667 } // namespace strings
668 } // namespace theory
669 } // namespace CVC4