Set up fine grained equality notifications (#6734)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Andres Noetzli, Morgan Deters
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * Implementation of term registry for the theory of strings.
14 */
15
16 #include "theory/strings/term_registry.h"
17
18 #include "expr/attribute.h"
19 #include "options/smt_options.h"
20 #include "options/strings_options.h"
21 #include "smt/logic_exception.h"
22 #include "theory/rewriter.h"
23 #include "theory/strings/inference_manager.h"
24 #include "theory/strings/theory_strings_utils.h"
25 #include "theory/strings/word.h"
26 #include "util/rational.h"
27 #include "util/string.h"
28
29 using namespace std;
30 using namespace cvc5::context;
31 using namespace cvc5::kind;
32
33 namespace cvc5 {
34 namespace theory {
35 namespace strings {
36
37 struct StringsProxyVarAttributeId
38 {
39 };
40 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
41 StringsProxyVarAttribute;
42
43 TermRegistry::TermRegistry(SolverState& s,
44 SequencesStatistics& statistics,
45 ProofNodeManager* pnm)
46 : d_state(s),
47 d_im(nullptr),
48 d_statistics(statistics),
49 d_hasStrCode(false),
50 d_functionsTerms(s.getSatContext()),
51 d_inputVars(s.getUserContext()),
52 d_preregisteredTerms(s.getSatContext()),
53 d_registeredTerms(s.getUserContext()),
54 d_registeredTypes(s.getUserContext()),
55 d_proxyVar(s.getUserContext()),
56 d_lengthLemmaTermsCache(s.getUserContext()),
57 d_epg(pnm ? new EagerProofGenerator(
58 pnm,
59 s.getUserContext(),
60 "strings::TermRegistry::EagerProofGenerator")
61 : nullptr)
62 {
63 NodeManager* nm = NodeManager::currentNM();
64 d_zero = nm->mkConst(Rational(0));
65 d_one = nm->mkConst(Rational(1));
66 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
67 d_cardSize = utils::getAlphabetCardinality();
68 }
69
70 TermRegistry::~TermRegistry() {}
71
72 void TermRegistry::finishInit(InferenceManager* im) { d_im = im; }
73
74 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
75 {
76 NodeManager* nm = NodeManager::currentNM();
77 Node lemma;
78 Kind tk = t.getKind();
79 if (tk == STRING_TO_CODE)
80 {
81 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
82 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
83 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
84 Node code_range = nm->mkNode(
85 AND,
86 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
87 nm->mkNode(
88 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
89 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
90 }
91 else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE)
92 {
93 // (and
94 // (or (= (f x y n) (- 1)) (>= (f x y n) n))
95 // (<= (f x y n) (str.len x)))
96 //
97 // where f in { str.indexof, str.indexof_re }
98 Node l = nm->mkNode(STRING_LENGTH, t[0]);
99 lemma = nm->mkNode(
100 AND,
101 nm->mkNode(
102 OR, nm->mkConst(Rational(-1)).eqNode(t), nm->mkNode(GEQ, t, t[2])),
103 nm->mkNode(LEQ, t, l));
104 }
105 else if (tk == STRING_STOI)
106 {
107 // (>= (str.to_int x) (- 1))
108 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
109 }
110 else if (tk == STRING_STRCTN)
111 {
112 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
113 Node sk1 =
114 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
115 Node sk2 =
116 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
117 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
118 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
119 }
120 return lemma;
121 }
122
123 Node TermRegistry::lengthPositive(Node t)
124 {
125 NodeManager* nm = NodeManager::currentNM();
126 Node zero = nm->mkConst(Rational(0));
127 Node emp = Word::mkEmptyWord(t.getType());
128 Node tlen = nm->mkNode(STRING_LENGTH, t);
129 Node tlenEqZero = tlen.eqNode(zero);
130 Node tEqEmp = t.eqNode(emp);
131 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
132 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
133 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
134 return nm->mkNode(OR, caseEmpty, caseNEmpty);
135 }
136
137 void TermRegistry::preRegisterTerm(TNode n)
138 {
139 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
140 {
141 return;
142 }
143 eq::EqualityEngine* ee = d_state.getEqualityEngine();
144 d_preregisteredTerms.insert(n);
145 Trace("strings-preregister")
146 << "TheoryString::preregister : " << n << std::endl;
147 // check for logic exceptions
148 Kind k = n.getKind();
149 if (!options::stringExp())
150 {
151 if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
152 || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR
153 || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE
154 || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ
155 || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV
156 || k == STRING_UPDATE)
157 {
158 std::stringstream ss;
159 ss << "Term of kind " << k
160 << " not supported in default mode, try --strings-exp";
161 throw LogicException(ss.str());
162 }
163 }
164 if (k == EQUAL)
165 {
166 if (n[0].getType().isRegExp())
167 {
168 std::stringstream ss;
169 ss << "Equality between regular expressions is not supported";
170 throw LogicException(ss.str());
171 }
172 ee->addTriggerPredicate(n);
173 return;
174 }
175 else if (k == STRING_IN_REGEXP)
176 {
177 d_im->requirePhase(n, true);
178 ee->addTriggerPredicate(n);
179 ee->addTerm(n[0]);
180 ee->addTerm(n[1]);
181 return;
182 }
183 else if (k == STRING_TO_CODE)
184 {
185 d_hasStrCode = true;
186 }
187 else if (k == REGEXP_RANGE)
188 {
189 for (const Node& nc : n)
190 {
191 if (!nc.isConst())
192 {
193 throw LogicException(
194 "expecting a constant string term in regexp range");
195 }
196 if (nc.getConst<String>().size() != 1)
197 {
198 throw LogicException(
199 "expecting a single constant string term in regexp range");
200 }
201 }
202 }
203 registerTerm(n, 0);
204 TypeNode tn = n.getType();
205 if (tn.isRegExp() && n.isVar())
206 {
207 std::stringstream ss;
208 ss << "Regular expression variables are not supported.";
209 throw LogicException(ss.str());
210 }
211 if (tn.isString()) // string-only
212 {
213 // all characters of constants should fall in the alphabet
214 if (n.isConst())
215 {
216 std::vector<unsigned> vec = n.getConst<String>().getVec();
217 for (unsigned u : vec)
218 {
219 if (u >= d_cardSize)
220 {
221 std::stringstream ss;
222 ss << "Characters in string \"" << n
223 << "\" are outside of the given alphabet.";
224 throw LogicException(ss.str());
225 }
226 }
227 }
228 ee->addTerm(n);
229 }
230 else if (tn.isBoolean())
231 {
232 // All kinds that we do congruence over that may return a Boolean go here
233 if (k==STRING_STRCTN || k == STRING_LEQ || k == SEQ_NTH)
234 {
235 // Get triggered for both equal and dis-equal
236 ee->addTriggerPredicate(n);
237 }
238 }
239 else
240 {
241 // Function applications/predicates
242 ee->addTerm(n);
243 }
244 // Set d_functionsTerms stores all function applications that are
245 // relevant to theory combination. Notice that this is a subset of
246 // the applications whose kinds are function kinds in the equality
247 // engine. This means it does not include applications of operators
248 // like re.++, which is not a function kind in the equality engine.
249 // Concatenation terms do not need to be considered here because
250 // their arguments have string type and do not introduce any shared
251 // terms.
252 if (n.hasOperator() && ee->isFunctionKind(k)
253 && kindToTheoryId(k) == THEORY_STRINGS && k != STRING_CONCAT)
254 {
255 d_functionsTerms.push_back(n);
256 }
257 if (options::stringFMF())
258 {
259 if (tn.isStringLike())
260 {
261 // Our decision strategy will minimize the length of this term if it is a
262 // variable but not an internally generated Skolem, or a term that does
263 // not belong to this theory.
264 if (n.isVar() ? !d_skCache.isSkolem(n)
265 : kindToTheoryId(k) != THEORY_STRINGS)
266 {
267 d_inputVars.insert(n);
268 Trace("strings-preregister") << "input variable: " << n << std::endl;
269 }
270 }
271 }
272 }
273
274 void TermRegistry::registerTerm(Node n, int effort)
275 {
276 Trace("strings-register") << "TheoryStrings::registerTerm() " << n
277 << ", effort = " << effort << std::endl;
278 if (d_registeredTerms.find(n) != d_registeredTerms.end())
279 {
280 Trace("strings-register") << "...already registered" << std::endl;
281 return;
282 }
283 bool do_register = true;
284 TypeNode tn = n.getType();
285 if (!tn.isStringLike())
286 {
287 if (options::stringEagerLen())
288 {
289 do_register = effort == 0;
290 }
291 else
292 {
293 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
294 }
295 }
296 if (!do_register)
297 {
298 Trace("strings-register") << "...do not register" << std::endl;
299 return;
300 }
301 Trace("strings-register") << "...register" << std::endl;
302 d_registeredTerms.insert(n);
303 // ensure the type is registered
304 registerType(tn);
305 TrustNode regTermLem;
306 if (tn.isStringLike())
307 {
308 // register length information:
309 // for variables, split on empty vs positive length
310 // for concat/const/replace, introduce proxy var and state length relation
311 regTermLem = getRegisterTermLemma(n);
312 }
313 else if (n.getKind() != STRING_STRCTN)
314 {
315 // we don't send out eager reduction lemma for str.contains currently
316 Node eagerRedLemma = eagerReduce(n, &d_skCache);
317 if (!eagerRedLemma.isNull())
318 {
319 // if there was an eager reduction, we make the trust node for it
320 if (d_epg != nullptr)
321 {
322 regTermLem = d_epg->mkTrustNode(
323 eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n});
324 }
325 else
326 {
327 regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr);
328 }
329 }
330 }
331 if (!regTermLem.isNull())
332 {
333 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
334 << std::endl;
335 Trace("strings-assert")
336 << "(assert " << regTermLem.getNode() << ")" << std::endl;
337 d_im->trustedLemma(regTermLem, InferenceId::STRINGS_REGISTER_TERM);
338 }
339 }
340
341 void TermRegistry::registerType(TypeNode tn)
342 {
343 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
344 {
345 return;
346 }
347 d_registeredTypes.insert(tn);
348 if (tn.isStringLike())
349 {
350 // preregister the empty word for the type
351 Node emp = Word::mkEmptyWord(tn);
352 if (!d_state.hasTerm(emp))
353 {
354 preRegisterTerm(emp);
355 }
356 }
357 }
358
359 TrustNode TermRegistry::getRegisterTermLemma(Node n)
360 {
361 Assert(n.getType().isStringLike());
362 NodeManager* nm = NodeManager::currentNM();
363 // register length information:
364 // for variables, split on empty vs positive length
365 // for concat/const/replace, introduce proxy var and state length relation
366 Node lsum;
367 if (n.getKind() != STRING_CONCAT && !n.isConst())
368 {
369 Node lsumb = nm->mkNode(STRING_LENGTH, n);
370 lsum = Rewriter::rewrite(lsumb);
371 // can register length term if it does not rewrite
372 if (lsum == lsumb)
373 {
374 registerTermAtomic(n, LENGTH_SPLIT);
375 return TrustNode::null();
376 }
377 }
378 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
379 StringsProxyVarAttribute spva;
380 sk.setAttribute(spva, true);
381 Node eq = Rewriter::rewrite(sk.eqNode(n));
382 d_proxyVar[n] = sk;
383 // If we are introducing a proxy for a constant or concat term, we do not
384 // need to send lemmas about its length, since its length is already
385 // implied.
386 if (n.isConst() || n.getKind() == STRING_CONCAT)
387 {
388 // do not send length lemma for sk.
389 registerTermAtomic(sk, LENGTH_IGNORE);
390 }
391 Node skl = nm->mkNode(STRING_LENGTH, sk);
392 if (n.getKind() == STRING_CONCAT)
393 {
394 std::vector<Node> nodeVec;
395 for (const Node& nc : n)
396 {
397 if (nc.getAttribute(StringsProxyVarAttribute()))
398 {
399 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
400 nodeVec.push_back(d_proxyVarToLength[nc]);
401 }
402 else
403 {
404 Node lni = nm->mkNode(STRING_LENGTH, nc);
405 nodeVec.push_back(lni);
406 }
407 }
408 lsum = nm->mkNode(PLUS, nodeVec);
409 lsum = Rewriter::rewrite(lsum);
410 }
411 else if (n.isConst())
412 {
413 lsum = nm->mkConst(Rational(Word::getLength(n)));
414 }
415 Assert(!lsum.isNull());
416 d_proxyVarToLength[sk] = lsum;
417 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
418
419 Node ret = nm->mkNode(AND, eq, ceq);
420
421 // it is a simple rewrite to justify this
422 if (d_epg != nullptr)
423 {
424 return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret});
425 }
426 return TrustNode::mkTrustLemma(ret, nullptr);
427 }
428
429 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
430 {
431 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
432 {
433 return;
434 }
435 d_lengthLemmaTermsCache.insert(n);
436
437 if (s == LENGTH_IGNORE)
438 {
439 // ignore it
440 return;
441 }
442 std::map<Node, bool> reqPhase;
443 TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
444 if (!lenLem.isNull())
445 {
446 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
447 << std::endl;
448 Trace("strings-assert")
449 << "(assert " << lenLem.getNode() << ")" << std::endl;
450 d_im->trustedLemma(lenLem, InferenceId::STRINGS_REGISTER_TERM_ATOMIC);
451 }
452 for (const std::pair<const Node, bool>& rp : reqPhase)
453 {
454 d_im->requirePhase(rp.first, rp.second);
455 }
456 }
457
458 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
459
460 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
461 {
462 return d_functionsTerms;
463 }
464
465 const context::CDHashSet<Node>& TermRegistry::getInputVars() const
466 {
467 return d_inputVars;
468 }
469
470 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
471
472 TrustNode TermRegistry::getRegisterTermAtomicLemma(
473 Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
474 {
475 if (n.isConst())
476 {
477 // No need to send length for constant terms. This case may be triggered
478 // for cases where the skolem cache automatically replaces a skolem by
479 // a constant.
480 return TrustNode::null();
481 }
482 Assert(n.getType().isStringLike());
483 NodeManager* nm = NodeManager::currentNM();
484 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
485 Node emp = Word::mkEmptyWord(n.getType());
486 if (s == LENGTH_GEQ_ONE)
487 {
488 Node neq_empty = n.eqNode(emp).negate();
489 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
490 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
491 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
492 << std::endl;
493 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
494 return TrustNode::mkTrustLemma(len_geq_one, nullptr);
495 }
496
497 if (s == LENGTH_ONE)
498 {
499 Node len_one = n_len.eqNode(d_one);
500 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
501 << std::endl;
502 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
503 return TrustNode::mkTrustLemma(len_one, nullptr);
504 }
505 Assert(s == LENGTH_SPLIT);
506
507 // get the positive length lemma
508 Node lenLemma = lengthPositive(n);
509 // split whether the string is empty
510 Node n_len_eq_z = n_len.eqNode(d_zero);
511 Node n_len_eq_z_2 = n.eqNode(emp);
512 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
513 Node case_emptyr = Rewriter::rewrite(case_empty);
514 if (!case_emptyr.isConst())
515 {
516 // prefer trying the empty case first
517 // notice that requirePhase must only be called on rewritten literals that
518 // occur in the CNF stream.
519 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
520 Assert(!n_len_eq_z.isConst());
521 reqPhase[n_len_eq_z] = true;
522 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
523 Assert(!n_len_eq_z_2.isConst());
524 reqPhase[n_len_eq_z_2] = true;
525 }
526 else
527 {
528 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
529 // n ---> "". Since this method is only called on non-constants n, it must
530 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
531 Assert(!case_emptyr.getConst<bool>());
532 }
533
534 if (d_epg != nullptr)
535 {
536 return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n});
537 }
538 return TrustNode::mkTrustLemma(lenLemma, nullptr);
539 }
540
541 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
542 {
543 if (n.getNumChildren() == 0)
544 {
545 Node pn = getProxyVariableFor(n);
546 if (pn.isNull())
547 {
548 return Node::null();
549 }
550 Node eq = n.eqNode(pn);
551 eq = Rewriter::rewrite(eq);
552 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
553 {
554 exp.push_back(eq);
555 }
556 return pn;
557 }
558 std::vector<Node> children;
559 if (n.getMetaKind() == metakind::PARAMETERIZED)
560 {
561 children.push_back(n.getOperator());
562 }
563 for (const Node& nc : n)
564 {
565 if (n.getType().isRegExp())
566 {
567 children.push_back(nc);
568 }
569 else
570 {
571 Node ns = getSymbolicDefinition(nc, exp);
572 if (ns.isNull())
573 {
574 return Node::null();
575 }
576 else
577 {
578 children.push_back(ns);
579 }
580 }
581 }
582 return NodeManager::currentNM()->mkNode(n.getKind(), children);
583 }
584
585 Node TermRegistry::getProxyVariableFor(Node n) const
586 {
587 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
588 if (it != d_proxyVar.end())
589 {
590 return (*it).second;
591 }
592 return Node::null();
593 }
594
595 Node TermRegistry::ensureProxyVariableFor(Node n)
596 {
597 Node proxy = getProxyVariableFor(n);
598 if (proxy.isNull())
599 {
600 registerTerm(n, 0);
601 proxy = getProxyVariableFor(n);
602 }
603 Assert(!proxy.isNull());
604 return proxy;
605 }
606
607 void TermRegistry::removeProxyEqs(Node n, std::vector<Node>& unproc) const
608 {
609 if (n.getKind() == AND)
610 {
611 for (const Node& nc : n)
612 {
613 removeProxyEqs(nc, unproc);
614 }
615 return;
616 }
617 Trace("strings-subs-proxy") << "Input : " << n << std::endl;
618 Node ns = Rewriter::rewrite(n);
619 if (ns.getKind() == EQUAL)
620 {
621 for (size_t i = 0; i < 2; i++)
622 {
623 // determine whether this side has a proxy variable
624 if (ns[i].getAttribute(StringsProxyVarAttribute()))
625 {
626 if (getProxyVariableFor(ns[1 - i]) == ns[i])
627 {
628 Trace("strings-subs-proxy")
629 << "...trivial definition via " << ns[i] << std::endl;
630 // it is a trivial equality, e.g. between a proxy variable
631 // and its definition
632 return;
633 }
634 }
635 }
636 }
637 if (!n.isConst() || !n.getConst<bool>())
638 {
639 Trace("strings-subs-proxy") << "...unprocessed" << std::endl;
640 unproc.push_back(n);
641 }
642 }
643
644 } // namespace strings
645 } // namespace theory
646 } // namespace cvc5