Rename namespace CVC4 to CVC5. (#6249)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /********************* */
2 /*! \file term_registry.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 ** in the top-level source directory and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of term registry for the theory of strings.
13 **/
14
15 #include "theory/strings/term_registry.h"
16
17 #include "expr/attribute.h"
18 #include "options/smt_options.h"
19 #include "options/strings_options.h"
20 #include "smt/logic_exception.h"
21 #include "theory/rewriter.h"
22 #include "theory/strings/inference_manager.h"
23 #include "theory/strings/theory_strings_utils.h"
24 #include "theory/strings/word.h"
25
26 using namespace std;
27 using namespace CVC5::context;
28 using namespace CVC5::kind;
29
30 namespace CVC5 {
31 namespace theory {
32 namespace strings {
33
34 struct StringsProxyVarAttributeId
35 {
36 };
37 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
38 StringsProxyVarAttribute;
39
40 TermRegistry::TermRegistry(SolverState& s,
41 SequencesStatistics& statistics,
42 ProofNodeManager* pnm)
43 : d_state(s),
44 d_im(nullptr),
45 d_statistics(statistics),
46 d_hasStrCode(false),
47 d_functionsTerms(s.getSatContext()),
48 d_inputVars(s.getUserContext()),
49 d_preregisteredTerms(s.getSatContext()),
50 d_registeredTerms(s.getUserContext()),
51 d_registeredTypes(s.getUserContext()),
52 d_proxyVar(s.getUserContext()),
53 d_lengthLemmaTermsCache(s.getUserContext()),
54 d_epg(pnm ? new EagerProofGenerator(
55 pnm,
56 s.getUserContext(),
57 "strings::TermRegistry::EagerProofGenerator")
58 : nullptr)
59 {
60 NodeManager* nm = NodeManager::currentNM();
61 d_zero = nm->mkConst(Rational(0));
62 d_one = nm->mkConst(Rational(1));
63 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
64 d_cardSize = utils::getAlphabetCardinality();
65 }
66
67 TermRegistry::~TermRegistry() {}
68
69 void TermRegistry::finishInit(InferenceManager* im) { d_im = im; }
70
71 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
72 {
73 NodeManager* nm = NodeManager::currentNM();
74 Node lemma;
75 Kind tk = t.getKind();
76 if (tk == STRING_TO_CODE)
77 {
78 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
79 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
80 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
81 Node code_range = nm->mkNode(
82 AND,
83 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
84 nm->mkNode(
85 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
86 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
87 }
88 else if (tk == STRING_STRIDOF)
89 {
90 // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
91 // x)))
92 Node l = utils::mkNLength(t[0]);
93 lemma = nm->mkNode(AND,
94 nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
95 nm->mkNode(LEQ, t, l));
96 }
97 else if (tk == STRING_STOI)
98 {
99 // (>= (str.to_int x) (- 1))
100 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
101 }
102 else if (tk == STRING_STRCTN)
103 {
104 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
105 Node sk1 =
106 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
107 Node sk2 =
108 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
109 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
110 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
111 }
112 return lemma;
113 }
114
115 Node TermRegistry::lengthPositive(Node t)
116 {
117 NodeManager* nm = NodeManager::currentNM();
118 Node zero = nm->mkConst(Rational(0));
119 Node emp = Word::mkEmptyWord(t.getType());
120 Node tlen = nm->mkNode(STRING_LENGTH, t);
121 Node tlenEqZero = tlen.eqNode(zero);
122 Node tEqEmp = t.eqNode(emp);
123 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
124 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
125 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
126 return nm->mkNode(OR, caseEmpty, caseNEmpty);
127 }
128
129 void TermRegistry::preRegisterTerm(TNode n)
130 {
131 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
132 {
133 return;
134 }
135 eq::EqualityEngine* ee = d_state.getEqualityEngine();
136 d_preregisteredTerms.insert(n);
137 Trace("strings-preregister")
138 << "TheoryString::preregister : " << n << std::endl;
139 // check for logic exceptions
140 Kind k = n.getKind();
141 if (!options::stringExp())
142 {
143 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
144 || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
145 || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
146 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
147 || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
148 {
149 std::stringstream ss;
150 ss << "Term of kind " << k
151 << " not supported in default mode, try --strings-exp";
152 throw LogicException(ss.str());
153 }
154 }
155 if (k == EQUAL)
156 {
157 if (n[0].getType().isRegExp())
158 {
159 std::stringstream ss;
160 ss << "Equality between regular expressions is not supported";
161 throw LogicException(ss.str());
162 }
163 ee->addTriggerPredicate(n);
164 return;
165 }
166 else if (k == STRING_IN_REGEXP)
167 {
168 d_im->requirePhase(n, true);
169 ee->addTriggerPredicate(n);
170 ee->addTerm(n[0]);
171 ee->addTerm(n[1]);
172 return;
173 }
174 else if (k == STRING_TO_CODE)
175 {
176 d_hasStrCode = true;
177 }
178 registerTerm(n, 0);
179 TypeNode tn = n.getType();
180 if (tn.isRegExp() && n.isVar())
181 {
182 std::stringstream ss;
183 ss << "Regular expression variables are not supported.";
184 throw LogicException(ss.str());
185 }
186 if (tn.isString()) // string-only
187 {
188 // all characters of constants should fall in the alphabet
189 if (n.isConst())
190 {
191 std::vector<unsigned> vec = n.getConst<String>().getVec();
192 for (unsigned u : vec)
193 {
194 if (u >= d_cardSize)
195 {
196 std::stringstream ss;
197 ss << "Characters in string \"" << n
198 << "\" are outside of the given alphabet.";
199 throw LogicException(ss.str());
200 }
201 }
202 }
203 ee->addTerm(n);
204 }
205 else if (tn.isBoolean())
206 {
207 // All kinds that we do congruence over that may return a Boolean go here
208 if (k==STRING_STRCTN || k == STRING_LEQ || k == SEQ_NTH)
209 {
210 // Get triggered for both equal and dis-equal
211 ee->addTriggerPredicate(n);
212 }
213 }
214 else
215 {
216 // Function applications/predicates
217 ee->addTerm(n);
218 }
219 // Set d_functionsTerms stores all function applications that are
220 // relevant to theory combination. Notice that this is a subset of
221 // the applications whose kinds are function kinds in the equality
222 // engine. This means it does not include applications of operators
223 // like re.++, which is not a function kind in the equality engine.
224 // Concatenation terms do not need to be considered here because
225 // their arguments have string type and do not introduce any shared
226 // terms.
227 if (n.hasOperator() && ee->isFunctionKind(k) && k != STRING_CONCAT)
228 {
229 d_functionsTerms.push_back(n);
230 }
231 if (options::stringFMF())
232 {
233 if (tn.isStringLike())
234 {
235 // Our decision strategy will minimize the length of this term if it is a
236 // variable but not an internally generated Skolem, or a term that does
237 // not belong to this theory.
238 if (n.isVar() ? !d_skCache.isSkolem(n)
239 : kindToTheoryId(k) != THEORY_STRINGS)
240 {
241 d_inputVars.insert(n);
242 Trace("strings-preregister") << "input variable: " << n << std::endl;
243 }
244 }
245 }
246 }
247
248 void TermRegistry::registerTerm(Node n, int effort)
249 {
250 Trace("strings-register") << "TheoryStrings::registerTerm() " << n
251 << ", effort = " << effort << std::endl;
252 if (d_registeredTerms.find(n) != d_registeredTerms.end())
253 {
254 Trace("strings-register") << "...already registered" << std::endl;
255 return;
256 }
257 bool do_register = true;
258 TypeNode tn = n.getType();
259 if (!tn.isStringLike())
260 {
261 if (options::stringEagerLen())
262 {
263 do_register = effort == 0;
264 }
265 else
266 {
267 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
268 }
269 }
270 if (!do_register)
271 {
272 Trace("strings-register") << "...do not register" << std::endl;
273 return;
274 }
275 Trace("strings-register") << "...register" << std::endl;
276 d_registeredTerms.insert(n);
277 // ensure the type is registered
278 registerType(tn);
279 TrustNode regTermLem;
280 if (tn.isStringLike())
281 {
282 // register length information:
283 // for variables, split on empty vs positive length
284 // for concat/const/replace, introduce proxy var and state length relation
285 regTermLem = getRegisterTermLemma(n);
286 }
287 else if (n.getKind() != STRING_STRCTN)
288 {
289 // we don't send out eager reduction lemma for str.contains currently
290 Node eagerRedLemma = eagerReduce(n, &d_skCache);
291 if (!eagerRedLemma.isNull())
292 {
293 // if there was an eager reduction, we make the trust node for it
294 if (d_epg != nullptr)
295 {
296 regTermLem = d_epg->mkTrustNode(
297 eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n});
298 }
299 else
300 {
301 regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr);
302 }
303 }
304 }
305 if (!regTermLem.isNull())
306 {
307 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
308 << std::endl;
309 Trace("strings-assert")
310 << "(assert " << regTermLem.getNode() << ")" << std::endl;
311 d_im->trustedLemma(regTermLem, InferenceId::STRINGS_REGISTER_TERM);
312 }
313 }
314
315 void TermRegistry::registerType(TypeNode tn)
316 {
317 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
318 {
319 return;
320 }
321 d_registeredTypes.insert(tn);
322 if (tn.isStringLike())
323 {
324 // preregister the empty word for the type
325 Node emp = Word::mkEmptyWord(tn);
326 if (!d_state.hasTerm(emp))
327 {
328 preRegisterTerm(emp);
329 }
330 }
331 }
332
333 TrustNode TermRegistry::getRegisterTermLemma(Node n)
334 {
335 Assert(n.getType().isStringLike());
336 NodeManager* nm = NodeManager::currentNM();
337 // register length information:
338 // for variables, split on empty vs positive length
339 // for concat/const/replace, introduce proxy var and state length relation
340 Node lsum;
341 if (n.getKind() != STRING_CONCAT && !n.isConst())
342 {
343 Node lsumb = nm->mkNode(STRING_LENGTH, n);
344 lsum = Rewriter::rewrite(lsumb);
345 // can register length term if it does not rewrite
346 if (lsum == lsumb)
347 {
348 registerTermAtomic(n, LENGTH_SPLIT);
349 return TrustNode::null();
350 }
351 }
352 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
353 StringsProxyVarAttribute spva;
354 sk.setAttribute(spva, true);
355 Node eq = Rewriter::rewrite(sk.eqNode(n));
356 d_proxyVar[n] = sk;
357 // If we are introducing a proxy for a constant or concat term, we do not
358 // need to send lemmas about its length, since its length is already
359 // implied.
360 if (n.isConst() || n.getKind() == STRING_CONCAT)
361 {
362 // do not send length lemma for sk.
363 registerTermAtomic(sk, LENGTH_IGNORE);
364 }
365 Node skl = nm->mkNode(STRING_LENGTH, sk);
366 if (n.getKind() == STRING_CONCAT)
367 {
368 std::vector<Node> nodeVec;
369 for (const Node& nc : n)
370 {
371 if (nc.getAttribute(StringsProxyVarAttribute()))
372 {
373 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
374 nodeVec.push_back(d_proxyVarToLength[nc]);
375 }
376 else
377 {
378 Node lni = nm->mkNode(STRING_LENGTH, nc);
379 nodeVec.push_back(lni);
380 }
381 }
382 lsum = nm->mkNode(PLUS, nodeVec);
383 lsum = Rewriter::rewrite(lsum);
384 }
385 else if (n.isConst())
386 {
387 lsum = nm->mkConst(Rational(Word::getLength(n)));
388 }
389 Assert(!lsum.isNull());
390 d_proxyVarToLength[sk] = lsum;
391 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
392
393 Node ret = nm->mkNode(AND, eq, ceq);
394
395 // it is a simple rewrite to justify this
396 if (d_epg != nullptr)
397 {
398 return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret});
399 }
400 return TrustNode::mkTrustLemma(ret, nullptr);
401 }
402
403 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
404 {
405 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
406 {
407 return;
408 }
409 d_lengthLemmaTermsCache.insert(n);
410
411 if (s == LENGTH_IGNORE)
412 {
413 // ignore it
414 return;
415 }
416 std::map<Node, bool> reqPhase;
417 TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
418 if (!lenLem.isNull())
419 {
420 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
421 << std::endl;
422 Trace("strings-assert")
423 << "(assert " << lenLem.getNode() << ")" << std::endl;
424 d_im->trustedLemma(lenLem, InferenceId::STRINGS_REGISTER_TERM_ATOMIC);
425 }
426 for (const std::pair<const Node, bool>& rp : reqPhase)
427 {
428 d_im->requirePhase(rp.first, rp.second);
429 }
430 }
431
432 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
433
434 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
435 {
436 return d_functionsTerms;
437 }
438
439 const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
440 const
441 {
442 return d_inputVars;
443 }
444
445 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
446
447 TrustNode TermRegistry::getRegisterTermAtomicLemma(
448 Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
449 {
450 if (n.isConst())
451 {
452 // No need to send length for constant terms. This case may be triggered
453 // for cases where the skolem cache automatically replaces a skolem by
454 // a constant.
455 return TrustNode::null();
456 }
457 Assert(n.getType().isStringLike());
458 NodeManager* nm = NodeManager::currentNM();
459 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
460 Node emp = Word::mkEmptyWord(n.getType());
461 if (s == LENGTH_GEQ_ONE)
462 {
463 Node neq_empty = n.eqNode(emp).negate();
464 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
465 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
466 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
467 << std::endl;
468 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
469 return TrustNode::mkTrustLemma(len_geq_one, nullptr);
470 }
471
472 if (s == LENGTH_ONE)
473 {
474 Node len_one = n_len.eqNode(d_one);
475 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
476 << std::endl;
477 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
478 return TrustNode::mkTrustLemma(len_one, nullptr);
479 }
480 Assert(s == LENGTH_SPLIT);
481
482 // get the positive length lemma
483 Node lenLemma = lengthPositive(n);
484 // split whether the string is empty
485 Node n_len_eq_z = n_len.eqNode(d_zero);
486 Node n_len_eq_z_2 = n.eqNode(emp);
487 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
488 Node case_emptyr = Rewriter::rewrite(case_empty);
489 if (!case_emptyr.isConst())
490 {
491 // prefer trying the empty case first
492 // notice that requirePhase must only be called on rewritten literals that
493 // occur in the CNF stream.
494 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
495 Assert(!n_len_eq_z.isConst());
496 reqPhase[n_len_eq_z] = true;
497 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
498 Assert(!n_len_eq_z_2.isConst());
499 reqPhase[n_len_eq_z_2] = true;
500 }
501 else
502 {
503 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
504 // n ---> "". Since this method is only called on non-constants n, it must
505 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
506 Assert(!case_emptyr.getConst<bool>());
507 }
508
509 if (d_epg != nullptr)
510 {
511 return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n});
512 }
513 return TrustNode::mkTrustLemma(lenLemma, nullptr);
514 }
515
516 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
517 {
518 if (n.getNumChildren() == 0)
519 {
520 Node pn = getProxyVariableFor(n);
521 if (pn.isNull())
522 {
523 return Node::null();
524 }
525 Node eq = n.eqNode(pn);
526 eq = Rewriter::rewrite(eq);
527 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
528 {
529 exp.push_back(eq);
530 }
531 return pn;
532 }
533 std::vector<Node> children;
534 if (n.getMetaKind() == metakind::PARAMETERIZED)
535 {
536 children.push_back(n.getOperator());
537 }
538 for (const Node& nc : n)
539 {
540 if (n.getType().isRegExp())
541 {
542 children.push_back(nc);
543 }
544 else
545 {
546 Node ns = getSymbolicDefinition(nc, exp);
547 if (ns.isNull())
548 {
549 return Node::null();
550 }
551 else
552 {
553 children.push_back(ns);
554 }
555 }
556 }
557 return NodeManager::currentNM()->mkNode(n.getKind(), children);
558 }
559
560 Node TermRegistry::getProxyVariableFor(Node n) const
561 {
562 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
563 if (it != d_proxyVar.end())
564 {
565 return (*it).second;
566 }
567 return Node::null();
568 }
569
570 Node TermRegistry::ensureProxyVariableFor(Node n)
571 {
572 Node proxy = getProxyVariableFor(n);
573 if (proxy.isNull())
574 {
575 registerTerm(n, 0);
576 proxy = getProxyVariableFor(n);
577 }
578 Assert(!proxy.isNull());
579 return proxy;
580 }
581
582 void TermRegistry::removeProxyEqs(Node n, std::vector<Node>& unproc) const
583 {
584 if (n.getKind() == AND)
585 {
586 for (const Node& nc : n)
587 {
588 removeProxyEqs(nc, unproc);
589 }
590 return;
591 }
592 Trace("strings-subs-proxy") << "Input : " << n << std::endl;
593 Node ns = Rewriter::rewrite(n);
594 if (ns.getKind() == EQUAL)
595 {
596 for (size_t i = 0; i < 2; i++)
597 {
598 // determine whether this side has a proxy variable
599 if (ns[i].getAttribute(StringsProxyVarAttribute()))
600 {
601 if (getProxyVariableFor(ns[1 - i]) == ns[i])
602 {
603 Trace("strings-subs-proxy")
604 << "...trivial definition via " << ns[i] << std::endl;
605 // it is a trivial equality, e.g. between a proxy variable
606 // and its definition
607 return;
608 }
609 }
610 }
611 }
612 if (!n.isConst() || !n.getConst<bool>())
613 {
614 Trace("strings-subs-proxy") << "...unprocessed" << std::endl;
615 unproc.push_back(n);
616 }
617 }
618
619 } // namespace strings
620 } // namespace theory
621 } // namespace CVC5