Update copyright headers to 2021. (#6081)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /********************* */
2 /*! \file term_registry.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 ** in the top-level source directory and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of term registry for the theory of strings.
13 **/
14
15 #include "theory/strings/term_registry.h"
16
17 #include "expr/attribute.h"
18 #include "options/smt_options.h"
19 #include "options/strings_options.h"
20 #include "smt/logic_exception.h"
21 #include "theory/rewriter.h"
22 #include "theory/strings/theory_strings_utils.h"
23 #include "theory/strings/word.h"
24
25 using namespace std;
26 using namespace CVC4::context;
27 using namespace CVC4::kind;
28
29 namespace CVC4 {
30 namespace theory {
31 namespace strings {
32
33 struct StringsProxyVarAttributeId
34 {
35 };
36 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
37 StringsProxyVarAttribute;
38
39 TermRegistry::TermRegistry(SolverState& s,
40 OutputChannel& out,
41 SequencesStatistics& statistics,
42 ProofNodeManager* pnm)
43 : d_state(s),
44 d_out(out),
45 d_statistics(statistics),
46 d_hasStrCode(false),
47 d_functionsTerms(s.getSatContext()),
48 d_inputVars(s.getUserContext()),
49 d_preregisteredTerms(s.getSatContext()),
50 d_registeredTerms(s.getUserContext()),
51 d_registeredTypes(s.getUserContext()),
52 d_lengthLemmaTermsCache(s.getUserContext()),
53 d_epg(pnm ? new EagerProofGenerator(
54 pnm,
55 s.getUserContext(),
56 "strings::TermRegistry::EagerProofGenerator")
57 : nullptr)
58 {
59 NodeManager* nm = NodeManager::currentNM();
60 d_zero = nm->mkConst(Rational(0));
61 d_one = nm->mkConst(Rational(1));
62 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
63 d_cardSize = utils::getAlphabetCardinality();
64 }
65
66 TermRegistry::~TermRegistry() {}
67
68 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
69 {
70 NodeManager* nm = NodeManager::currentNM();
71 Node lemma;
72 Kind tk = t.getKind();
73 if (tk == STRING_TO_CODE)
74 {
75 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
76 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
77 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
78 Node code_range = nm->mkNode(
79 AND,
80 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
81 nm->mkNode(
82 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
83 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
84 }
85 else if (tk == STRING_STRIDOF)
86 {
87 // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
88 // x)))
89 Node l = utils::mkNLength(t[0]);
90 lemma = nm->mkNode(AND,
91 nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
92 nm->mkNode(LEQ, t, l));
93 }
94 else if (tk == STRING_STOI)
95 {
96 // (>= (str.to_int x) (- 1))
97 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
98 }
99 else if (tk == STRING_STRCTN)
100 {
101 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
102 Node sk1 =
103 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
104 Node sk2 =
105 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
106 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
107 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
108 }
109 return lemma;
110 }
111
112 Node TermRegistry::lengthPositive(Node t)
113 {
114 NodeManager* nm = NodeManager::currentNM();
115 Node zero = nm->mkConst(Rational(0));
116 Node emp = Word::mkEmptyWord(t.getType());
117 Node tlen = nm->mkNode(STRING_LENGTH, t);
118 Node tlenEqZero = tlen.eqNode(zero);
119 Node tEqEmp = t.eqNode(emp);
120 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
121 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
122 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
123 return nm->mkNode(OR, caseEmpty, caseNEmpty);
124 }
125
126 void TermRegistry::preRegisterTerm(TNode n)
127 {
128 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
129 {
130 return;
131 }
132 eq::EqualityEngine* ee = d_state.getEqualityEngine();
133 d_preregisteredTerms.insert(n);
134 Trace("strings-preregister")
135 << "TheoryString::preregister : " << n << std::endl;
136 // check for logic exceptions
137 Kind k = n.getKind();
138 if (!options::stringExp())
139 {
140 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
141 || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
142 || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
143 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
144 || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
145 {
146 std::stringstream ss;
147 ss << "Term of kind " << k
148 << " not supported in default mode, try --strings-exp";
149 throw LogicException(ss.str());
150 }
151 }
152 if (k == EQUAL)
153 {
154 if (n[0].getType().isRegExp())
155 {
156 std::stringstream ss;
157 ss << "Equality between regular expressions is not supported";
158 throw LogicException(ss.str());
159 }
160 ee->addTriggerPredicate(n);
161 return;
162 }
163 else if (k == STRING_IN_REGEXP)
164 {
165 d_out.requirePhase(n, true);
166 ee->addTriggerPredicate(n);
167 ee->addTerm(n[0]);
168 ee->addTerm(n[1]);
169 return;
170 }
171 else if (k == STRING_TO_CODE)
172 {
173 d_hasStrCode = true;
174 }
175 registerTerm(n, 0);
176 TypeNode tn = n.getType();
177 if (tn.isRegExp() && n.isVar())
178 {
179 std::stringstream ss;
180 ss << "Regular expression variables are not supported.";
181 throw LogicException(ss.str());
182 }
183 if (tn.isString()) // string-only
184 {
185 // all characters of constants should fall in the alphabet
186 if (n.isConst())
187 {
188 std::vector<unsigned> vec = n.getConst<String>().getVec();
189 for (unsigned u : vec)
190 {
191 if (u >= d_cardSize)
192 {
193 std::stringstream ss;
194 ss << "Characters in string \"" << n
195 << "\" are outside of the given alphabet.";
196 throw LogicException(ss.str());
197 }
198 }
199 }
200 ee->addTerm(n);
201 }
202 else if (tn.isBoolean())
203 {
204 // All kinds that we do congruence over that may return a Boolean go here
205 if (k==STRING_STRCTN || k == STRING_LEQ || k == SEQ_NTH)
206 {
207 // Get triggered for both equal and dis-equal
208 ee->addTriggerPredicate(n);
209 }
210 }
211 else
212 {
213 // Function applications/predicates
214 ee->addTerm(n);
215 }
216 // Set d_functionsTerms stores all function applications that are
217 // relevant to theory combination. Notice that this is a subset of
218 // the applications whose kinds are function kinds in the equality
219 // engine. This means it does not include applications of operators
220 // like re.++, which is not a function kind in the equality engine.
221 // Concatenation terms do not need to be considered here because
222 // their arguments have string type and do not introduce any shared
223 // terms.
224 if (n.hasOperator() && ee->isFunctionKind(k) && k != STRING_CONCAT)
225 {
226 d_functionsTerms.push_back(n);
227 }
228 if (options::stringFMF())
229 {
230 if (tn.isStringLike())
231 {
232 // Our decision strategy will minimize the length of this term if it is a
233 // variable but not an internally generated Skolem, or a term that does
234 // not belong to this theory.
235 if (n.isVar() ? !d_skCache.isSkolem(n)
236 : kindToTheoryId(k) != THEORY_STRINGS)
237 {
238 d_inputVars.insert(n);
239 Trace("strings-preregister") << "input variable: " << n << std::endl;
240 }
241 }
242 }
243 }
244
245 void TermRegistry::registerTerm(Node n, int effort)
246 {
247 TypeNode tn = n.getType();
248 bool do_register = true;
249 if (!tn.isStringLike())
250 {
251 if (options::stringEagerLen())
252 {
253 do_register = effort == 0;
254 }
255 else
256 {
257 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
258 }
259 }
260 if (!do_register)
261 {
262 return;
263 }
264 if (d_registeredTerms.find(n) != d_registeredTerms.end())
265 {
266 return;
267 }
268 d_registeredTerms.insert(n);
269 // ensure the type is registered
270 registerType(tn);
271 Debug("strings-register") << "TheoryStrings::registerTerm() " << n
272 << ", effort = " << effort << std::endl;
273 TrustNode regTermLem;
274 if (tn.isStringLike())
275 {
276 // register length information:
277 // for variables, split on empty vs positive length
278 // for concat/const/replace, introduce proxy var and state length relation
279 regTermLem = getRegisterTermLemma(n);
280 }
281 else if (n.getKind() != STRING_STRCTN)
282 {
283 // we don't send out eager reduction lemma for str.contains currently
284 Node eagerRedLemma = eagerReduce(n, &d_skCache);
285 if (!eagerRedLemma.isNull())
286 {
287 // if there was an eager reduction, we make the trust node for it
288 if (d_epg != nullptr)
289 {
290 regTermLem = d_epg->mkTrustNode(
291 eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n});
292 }
293 else
294 {
295 regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr);
296 }
297 }
298 }
299 if (!regTermLem.isNull())
300 {
301 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
302 << std::endl;
303 Trace("strings-assert")
304 << "(assert " << regTermLem.getNode() << ")" << std::endl;
305 ++(d_statistics.d_lemmasRegisterTerm);
306 d_out.trustedLemma(regTermLem);
307 }
308 }
309
310 void TermRegistry::registerType(TypeNode tn)
311 {
312 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
313 {
314 return;
315 }
316 d_registeredTypes.insert(tn);
317 if (tn.isStringLike())
318 {
319 // preregister the empty word for the type
320 Node emp = Word::mkEmptyWord(tn);
321 if (!d_state.hasTerm(emp))
322 {
323 preRegisterTerm(emp);
324 }
325 }
326 }
327
328 TrustNode TermRegistry::getRegisterTermLemma(Node n)
329 {
330 Assert(n.getType().isStringLike());
331 NodeManager* nm = NodeManager::currentNM();
332 // register length information:
333 // for variables, split on empty vs positive length
334 // for concat/const/replace, introduce proxy var and state length relation
335 Node lsum;
336 if (n.getKind() != STRING_CONCAT && !n.isConst())
337 {
338 Node lsumb = nm->mkNode(STRING_LENGTH, n);
339 lsum = Rewriter::rewrite(lsumb);
340 // can register length term if it does not rewrite
341 if (lsum == lsumb)
342 {
343 registerTermAtomic(n, LENGTH_SPLIT);
344 return TrustNode::null();
345 }
346 }
347 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
348 StringsProxyVarAttribute spva;
349 sk.setAttribute(spva, true);
350 Node eq = Rewriter::rewrite(sk.eqNode(n));
351 d_proxyVar[n] = sk;
352 // If we are introducing a proxy for a constant or concat term, we do not
353 // need to send lemmas about its length, since its length is already
354 // implied.
355 if (n.isConst() || n.getKind() == STRING_CONCAT)
356 {
357 // do not send length lemma for sk.
358 registerTermAtomic(sk, LENGTH_IGNORE);
359 }
360 Node skl = nm->mkNode(STRING_LENGTH, sk);
361 if (n.getKind() == STRING_CONCAT)
362 {
363 std::vector<Node> nodeVec;
364 for (const Node& nc : n)
365 {
366 if (nc.getAttribute(StringsProxyVarAttribute()))
367 {
368 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
369 nodeVec.push_back(d_proxyVarToLength[nc]);
370 }
371 else
372 {
373 Node lni = nm->mkNode(STRING_LENGTH, nc);
374 nodeVec.push_back(lni);
375 }
376 }
377 lsum = nm->mkNode(PLUS, nodeVec);
378 lsum = Rewriter::rewrite(lsum);
379 }
380 else if (n.isConst())
381 {
382 lsum = nm->mkConst(Rational(Word::getLength(n)));
383 }
384 Assert(!lsum.isNull());
385 d_proxyVarToLength[sk] = lsum;
386 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
387
388 Node ret = nm->mkNode(AND, eq, ceq);
389
390 // it is a simple rewrite to justify this
391 if (d_epg != nullptr)
392 {
393 return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret});
394 }
395 return TrustNode::mkTrustLemma(ret, nullptr);
396 }
397
398 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
399 {
400 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
401 {
402 return;
403 }
404 d_lengthLemmaTermsCache.insert(n);
405
406 if (s == LENGTH_IGNORE)
407 {
408 // ignore it
409 return;
410 }
411 std::map<Node, bool> reqPhase;
412 TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
413 if (!lenLem.isNull())
414 {
415 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
416 << std::endl;
417 Trace("strings-assert")
418 << "(assert " << lenLem.getNode() << ")" << std::endl;
419 ++(d_statistics.d_lemmasRegisterTermAtomic);
420 d_out.trustedLemma(lenLem);
421 }
422 for (const std::pair<const Node, bool>& rp : reqPhase)
423 {
424 d_out.requirePhase(rp.first, rp.second);
425 }
426 }
427
428 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
429
430 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
431 {
432 return d_functionsTerms;
433 }
434
435 const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
436 const
437 {
438 return d_inputVars;
439 }
440
441 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
442
443 TrustNode TermRegistry::getRegisterTermAtomicLemma(
444 Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
445 {
446 if (n.isConst())
447 {
448 // No need to send length for constant terms. This case may be triggered
449 // for cases where the skolem cache automatically replaces a skolem by
450 // a constant.
451 return TrustNode::null();
452 }
453 Assert(n.getType().isStringLike());
454 NodeManager* nm = NodeManager::currentNM();
455 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
456 Node emp = Word::mkEmptyWord(n.getType());
457 if (s == LENGTH_GEQ_ONE)
458 {
459 Node neq_empty = n.eqNode(emp).negate();
460 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
461 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
462 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
463 << std::endl;
464 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
465 return TrustNode::mkTrustLemma(len_geq_one, nullptr);
466 }
467
468 if (s == LENGTH_ONE)
469 {
470 Node len_one = n_len.eqNode(d_one);
471 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
472 << std::endl;
473 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
474 return TrustNode::mkTrustLemma(len_one, nullptr);
475 }
476 Assert(s == LENGTH_SPLIT);
477
478 // get the positive length lemma
479 Node lenLemma = lengthPositive(n);
480 // split whether the string is empty
481 Node n_len_eq_z = n_len.eqNode(d_zero);
482 Node n_len_eq_z_2 = n.eqNode(emp);
483 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
484 Node case_emptyr = Rewriter::rewrite(case_empty);
485 if (!case_emptyr.isConst())
486 {
487 // prefer trying the empty case first
488 // notice that requirePhase must only be called on rewritten literals that
489 // occur in the CNF stream.
490 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
491 Assert(!n_len_eq_z.isConst());
492 reqPhase[n_len_eq_z] = true;
493 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
494 Assert(!n_len_eq_z_2.isConst());
495 reqPhase[n_len_eq_z_2] = true;
496 }
497 else
498 {
499 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
500 // n ---> "". Since this method is only called on non-constants n, it must
501 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
502 Assert(!case_emptyr.getConst<bool>());
503 }
504
505 if (d_epg != nullptr)
506 {
507 return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n});
508 }
509 return TrustNode::mkTrustLemma(lenLemma, nullptr);
510 }
511
512 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
513 {
514 if (n.getNumChildren() == 0)
515 {
516 Node pn = getProxyVariableFor(n);
517 if (pn.isNull())
518 {
519 return Node::null();
520 }
521 Node eq = n.eqNode(pn);
522 eq = Rewriter::rewrite(eq);
523 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
524 {
525 exp.push_back(eq);
526 }
527 return pn;
528 }
529 std::vector<Node> children;
530 if (n.getMetaKind() == metakind::PARAMETERIZED)
531 {
532 children.push_back(n.getOperator());
533 }
534 for (const Node& nc : n)
535 {
536 if (n.getType().isRegExp())
537 {
538 children.push_back(nc);
539 }
540 else
541 {
542 Node ns = getSymbolicDefinition(nc, exp);
543 if (ns.isNull())
544 {
545 return Node::null();
546 }
547 else
548 {
549 children.push_back(ns);
550 }
551 }
552 }
553 return NodeManager::currentNM()->mkNode(n.getKind(), children);
554 }
555
556 Node TermRegistry::getProxyVariableFor(Node n) const
557 {
558 std::map<Node, Node>::const_iterator it = d_proxyVar.find(n);
559 if (it != d_proxyVar.end())
560 {
561 return (*it).second;
562 }
563 return Node::null();
564 }
565
566 Node TermRegistry::ensureProxyVariableFor(Node n)
567 {
568 Node proxy = getProxyVariableFor(n);
569 if (proxy.isNull())
570 {
571 registerTerm(n, 0);
572 proxy = getProxyVariableFor(n);
573 }
574 Assert(!proxy.isNull());
575 return proxy;
576 }
577
578 void TermRegistry::removeProxyEqs(Node n, std::vector<Node>& unproc) const
579 {
580 if (n.getKind() == AND)
581 {
582 for (const Node& nc : n)
583 {
584 removeProxyEqs(nc, unproc);
585 }
586 return;
587 }
588 Trace("strings-subs-proxy") << "Input : " << n << std::endl;
589 Node ns = Rewriter::rewrite(n);
590 if (ns.getKind() == EQUAL)
591 {
592 for (size_t i = 0; i < 2; i++)
593 {
594 // determine whether this side has a proxy variable
595 if (ns[i].getAttribute(StringsProxyVarAttribute()))
596 {
597 if (getProxyVariableFor(ns[1 - i]) == ns[i])
598 {
599 Trace("strings-subs-proxy")
600 << "...trivial definition via " << ns[i] << std::endl;
601 // it is a trivial equality, e.g. between a proxy variable
602 // and its definition
603 return;
604 }
605 }
606 }
607 }
608 if (!n.isConst() || !n.getConst<bool>())
609 {
610 Trace("strings-subs-proxy") << "...unprocessed" << std::endl;
611 unproc.push_back(n);
612 }
613 }
614
615 } // namespace strings
616 } // namespace theory
617 } // namespace CVC4