(proof-new) Miscellaneous updates to strings from proof-new (#6557)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Andres Noetzli, Morgan Deters
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * Implementation of term registry for the theory of strings.
14 */
15
16 #include "theory/strings/term_registry.h"
17
18 #include "expr/attribute.h"
19 #include "options/smt_options.h"
20 #include "options/strings_options.h"
21 #include "smt/logic_exception.h"
22 #include "theory/rewriter.h"
23 #include "theory/strings/inference_manager.h"
24 #include "theory/strings/theory_strings_utils.h"
25 #include "theory/strings/word.h"
26
27 using namespace std;
28 using namespace cvc5::context;
29 using namespace cvc5::kind;
30
31 namespace cvc5 {
32 namespace theory {
33 namespace strings {
34
35 struct StringsProxyVarAttributeId
36 {
37 };
38 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
39 StringsProxyVarAttribute;
40
41 TermRegistry::TermRegistry(SolverState& s,
42 SequencesStatistics& statistics,
43 ProofNodeManager* pnm)
44 : d_state(s),
45 d_im(nullptr),
46 d_statistics(statistics),
47 d_hasStrCode(false),
48 d_functionsTerms(s.getSatContext()),
49 d_inputVars(s.getUserContext()),
50 d_preregisteredTerms(s.getSatContext()),
51 d_registeredTerms(s.getUserContext()),
52 d_registeredTypes(s.getUserContext()),
53 d_proxyVar(s.getUserContext()),
54 d_lengthLemmaTermsCache(s.getUserContext()),
55 d_epg(pnm ? new EagerProofGenerator(
56 pnm,
57 s.getUserContext(),
58 "strings::TermRegistry::EagerProofGenerator")
59 : nullptr)
60 {
61 NodeManager* nm = NodeManager::currentNM();
62 d_zero = nm->mkConst(Rational(0));
63 d_one = nm->mkConst(Rational(1));
64 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
65 d_cardSize = utils::getAlphabetCardinality();
66 }
67
68 TermRegistry::~TermRegistry() {}
69
70 void TermRegistry::finishInit(InferenceManager* im) { d_im = im; }
71
72 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
73 {
74 NodeManager* nm = NodeManager::currentNM();
75 Node lemma;
76 Kind tk = t.getKind();
77 if (tk == STRING_TO_CODE)
78 {
79 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
80 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
81 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
82 Node code_range = nm->mkNode(
83 AND,
84 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
85 nm->mkNode(
86 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
87 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
88 }
89 else if (tk == STRING_STRIDOF)
90 {
91 // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
92 // x)))
93 Node l = nm->mkNode(STRING_LENGTH, t[0]);
94 lemma = nm->mkNode(AND,
95 nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
96 nm->mkNode(LEQ, t, l));
97 }
98 else if (tk == STRING_STOI)
99 {
100 // (>= (str.to_int x) (- 1))
101 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
102 }
103 else if (tk == STRING_STRCTN)
104 {
105 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
106 Node sk1 =
107 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
108 Node sk2 =
109 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
110 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
111 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
112 }
113 return lemma;
114 }
115
116 Node TermRegistry::lengthPositive(Node t)
117 {
118 NodeManager* nm = NodeManager::currentNM();
119 Node zero = nm->mkConst(Rational(0));
120 Node emp = Word::mkEmptyWord(t.getType());
121 Node tlen = nm->mkNode(STRING_LENGTH, t);
122 Node tlenEqZero = tlen.eqNode(zero);
123 Node tEqEmp = t.eqNode(emp);
124 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
125 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
126 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
127 return nm->mkNode(OR, caseEmpty, caseNEmpty);
128 }
129
130 void TermRegistry::preRegisterTerm(TNode n)
131 {
132 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
133 {
134 return;
135 }
136 eq::EqualityEngine* ee = d_state.getEqualityEngine();
137 d_preregisteredTerms.insert(n);
138 Trace("strings-preregister")
139 << "TheoryString::preregister : " << n << std::endl;
140 // check for logic exceptions
141 Kind k = n.getKind();
142 if (!options::stringExp())
143 {
144 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
145 || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
146 || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
147 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
148 || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
149 {
150 std::stringstream ss;
151 ss << "Term of kind " << k
152 << " not supported in default mode, try --strings-exp";
153 throw LogicException(ss.str());
154 }
155 }
156 if (k == EQUAL)
157 {
158 if (n[0].getType().isRegExp())
159 {
160 std::stringstream ss;
161 ss << "Equality between regular expressions is not supported";
162 throw LogicException(ss.str());
163 }
164 ee->addTriggerPredicate(n);
165 return;
166 }
167 else if (k == STRING_IN_REGEXP)
168 {
169 d_im->requirePhase(n, true);
170 ee->addTriggerPredicate(n);
171 ee->addTerm(n[0]);
172 ee->addTerm(n[1]);
173 return;
174 }
175 else if (k == STRING_TO_CODE)
176 {
177 d_hasStrCode = true;
178 }
179 else if (k == REGEXP_RANGE)
180 {
181 for (const Node& nc : n)
182 {
183 if (!nc.isConst())
184 {
185 throw LogicException(
186 "expecting a constant string term in regexp range");
187 }
188 if (nc.getConst<String>().size() != 1)
189 {
190 throw LogicException(
191 "expecting a single constant string term in regexp range");
192 }
193 }
194 }
195 registerTerm(n, 0);
196 TypeNode tn = n.getType();
197 if (tn.isRegExp() && n.isVar())
198 {
199 std::stringstream ss;
200 ss << "Regular expression variables are not supported.";
201 throw LogicException(ss.str());
202 }
203 if (tn.isString()) // string-only
204 {
205 // all characters of constants should fall in the alphabet
206 if (n.isConst())
207 {
208 std::vector<unsigned> vec = n.getConst<String>().getVec();
209 for (unsigned u : vec)
210 {
211 if (u >= d_cardSize)
212 {
213 std::stringstream ss;
214 ss << "Characters in string \"" << n
215 << "\" are outside of the given alphabet.";
216 throw LogicException(ss.str());
217 }
218 }
219 }
220 ee->addTerm(n);
221 }
222 else if (tn.isBoolean())
223 {
224 // All kinds that we do congruence over that may return a Boolean go here
225 if (k==STRING_STRCTN || k == STRING_LEQ || k == SEQ_NTH)
226 {
227 // Get triggered for both equal and dis-equal
228 ee->addTriggerPredicate(n);
229 }
230 }
231 else
232 {
233 // Function applications/predicates
234 ee->addTerm(n);
235 }
236 // Set d_functionsTerms stores all function applications that are
237 // relevant to theory combination. Notice that this is a subset of
238 // the applications whose kinds are function kinds in the equality
239 // engine. This means it does not include applications of operators
240 // like re.++, which is not a function kind in the equality engine.
241 // Concatenation terms do not need to be considered here because
242 // their arguments have string type and do not introduce any shared
243 // terms.
244 if (n.hasOperator() && ee->isFunctionKind(k) && k != STRING_CONCAT)
245 {
246 d_functionsTerms.push_back(n);
247 }
248 if (options::stringFMF())
249 {
250 if (tn.isStringLike())
251 {
252 // Our decision strategy will minimize the length of this term if it is a
253 // variable but not an internally generated Skolem, or a term that does
254 // not belong to this theory.
255 if (n.isVar() ? !d_skCache.isSkolem(n)
256 : kindToTheoryId(k) != THEORY_STRINGS)
257 {
258 d_inputVars.insert(n);
259 Trace("strings-preregister") << "input variable: " << n << std::endl;
260 }
261 }
262 }
263 }
264
265 void TermRegistry::registerTerm(Node n, int effort)
266 {
267 Trace("strings-register") << "TheoryStrings::registerTerm() " << n
268 << ", effort = " << effort << std::endl;
269 if (d_registeredTerms.find(n) != d_registeredTerms.end())
270 {
271 Trace("strings-register") << "...already registered" << std::endl;
272 return;
273 }
274 bool do_register = true;
275 TypeNode tn = n.getType();
276 if (!tn.isStringLike())
277 {
278 if (options::stringEagerLen())
279 {
280 do_register = effort == 0;
281 }
282 else
283 {
284 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
285 }
286 }
287 if (!do_register)
288 {
289 Trace("strings-register") << "...do not register" << std::endl;
290 return;
291 }
292 Trace("strings-register") << "...register" << std::endl;
293 d_registeredTerms.insert(n);
294 // ensure the type is registered
295 registerType(tn);
296 TrustNode regTermLem;
297 if (tn.isStringLike())
298 {
299 // register length information:
300 // for variables, split on empty vs positive length
301 // for concat/const/replace, introduce proxy var and state length relation
302 regTermLem = getRegisterTermLemma(n);
303 }
304 else if (n.getKind() != STRING_STRCTN)
305 {
306 // we don't send out eager reduction lemma for str.contains currently
307 Node eagerRedLemma = eagerReduce(n, &d_skCache);
308 if (!eagerRedLemma.isNull())
309 {
310 // if there was an eager reduction, we make the trust node for it
311 if (d_epg != nullptr)
312 {
313 regTermLem = d_epg->mkTrustNode(
314 eagerRedLemma, PfRule::STRING_EAGER_REDUCTION, {}, {n});
315 }
316 else
317 {
318 regTermLem = TrustNode::mkTrustLemma(eagerRedLemma, nullptr);
319 }
320 }
321 }
322 if (!regTermLem.isNull())
323 {
324 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
325 << std::endl;
326 Trace("strings-assert")
327 << "(assert " << regTermLem.getNode() << ")" << std::endl;
328 d_im->trustedLemma(regTermLem, InferenceId::STRINGS_REGISTER_TERM);
329 }
330 }
331
332 void TermRegistry::registerType(TypeNode tn)
333 {
334 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
335 {
336 return;
337 }
338 d_registeredTypes.insert(tn);
339 if (tn.isStringLike())
340 {
341 // preregister the empty word for the type
342 Node emp = Word::mkEmptyWord(tn);
343 if (!d_state.hasTerm(emp))
344 {
345 preRegisterTerm(emp);
346 }
347 }
348 }
349
350 TrustNode TermRegistry::getRegisterTermLemma(Node n)
351 {
352 Assert(n.getType().isStringLike());
353 NodeManager* nm = NodeManager::currentNM();
354 // register length information:
355 // for variables, split on empty vs positive length
356 // for concat/const/replace, introduce proxy var and state length relation
357 Node lsum;
358 if (n.getKind() != STRING_CONCAT && !n.isConst())
359 {
360 Node lsumb = nm->mkNode(STRING_LENGTH, n);
361 lsum = Rewriter::rewrite(lsumb);
362 // can register length term if it does not rewrite
363 if (lsum == lsumb)
364 {
365 registerTermAtomic(n, LENGTH_SPLIT);
366 return TrustNode::null();
367 }
368 }
369 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
370 StringsProxyVarAttribute spva;
371 sk.setAttribute(spva, true);
372 Node eq = Rewriter::rewrite(sk.eqNode(n));
373 d_proxyVar[n] = sk;
374 // If we are introducing a proxy for a constant or concat term, we do not
375 // need to send lemmas about its length, since its length is already
376 // implied.
377 if (n.isConst() || n.getKind() == STRING_CONCAT)
378 {
379 // do not send length lemma for sk.
380 registerTermAtomic(sk, LENGTH_IGNORE);
381 }
382 Node skl = nm->mkNode(STRING_LENGTH, sk);
383 if (n.getKind() == STRING_CONCAT)
384 {
385 std::vector<Node> nodeVec;
386 for (const Node& nc : n)
387 {
388 if (nc.getAttribute(StringsProxyVarAttribute()))
389 {
390 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
391 nodeVec.push_back(d_proxyVarToLength[nc]);
392 }
393 else
394 {
395 Node lni = nm->mkNode(STRING_LENGTH, nc);
396 nodeVec.push_back(lni);
397 }
398 }
399 lsum = nm->mkNode(PLUS, nodeVec);
400 lsum = Rewriter::rewrite(lsum);
401 }
402 else if (n.isConst())
403 {
404 lsum = nm->mkConst(Rational(Word::getLength(n)));
405 }
406 Assert(!lsum.isNull());
407 d_proxyVarToLength[sk] = lsum;
408 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
409
410 Node ret = nm->mkNode(AND, eq, ceq);
411
412 // it is a simple rewrite to justify this
413 if (d_epg != nullptr)
414 {
415 return d_epg->mkTrustNode(ret, PfRule::MACRO_SR_PRED_INTRO, {}, {ret});
416 }
417 return TrustNode::mkTrustLemma(ret, nullptr);
418 }
419
420 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
421 {
422 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
423 {
424 return;
425 }
426 d_lengthLemmaTermsCache.insert(n);
427
428 if (s == LENGTH_IGNORE)
429 {
430 // ignore it
431 return;
432 }
433 std::map<Node, bool> reqPhase;
434 TrustNode lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
435 if (!lenLem.isNull())
436 {
437 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
438 << std::endl;
439 Trace("strings-assert")
440 << "(assert " << lenLem.getNode() << ")" << std::endl;
441 d_im->trustedLemma(lenLem, InferenceId::STRINGS_REGISTER_TERM_ATOMIC);
442 }
443 for (const std::pair<const Node, bool>& rp : reqPhase)
444 {
445 d_im->requirePhase(rp.first, rp.second);
446 }
447 }
448
449 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
450
451 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
452 {
453 return d_functionsTerms;
454 }
455
456 const context::CDHashSet<Node>& TermRegistry::getInputVars() const
457 {
458 return d_inputVars;
459 }
460
461 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
462
463 TrustNode TermRegistry::getRegisterTermAtomicLemma(
464 Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
465 {
466 if (n.isConst())
467 {
468 // No need to send length for constant terms. This case may be triggered
469 // for cases where the skolem cache automatically replaces a skolem by
470 // a constant.
471 return TrustNode::null();
472 }
473 Assert(n.getType().isStringLike());
474 NodeManager* nm = NodeManager::currentNM();
475 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
476 Node emp = Word::mkEmptyWord(n.getType());
477 if (s == LENGTH_GEQ_ONE)
478 {
479 Node neq_empty = n.eqNode(emp).negate();
480 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
481 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
482 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
483 << std::endl;
484 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
485 return TrustNode::mkTrustLemma(len_geq_one, nullptr);
486 }
487
488 if (s == LENGTH_ONE)
489 {
490 Node len_one = n_len.eqNode(d_one);
491 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
492 << std::endl;
493 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
494 return TrustNode::mkTrustLemma(len_one, nullptr);
495 }
496 Assert(s == LENGTH_SPLIT);
497
498 // get the positive length lemma
499 Node lenLemma = lengthPositive(n);
500 // split whether the string is empty
501 Node n_len_eq_z = n_len.eqNode(d_zero);
502 Node n_len_eq_z_2 = n.eqNode(emp);
503 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
504 Node case_emptyr = Rewriter::rewrite(case_empty);
505 if (!case_emptyr.isConst())
506 {
507 // prefer trying the empty case first
508 // notice that requirePhase must only be called on rewritten literals that
509 // occur in the CNF stream.
510 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
511 Assert(!n_len_eq_z.isConst());
512 reqPhase[n_len_eq_z] = true;
513 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
514 Assert(!n_len_eq_z_2.isConst());
515 reqPhase[n_len_eq_z_2] = true;
516 }
517 else
518 {
519 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
520 // n ---> "". Since this method is only called on non-constants n, it must
521 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
522 Assert(!case_emptyr.getConst<bool>());
523 }
524
525 if (d_epg != nullptr)
526 {
527 return d_epg->mkTrustNode(lenLemma, PfRule::STRING_LENGTH_POS, {}, {n});
528 }
529 return TrustNode::mkTrustLemma(lenLemma, nullptr);
530 }
531
532 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
533 {
534 if (n.getNumChildren() == 0)
535 {
536 Node pn = getProxyVariableFor(n);
537 if (pn.isNull())
538 {
539 return Node::null();
540 }
541 Node eq = n.eqNode(pn);
542 eq = Rewriter::rewrite(eq);
543 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
544 {
545 exp.push_back(eq);
546 }
547 return pn;
548 }
549 std::vector<Node> children;
550 if (n.getMetaKind() == metakind::PARAMETERIZED)
551 {
552 children.push_back(n.getOperator());
553 }
554 for (const Node& nc : n)
555 {
556 if (n.getType().isRegExp())
557 {
558 children.push_back(nc);
559 }
560 else
561 {
562 Node ns = getSymbolicDefinition(nc, exp);
563 if (ns.isNull())
564 {
565 return Node::null();
566 }
567 else
568 {
569 children.push_back(ns);
570 }
571 }
572 }
573 return NodeManager::currentNM()->mkNode(n.getKind(), children);
574 }
575
576 Node TermRegistry::getProxyVariableFor(Node n) const
577 {
578 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
579 if (it != d_proxyVar.end())
580 {
581 return (*it).second;
582 }
583 return Node::null();
584 }
585
586 Node TermRegistry::ensureProxyVariableFor(Node n)
587 {
588 Node proxy = getProxyVariableFor(n);
589 if (proxy.isNull())
590 {
591 registerTerm(n, 0);
592 proxy = getProxyVariableFor(n);
593 }
594 Assert(!proxy.isNull());
595 return proxy;
596 }
597
598 void TermRegistry::removeProxyEqs(Node n, std::vector<Node>& unproc) const
599 {
600 if (n.getKind() == AND)
601 {
602 for (const Node& nc : n)
603 {
604 removeProxyEqs(nc, unproc);
605 }
606 return;
607 }
608 Trace("strings-subs-proxy") << "Input : " << n << std::endl;
609 Node ns = Rewriter::rewrite(n);
610 if (ns.getKind() == EQUAL)
611 {
612 for (size_t i = 0; i < 2; i++)
613 {
614 // determine whether this side has a proxy variable
615 if (ns[i].getAttribute(StringsProxyVarAttribute()))
616 {
617 if (getProxyVariableFor(ns[1 - i]) == ns[i])
618 {
619 Trace("strings-subs-proxy")
620 << "...trivial definition via " << ns[i] << std::endl;
621 // it is a trivial equality, e.g. between a proxy variable
622 // and its definition
623 return;
624 }
625 }
626 }
627 }
628 if (!n.isConst() || !n.getConst<bool>())
629 {
630 Trace("strings-subs-proxy") << "...unprocessed" << std::endl;
631 unproc.push_back(n);
632 }
633 }
634
635 } // namespace strings
636 } // namespace theory
637 } // namespace cvc5