Front end support for sequences (#4690)
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /********************* */
2 /*! \file term_registry.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of term registry for the theory of strings.
13 **/
14
15 #include "theory/strings/term_registry.h"
16
17 #include "expr/attribute.h"
18 #include "options/smt_options.h"
19 #include "options/strings_options.h"
20 #include "smt/logic_exception.h"
21 #include "theory/rewriter.h"
22 #include "theory/strings/theory_strings_utils.h"
23 #include "theory/strings/word.h"
24
25 using namespace std;
26 using namespace CVC4::context;
27 using namespace CVC4::kind;
28
29 namespace CVC4 {
30 namespace theory {
31 namespace strings {
32
33 struct StringsProxyVarAttributeId
34 {
35 };
36 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
37 StringsProxyVarAttribute;
38
39 TermRegistry::TermRegistry(SolverState& s,
40 eq::EqualityEngine& ee,
41 OutputChannel& out,
42 SequencesStatistics& statistics,
43 ProofNodeManager* pnm)
44 : d_state(s),
45 d_ee(ee),
46 d_out(out),
47 d_statistics(statistics),
48 d_hasStrCode(false),
49 d_functionsTerms(s.getSatContext()),
50 d_inputVars(s.getUserContext()),
51 d_preregisteredTerms(s.getUserContext()),
52 d_registeredTerms(s.getUserContext()),
53 d_registeredTypes(s.getUserContext()),
54 d_proxyVar(s.getUserContext()),
55 d_proxyVarToLength(s.getUserContext()),
56 d_lengthLemmaTermsCache(s.getUserContext()),
57 d_epg(nullptr)
58 {
59 NodeManager* nm = NodeManager::currentNM();
60 d_zero = nm->mkConst(Rational(0));
61 d_one = nm->mkConst(Rational(1));
62 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
63 d_cardSize = utils::getAlphabetCardinality();
64 }
65
66 TermRegistry::~TermRegistry() {}
67
68 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc)
69 {
70 NodeManager* nm = NodeManager::currentNM();
71 Node lemma;
72 Kind tk = t.getKind();
73 if (tk == STRING_TO_CODE)
74 {
75 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
76 Node code_len = utils::mkNLength(t[0]).eqNode(nm->mkConst(Rational(1)));
77 Node code_eq_neg1 = t.eqNode(nm->mkConst(Rational(-1)));
78 Node code_range = nm->mkNode(
79 AND,
80 nm->mkNode(GEQ, t, nm->mkConst(Rational(0))),
81 nm->mkNode(
82 LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
83 lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
84 }
85 else if (tk == STRING_STRIDOF)
86 {
87 // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
88 // x)))
89 Node l = utils::mkNLength(t[0]);
90 lemma = nm->mkNode(AND,
91 nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
92 nm->mkNode(LEQ, t, l));
93 }
94 else if (tk == STRING_STOI)
95 {
96 // (>= (str.to_int x) (- 1))
97 lemma = nm->mkNode(GEQ, t, nm->mkConst(Rational(-1)));
98 }
99 else if (tk == STRING_STRCTN)
100 {
101 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
102 Node sk1 =
103 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_PRE, "sc1");
104 Node sk2 =
105 sc->mkSkolemCached(t[0], t[1], SkolemCache::SK_FIRST_CTN_POST, "sc2");
106 lemma = t[0].eqNode(utils::mkNConcat(sk1, t[1], sk2));
107 lemma = nm->mkNode(ITE, t, lemma, t[0].eqNode(t[1]).notNode());
108 }
109 return lemma;
110 }
111
112 Node TermRegistry::lengthPositive(Node t)
113 {
114 NodeManager* nm = NodeManager::currentNM();
115 Node zero = nm->mkConst(Rational(0));
116 Node emp = Word::mkEmptyWord(t.getType());
117 Node tlen = nm->mkNode(STRING_LENGTH, t);
118 Node tlenEqZero = tlen.eqNode(zero);
119 Node tEqEmp = t.eqNode(emp);
120 Node caseEmpty = nm->mkNode(AND, tlenEqZero, tEqEmp);
121 Node caseNEmpty = nm->mkNode(GT, tlen, zero);
122 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
123 return nm->mkNode(OR, caseEmpty, caseNEmpty);
124 }
125
126 void TermRegistry::preRegisterTerm(TNode n)
127 {
128 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
129 {
130 return;
131 }
132 d_preregisteredTerms.insert(n);
133 Trace("strings-preregister")
134 << "TheoryString::preregister : " << n << std::endl;
135 // check for logic exceptions
136 Kind k = n.getKind();
137 if (!options::stringExp())
138 {
139 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
140 || k == STRING_STRREPL || k == STRING_STRREPLALL
141 || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
142 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
143 || k == STRING_TOUPPER || k == STRING_REV)
144 {
145 std::stringstream ss;
146 ss << "Term of kind " << k
147 << " not supported in default mode, try --strings-exp";
148 throw LogicException(ss.str());
149 }
150 }
151 if (k == EQUAL)
152 {
153 if (n[0].getType().isRegExp())
154 {
155 std::stringstream ss;
156 ss << "Equality between regular expressions is not supported";
157 throw LogicException(ss.str());
158 }
159 d_ee.addTriggerEquality(n);
160 return;
161 }
162 else if (k == STRING_IN_REGEXP)
163 {
164 d_out.requirePhase(n, true);
165 d_ee.addTriggerPredicate(n);
166 d_ee.addTerm(n[0]);
167 d_ee.addTerm(n[1]);
168 return;
169 }
170 else if (k == STRING_TO_CODE)
171 {
172 d_hasStrCode = true;
173 }
174 registerTerm(n, 0);
175 TypeNode tn = n.getType();
176 if (tn.isRegExp() && n.isVar())
177 {
178 std::stringstream ss;
179 ss << "Regular expression variables are not supported.";
180 throw LogicException(ss.str());
181 }
182 if (tn.isString()) // string-only
183 {
184 // all characters of constants should fall in the alphabet
185 if (n.isConst())
186 {
187 std::vector<unsigned> vec = n.getConst<String>().getVec();
188 for (unsigned u : vec)
189 {
190 if (u >= d_cardSize)
191 {
192 std::stringstream ss;
193 ss << "Characters in string \"" << n
194 << "\" are outside of the given alphabet.";
195 throw LogicException(ss.str());
196 }
197 }
198 }
199 d_ee.addTerm(n);
200 }
201 else if (tn.isBoolean())
202 {
203 // Get triggered for both equal and dis-equal
204 d_ee.addTriggerPredicate(n);
205 }
206 else
207 {
208 // Function applications/predicates
209 d_ee.addTerm(n);
210 }
211 // Set d_functionsTerms stores all function applications that are
212 // relevant to theory combination. Notice that this is a subset of
213 // the applications whose kinds are function kinds in the equality
214 // engine. This means it does not include applications of operators
215 // like re.++, which is not a function kind in the equality engine.
216 // Concatenation terms do not need to be considered here because
217 // their arguments have string type and do not introduce any shared
218 // terms.
219 if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT)
220 {
221 d_functionsTerms.push_back(n);
222 }
223 if (options::stringFMF())
224 {
225 if (tn.isStringLike())
226 {
227 // Our decision strategy will minimize the length of this term if it is a
228 // variable but not an internally generated Skolem, or a term that does
229 // not belong to this theory.
230 if (n.isVar() ? !d_skCache.isSkolem(n)
231 : kindToTheoryId(k) != THEORY_STRINGS)
232 {
233 d_inputVars.insert(n);
234 Trace("strings-preregister") << "input variable: " << n << std::endl;
235 }
236 }
237 }
238 }
239
240 void TermRegistry::registerTerm(Node n, int effort)
241 {
242 TypeNode tn = n.getType();
243 bool do_register = true;
244 if (!tn.isStringLike())
245 {
246 if (options::stringEagerLen())
247 {
248 do_register = effort == 0;
249 }
250 else
251 {
252 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
253 }
254 }
255 if (!do_register)
256 {
257 return;
258 }
259 if (d_registeredTerms.find(n) != d_registeredTerms.end())
260 {
261 return;
262 }
263 d_registeredTerms.insert(n);
264 // ensure the type is registered
265 registerType(tn);
266 Debug("strings-register") << "TheoryStrings::registerTerm() " << n
267 << ", effort = " << effort << std::endl;
268 Node regTermLem;
269 if (tn.isStringLike())
270 {
271 // register length information:
272 // for variables, split on empty vs positive length
273 // for concat/const/replace, introduce proxy var and state length relation
274 regTermLem = getRegisterTermLemma(n);
275 }
276 else if (n.getKind() != STRING_STRCTN)
277 {
278 // we don't send out eager reduction lemma for str.contains currently
279 regTermLem = eagerReduce(n, &d_skCache);
280 }
281 if (!regTermLem.isNull())
282 {
283 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
284 << std::endl;
285 Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
286 ++(d_statistics.d_lemmasRegisterTerm);
287 d_out.lemma(regTermLem);
288 }
289 }
290
291 void TermRegistry::registerType(TypeNode tn)
292 {
293 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
294 {
295 return;
296 }
297 d_registeredTypes.insert(tn);
298 if (tn.isStringLike())
299 {
300 // preregister the empty word for the type
301 Node emp = Word::mkEmptyWord(tn);
302 if (!d_ee.hasTerm(emp))
303 {
304 preRegisterTerm(emp);
305 }
306 }
307 }
308
309 Node TermRegistry::getRegisterTermLemma(Node n)
310 {
311 Assert(n.getType().isStringLike());
312 NodeManager* nm = NodeManager::currentNM();
313 // register length information:
314 // for variables, split on empty vs positive length
315 // for concat/const/replace, introduce proxy var and state length relation
316 Node lsum;
317 if (n.getKind() != STRING_CONCAT && !n.isConst())
318 {
319 Node lsumb = nm->mkNode(STRING_LENGTH, n);
320 lsum = Rewriter::rewrite(lsumb);
321 // can register length term if it does not rewrite
322 if (lsum == lsumb)
323 {
324 registerTermAtomic(n, LENGTH_SPLIT);
325 return Node::null();
326 }
327 }
328 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
329 StringsProxyVarAttribute spva;
330 sk.setAttribute(spva, true);
331 Node eq = Rewriter::rewrite(sk.eqNode(n));
332 d_proxyVar[n] = sk;
333 // If we are introducing a proxy for a constant or concat term, we do not
334 // need to send lemmas about its length, since its length is already
335 // implied.
336 if (n.isConst() || n.getKind() == STRING_CONCAT)
337 {
338 // do not send length lemma for sk.
339 registerTermAtomic(sk, LENGTH_IGNORE);
340 }
341 Node skl = nm->mkNode(STRING_LENGTH, sk);
342 if (n.getKind() == STRING_CONCAT)
343 {
344 std::vector<Node> nodeVec;
345 for (const Node& nc : n)
346 {
347 if (nc.getAttribute(StringsProxyVarAttribute()))
348 {
349 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
350 nodeVec.push_back(d_proxyVarToLength[nc]);
351 }
352 else
353 {
354 Node lni = nm->mkNode(STRING_LENGTH, nc);
355 nodeVec.push_back(lni);
356 }
357 }
358 lsum = nm->mkNode(PLUS, nodeVec);
359 lsum = Rewriter::rewrite(lsum);
360 }
361 else if (n.isConst())
362 {
363 lsum = nm->mkConst(Rational(Word::getLength(n)));
364 }
365 Assert(!lsum.isNull());
366 d_proxyVarToLength[sk] = lsum;
367 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
368
369 Node ret = nm->mkNode(AND, eq, ceq);
370
371 return ret;
372 }
373
374 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
375 {
376 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
377 {
378 return;
379 }
380 d_lengthLemmaTermsCache.insert(n);
381
382 if (s == LENGTH_IGNORE)
383 {
384 // ignore it
385 return;
386 }
387 std::map<Node, bool> reqPhase;
388 Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
389 if (!lenLem.isNull())
390 {
391 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
392 << std::endl;
393 Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
394 ++(d_statistics.d_lemmasRegisterTermAtomic);
395 d_out.lemma(lenLem);
396 }
397 for (const std::pair<const Node, bool>& rp : reqPhase)
398 {
399 d_out.requirePhase(rp.first, rp.second);
400 }
401 }
402
403 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
404
405 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
406 {
407 return d_functionsTerms;
408 }
409
410 const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
411 const
412 {
413 return d_inputVars;
414 }
415
416 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
417
418 Node TermRegistry::getRegisterTermAtomicLemma(Node n,
419 LengthStatus s,
420 std::map<Node, bool>& reqPhase)
421 {
422 if (n.isConst())
423 {
424 // No need to send length for constant terms. This case may be triggered
425 // for cases where the skolem cache automatically replaces a skolem by
426 // a constant.
427 return Node::null();
428 }
429 Assert(n.getType().isStringLike());
430 NodeManager* nm = NodeManager::currentNM();
431 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
432 Node emp = Word::mkEmptyWord(n.getType());
433 if (s == LENGTH_GEQ_ONE)
434 {
435 Node neq_empty = n.eqNode(emp).negate();
436 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
437 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
438 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
439 << std::endl;
440 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
441 return len_geq_one;
442 }
443
444 if (s == LENGTH_ONE)
445 {
446 Node len_one = n_len.eqNode(d_one);
447 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
448 << std::endl;
449 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
450 return len_one;
451 }
452 Assert(s == LENGTH_SPLIT);
453
454 // get the positive length lemma
455 Node lenLemma = lengthPositive(n);
456 // split whether the string is empty
457 Node n_len_eq_z = n_len.eqNode(d_zero);
458 Node n_len_eq_z_2 = n.eqNode(emp);
459 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
460 Node case_emptyr = Rewriter::rewrite(case_empty);
461 if (!case_emptyr.isConst())
462 {
463 // prefer trying the empty case first
464 // notice that requirePhase must only be called on rewritten literals that
465 // occur in the CNF stream.
466 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
467 Assert(!n_len_eq_z.isConst());
468 reqPhase[n_len_eq_z] = true;
469 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
470 Assert(!n_len_eq_z_2.isConst());
471 reqPhase[n_len_eq_z_2] = true;
472 }
473 else
474 {
475 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
476 // n ---> "". Since this method is only called on non-constants n, it must
477 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
478 Assert(!case_emptyr.getConst<bool>());
479 }
480
481 return lenLemma;
482 }
483
484 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
485 {
486 if (n.getNumChildren() == 0)
487 {
488 Node pn = getProxyVariableFor(n);
489 if (pn.isNull())
490 {
491 return Node::null();
492 }
493 Node eq = n.eqNode(pn);
494 eq = Rewriter::rewrite(eq);
495 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
496 {
497 exp.push_back(eq);
498 }
499 return pn;
500 }
501 std::vector<Node> children;
502 if (n.getMetaKind() == metakind::PARAMETERIZED)
503 {
504 children.push_back(n.getOperator());
505 }
506 for (const Node& nc : n)
507 {
508 if (n.getType().isRegExp())
509 {
510 children.push_back(nc);
511 }
512 else
513 {
514 Node ns = getSymbolicDefinition(nc, exp);
515 if (ns.isNull())
516 {
517 return Node::null();
518 }
519 else
520 {
521 children.push_back(ns);
522 }
523 }
524 }
525 return NodeManager::currentNM()->mkNode(n.getKind(), children);
526 }
527
528 Node TermRegistry::getProxyVariableFor(Node n) const
529 {
530 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
531 if (it != d_proxyVar.end())
532 {
533 return (*it).second;
534 }
535 return Node::null();
536 }
537
538 void TermRegistry::inferSubstitutionProxyVars(Node n,
539 std::vector<Node>& vars,
540 std::vector<Node>& subs,
541 std::vector<Node>& unproc) const
542 {
543 if (n.getKind() == AND)
544 {
545 for (const Node& nc : n)
546 {
547 inferSubstitutionProxyVars(nc, vars, subs, unproc);
548 }
549 return;
550 }
551 if (n.getKind() == EQUAL)
552 {
553 Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
554 ns = Rewriter::rewrite(ns);
555 if (ns.getKind() == EQUAL)
556 {
557 Node s;
558 Node v;
559 for (unsigned i = 0; i < 2; i++)
560 {
561 Node ss;
562 // determine whether this side has a proxy variable
563 if (ns[i].getAttribute(StringsProxyVarAttribute()))
564 {
565 // it is a proxy variable
566 ss = ns[i];
567 }
568 else if (ns[i].isConst())
569 {
570 ss = getProxyVariableFor(ns[i]);
571 }
572 if (!ss.isNull())
573 {
574 v = ns[1 - i];
575 // if the other side is a constant or variable
576 if (v.getNumChildren() == 0)
577 {
578 if (s.isNull())
579 {
580 s = ss;
581 }
582 else
583 {
584 // both sides of the equality correspond to a proxy variable
585 if (ss == s)
586 {
587 // it is a trivial equality, e.g. between a proxy variable
588 // and its definition
589 return;
590 }
591 else
592 {
593 // equality between proxy variables, non-trivial
594 s = Node::null();
595 }
596 }
597 }
598 }
599 }
600 if (!s.isNull())
601 {
602 // the equality can be turned into a substitution
603 subs.push_back(s);
604 vars.push_back(v);
605 return;
606 }
607 }
608 else
609 {
610 n = ns;
611 }
612 }
613 if (!n.isConst() || !n.getConst<bool>())
614 {
615 unproc.push_back(n);
616 }
617 }
618
619 } // namespace strings
620 } // namespace theory
621 } // namespace CVC4