90ebc4f8b3eb8903808c5f0bd8a22aff52bb7b47
[cvc5.git] / src / theory / strings / term_registry.cpp
1 /********************* */
2 /*! \file term_registry.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Tianyi Liang
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Implementation of term registry for the theory of strings.
13 **/
14
15 #include "theory/strings/term_registry.h"
16
17 #include "expr/attribute.h"
18 #include "options/strings_options.h"
19 #include "smt/logic_exception.h"
20 #include "theory/rewriter.h"
21 #include "theory/strings/theory_strings_utils.h"
22 #include "theory/strings/word.h"
23
24 using namespace std;
25 using namespace CVC4::context;
26 using namespace CVC4::kind;
27
28 namespace CVC4 {
29 namespace theory {
30 namespace strings {
31
32 struct StringsProxyVarAttributeId
33 {
34 };
35 typedef expr::Attribute<StringsProxyVarAttributeId, bool>
36 StringsProxyVarAttribute;
37
38 TermRegistry::TermRegistry(context::Context* c,
39 context::UserContext* u,
40 eq::EqualityEngine& ee,
41 OutputChannel& out,
42 SequencesStatistics& statistics)
43 : d_ee(ee),
44 d_out(out),
45 d_statistics(statistics),
46 d_hasStrCode(false),
47 d_functionsTerms(c),
48 d_inputVars(u),
49 d_preregisteredTerms(u),
50 d_registeredTerms(u),
51 d_registeredTypes(u),
52 d_proxyVar(u),
53 d_proxyVarToLength(u),
54 d_lengthLemmaTermsCache(u)
55 {
56 NodeManager* nm = NodeManager::currentNM();
57 d_zero = nm->mkConst(Rational(0));
58 d_one = nm->mkConst(Rational(1));
59 d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
60 d_cardSize = utils::getAlphabetCardinality();
61 }
62
63 TermRegistry::~TermRegistry() {}
64
65 void TermRegistry::preRegisterTerm(TNode n)
66 {
67 if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
68 {
69 return;
70 }
71 d_preregisteredTerms.insert(n);
72 Trace("strings-preregister")
73 << "TheoryString::preregister : " << n << std::endl;
74 // check for logic exceptions
75 Kind k = n.getKind();
76 if (!options::stringExp())
77 {
78 if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
79 || k == STRING_STRREPL || k == STRING_STRREPLALL
80 || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
81 || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
82 || k == STRING_TOUPPER || k == STRING_REV)
83 {
84 std::stringstream ss;
85 ss << "Term of kind " << k
86 << " not supported in default mode, try --strings-exp";
87 throw LogicException(ss.str());
88 }
89 }
90 if (k == EQUAL)
91 {
92 if (n[0].getType().isRegExp())
93 {
94 std::stringstream ss;
95 ss << "Equality between regular expressions is not supported";
96 throw LogicException(ss.str());
97 }
98 d_ee.addTriggerEquality(n);
99 return;
100 }
101 else if (k == STRING_IN_REGEXP)
102 {
103 d_out.requirePhase(n, true);
104 d_ee.addTriggerPredicate(n);
105 d_ee.addTerm(n[0]);
106 d_ee.addTerm(n[1]);
107 return;
108 }
109 else if (k == STRING_TO_CODE)
110 {
111 d_hasStrCode = true;
112 }
113 registerTerm(n, 0);
114 TypeNode tn = n.getType();
115 if (tn.isRegExp() && n.isVar())
116 {
117 std::stringstream ss;
118 ss << "Regular expression variables are not supported.";
119 throw LogicException(ss.str());
120 }
121 if (tn.isString())
122 {
123 // all characters of constants should fall in the alphabet
124 if (n.isConst())
125 {
126 std::vector<unsigned> vec = n.getConst<String>().getVec();
127 for (unsigned u : vec)
128 {
129 if (u >= d_cardSize)
130 {
131 std::stringstream ss;
132 ss << "Characters in string \"" << n
133 << "\" are outside of the given alphabet.";
134 throw LogicException(ss.str());
135 }
136 }
137 }
138 d_ee.addTerm(n);
139 }
140 else if (tn.isBoolean())
141 {
142 // Get triggered for both equal and dis-equal
143 d_ee.addTriggerPredicate(n);
144 }
145 else
146 {
147 // Function applications/predicates
148 d_ee.addTerm(n);
149 }
150 // Set d_functionsTerms stores all function applications that are
151 // relevant to theory combination. Notice that this is a subset of
152 // the applications whose kinds are function kinds in the equality
153 // engine. This means it does not include applications of operators
154 // like re.++, which is not a function kind in the equality engine.
155 // Concatenation terms do not need to be considered here because
156 // their arguments have string type and do not introduce any shared
157 // terms.
158 if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT)
159 {
160 d_functionsTerms.push_back(n);
161 }
162 if (options::stringFMF())
163 {
164 if (tn.isStringLike())
165 {
166 // Our decision strategy will minimize the length of this term if it is a
167 // variable but not an internally generated Skolem, or a term that does
168 // not belong to this theory.
169 if (n.isVar() ? !d_skCache.isSkolem(n)
170 : kindToTheoryId(k) != THEORY_STRINGS)
171 {
172 d_inputVars.insert(n);
173 Trace("strings-preregister") << "input variable: " << n << std::endl;
174 }
175 }
176 }
177 }
178
179 void TermRegistry::registerTerm(Node n, int effort)
180 {
181 TypeNode tn = n.getType();
182 bool do_register = true;
183 if (!tn.isStringLike())
184 {
185 if (options::stringEagerLen())
186 {
187 do_register = effort == 0;
188 }
189 else
190 {
191 do_register = effort > 0 || n.getKind() != STRING_CONCAT;
192 }
193 }
194 if (!do_register)
195 {
196 return;
197 }
198 if (d_registeredTerms.find(n) != d_registeredTerms.end())
199 {
200 return;
201 }
202 d_registeredTerms.insert(n);
203 // ensure the type is registered
204 registerType(tn);
205 NodeManager* nm = NodeManager::currentNM();
206 Debug("strings-register") << "TheoryStrings::registerTerm() " << n
207 << ", effort = " << effort << std::endl;
208 Node regTermLem;
209 if (tn.isStringLike())
210 {
211 // register length information:
212 // for variables, split on empty vs positive length
213 // for concat/const/replace, introduce proxy var and state length relation
214 regTermLem = getRegisterTermLemma(n);
215 }
216 else if (n.getKind() == STRING_TO_CODE)
217 {
218 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
219 Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
220 Node code_eq_neg1 = n.eqNode(d_negOne);
221 Node code_range = nm->mkNode(
222 AND,
223 nm->mkNode(GEQ, n, d_zero),
224 nm->mkNode(
225 LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
226 regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
227 }
228 else if (n.getKind() == STRING_STRIDOF)
229 {
230 Node len = utils::mkNLength(n[0]);
231 regTermLem = nm->mkNode(AND,
232 nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
233 nm->mkNode(LEQ, n, len));
234 }
235 else if (n.getKind() == STRING_STOI)
236 {
237 regTermLem = nm->mkNode(GEQ, n, nm->mkConst(Rational(-1)));
238 }
239 if (!regTermLem.isNull())
240 {
241 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
242 << std::endl;
243 Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
244 ++(d_statistics.d_lemmasRegisterTerm);
245 d_out.lemma(regTermLem);
246 }
247 }
248
249 void TermRegistry::registerType(TypeNode tn)
250 {
251 if (d_registeredTypes.find(tn) != d_registeredTypes.end())
252 {
253 return;
254 }
255 d_registeredTypes.insert(tn);
256 if (tn.isStringLike())
257 {
258 // preregister the empty word for the type
259 Node emp = Word::mkEmptyWord(tn);
260 if (!d_ee.hasTerm(emp))
261 {
262 preRegisterTerm(emp);
263 }
264 }
265 }
266
267 Node TermRegistry::getRegisterTermLemma(Node n)
268 {
269 Assert(n.getType().isStringLike());
270 NodeManager* nm = NodeManager::currentNM();
271 // register length information:
272 // for variables, split on empty vs positive length
273 // for concat/const/replace, introduce proxy var and state length relation
274 Node lsum;
275 if (n.getKind() != STRING_CONCAT && !n.isConst())
276 {
277 Node lsumb = nm->mkNode(STRING_LENGTH, n);
278 lsum = Rewriter::rewrite(lsumb);
279 // can register length term if it does not rewrite
280 if (lsum == lsumb)
281 {
282 registerTermAtomic(n, LENGTH_SPLIT);
283 return Node::null();
284 }
285 }
286 Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
287 StringsProxyVarAttribute spva;
288 sk.setAttribute(spva, true);
289 Node eq = Rewriter::rewrite(sk.eqNode(n));
290 d_proxyVar[n] = sk;
291 // If we are introducing a proxy for a constant or concat term, we do not
292 // need to send lemmas about its length, since its length is already
293 // implied.
294 if (n.isConst() || n.getKind() == STRING_CONCAT)
295 {
296 // do not send length lemma for sk.
297 registerTermAtomic(sk, LENGTH_IGNORE);
298 }
299 Node skl = nm->mkNode(STRING_LENGTH, sk);
300 if (n.getKind() == STRING_CONCAT)
301 {
302 std::vector<Node> nodeVec;
303 for (const Node& nc : n)
304 {
305 if (nc.getAttribute(StringsProxyVarAttribute()))
306 {
307 Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
308 nodeVec.push_back(d_proxyVarToLength[nc]);
309 }
310 else
311 {
312 Node lni = nm->mkNode(STRING_LENGTH, nc);
313 nodeVec.push_back(lni);
314 }
315 }
316 lsum = nm->mkNode(PLUS, nodeVec);
317 lsum = Rewriter::rewrite(lsum);
318 }
319 else if (n.isConst())
320 {
321 lsum = nm->mkConst(Rational(Word::getLength(n)));
322 }
323 Assert(!lsum.isNull());
324 d_proxyVarToLength[sk] = lsum;
325 Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
326
327 return nm->mkNode(AND, eq, ceq);
328 }
329
330 void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
331 {
332 if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
333 {
334 return;
335 }
336 d_lengthLemmaTermsCache.insert(n);
337
338 if (s == LENGTH_IGNORE)
339 {
340 // ignore it
341 return;
342 }
343 std::map<Node, bool> reqPhase;
344 Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
345 if (!lenLem.isNull())
346 {
347 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
348 << std::endl;
349 Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
350 ++(d_statistics.d_lemmasRegisterTermAtomic);
351 d_out.lemma(lenLem);
352 }
353 for (const std::pair<const Node, bool>& rp : reqPhase)
354 {
355 d_out.requirePhase(rp.first, rp.second);
356 }
357 }
358
359 SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
360
361 const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
362 {
363 return d_functionsTerms;
364 }
365
366 const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
367 const
368 {
369 return d_inputVars;
370 }
371
372 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
373
374 Node TermRegistry::getRegisterTermAtomicLemma(Node n,
375 LengthStatus s,
376 std::map<Node, bool>& reqPhase)
377 {
378 if (n.isConst())
379 {
380 // No need to send length for constant terms. This case may be triggered
381 // for cases where the skolem cache automatically replaces a skolem by
382 // a constant.
383 return Node::null();
384 }
385 Assert(n.getType().isStringLike());
386 NodeManager* nm = NodeManager::currentNM();
387 Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
388 Node emp = Word::mkEmptyWord(n.getType());
389 if (s == LENGTH_GEQ_ONE)
390 {
391 Node neq_empty = n.eqNode(emp).negate();
392 Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
393 Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
394 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
395 << std::endl;
396 Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
397 return len_geq_one;
398 }
399
400 if (s == LENGTH_ONE)
401 {
402 Node len_one = n_len.eqNode(d_one);
403 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
404 << std::endl;
405 Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
406 return len_one;
407 }
408 Assert(s == LENGTH_SPLIT);
409
410 std::vector<Node> lems;
411 // split whether the string is empty
412 Node n_len_eq_z = n_len.eqNode(d_zero);
413 Node n_len_eq_z_2 = n.eqNode(emp);
414 Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
415 case_empty = Rewriter::rewrite(case_empty);
416 Node case_nempty = nm->mkNode(GT, n_len, d_zero);
417 if (!case_empty.isConst())
418 {
419 Node lem = nm->mkNode(OR, case_empty, case_nempty);
420 lems.push_back(lem);
421 // prefer trying the empty case first
422 // notice that requirePhase must only be called on rewritten literals that
423 // occur in the CNF stream.
424 n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
425 Assert(!n_len_eq_z.isConst());
426 reqPhase[n_len_eq_z] = true;
427 n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
428 Assert(!n_len_eq_z_2.isConst());
429 reqPhase[n_len_eq_z_2] = true;
430 }
431 else if (!case_empty.getConst<bool>())
432 {
433 // the rewriter knows that n is non-empty
434 lems.push_back(case_nempty);
435 }
436 else
437 {
438 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
439 // n ---> "". Since this method is only called on non-constants n, it must
440 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
441 Assert(false);
442 }
443
444 if (lems.empty())
445 {
446 return Node::null();
447 }
448 return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
449 }
450
451 Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
452 {
453 if (n.getNumChildren() == 0)
454 {
455 Node pn = getProxyVariableFor(n);
456 if (pn.isNull())
457 {
458 return Node::null();
459 }
460 Node eq = n.eqNode(pn);
461 eq = Rewriter::rewrite(eq);
462 if (std::find(exp.begin(), exp.end(), eq) == exp.end())
463 {
464 exp.push_back(eq);
465 }
466 return pn;
467 }
468 std::vector<Node> children;
469 if (n.getMetaKind() == metakind::PARAMETERIZED)
470 {
471 children.push_back(n.getOperator());
472 }
473 for (const Node& nc : n)
474 {
475 if (n.getType().isRegExp())
476 {
477 children.push_back(nc);
478 }
479 else
480 {
481 Node ns = getSymbolicDefinition(nc, exp);
482 if (ns.isNull())
483 {
484 return Node::null();
485 }
486 else
487 {
488 children.push_back(ns);
489 }
490 }
491 }
492 return NodeManager::currentNM()->mkNode(n.getKind(), children);
493 }
494
495 Node TermRegistry::getProxyVariableFor(Node n) const
496 {
497 NodeNodeMap::const_iterator it = d_proxyVar.find(n);
498 if (it != d_proxyVar.end())
499 {
500 return (*it).second;
501 }
502 return Node::null();
503 }
504
505 void TermRegistry::inferSubstitutionProxyVars(Node n,
506 std::vector<Node>& vars,
507 std::vector<Node>& subs,
508 std::vector<Node>& unproc) const
509 {
510 if (n.getKind() == AND)
511 {
512 for (const Node& nc : n)
513 {
514 inferSubstitutionProxyVars(nc, vars, subs, unproc);
515 }
516 return;
517 }
518 if (n.getKind() == EQUAL)
519 {
520 Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
521 ns = Rewriter::rewrite(ns);
522 if (ns.getKind() == EQUAL)
523 {
524 Node s;
525 Node v;
526 for (unsigned i = 0; i < 2; i++)
527 {
528 Node ss;
529 // determine whether this side has a proxy variable
530 if (ns[i].getAttribute(StringsProxyVarAttribute()))
531 {
532 // it is a proxy variable
533 ss = ns[i];
534 }
535 else if (ns[i].isConst())
536 {
537 ss = getProxyVariableFor(ns[i]);
538 }
539 if (!ss.isNull())
540 {
541 v = ns[1 - i];
542 // if the other side is a constant or variable
543 if (v.getNumChildren() == 0)
544 {
545 if (s.isNull())
546 {
547 s = ss;
548 }
549 else
550 {
551 // both sides of the equality correspond to a proxy variable
552 if (ss == s)
553 {
554 // it is a trivial equality, e.g. between a proxy variable
555 // and its definition
556 return;
557 }
558 else
559 {
560 // equality between proxy variables, non-trivial
561 s = Node::null();
562 }
563 }
564 }
565 }
566 }
567 if (!s.isNull())
568 {
569 // the equality can be turned into a substitution
570 subs.push_back(s);
571 vars.push_back(v);
572 return;
573 }
574 }
575 else
576 {
577 n = ns;
578 }
579 }
580 if (!n.isConst() || !n.getConst<bool>())
581 {
582 unproc.push_back(n);
583 }
584 }
585
586 } // namespace strings
587 } // namespace theory
588 } // namespace CVC4