1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Andres Noetzli, Morgan Deters
5 * This file is part of the cvc5 project.
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
13 * Implementation of term registry for the theory of strings.
16 #include "theory/strings/term_registry.h"
18 #include "expr/attribute.h"
19 #include "options/smt_options.h"
20 #include "options/strings_options.h"
21 #include "smt/logic_exception.h"
22 #include "theory/rewriter.h"
23 #include "theory/strings/inference_manager.h"
24 #include "theory/strings/theory_strings_utils.h"
25 #include "theory/strings/word.h"
26 #include "util/rational.h"
27 #include "util/string.h"
30 using namespace cvc5::context
;
31 using namespace cvc5::kind
;
37 struct StringsProxyVarAttributeId
40 typedef expr::Attribute
<StringsProxyVarAttributeId
, bool>
41 StringsProxyVarAttribute
;
43 TermRegistry::TermRegistry(SolverState
& s
,
44 SequencesStatistics
& statistics
,
45 ProofNodeManager
* pnm
)
48 d_statistics(statistics
),
50 d_functionsTerms(s
.getSatContext()),
51 d_inputVars(s
.getUserContext()),
52 d_preregisteredTerms(s
.getSatContext()),
53 d_registeredTerms(s
.getUserContext()),
54 d_registeredTypes(s
.getUserContext()),
55 d_proxyVar(s
.getUserContext()),
56 d_lengthLemmaTermsCache(s
.getUserContext()),
57 d_epg(pnm
? new EagerProofGenerator(
60 "strings::TermRegistry::EagerProofGenerator")
63 NodeManager
* nm
= NodeManager::currentNM();
64 d_zero
= nm
->mkConst(Rational(0));
65 d_one
= nm
->mkConst(Rational(1));
66 d_negOne
= NodeManager::currentNM()->mkConst(Rational(-1));
67 d_cardSize
= utils::getAlphabetCardinality();
70 TermRegistry::~TermRegistry() {}
72 void TermRegistry::finishInit(InferenceManager
* im
) { d_im
= im
; }
74 Node
TermRegistry::eagerReduce(Node t
, SkolemCache
* sc
)
76 NodeManager
* nm
= NodeManager::currentNM();
78 Kind tk
= t
.getKind();
79 if (tk
== STRING_TO_CODE
)
81 // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
82 Node code_len
= utils::mkNLength(t
[0]).eqNode(nm
->mkConst(Rational(1)));
83 Node code_eq_neg1
= t
.eqNode(nm
->mkConst(Rational(-1)));
84 Node code_range
= nm
->mkNode(
86 nm
->mkNode(GEQ
, t
, nm
->mkConst(Rational(0))),
88 LT
, t
, nm
->mkConst(Rational(utils::getAlphabetCardinality()))));
89 lemma
= nm
->mkNode(ITE
, code_len
, code_range
, code_eq_neg1
);
91 else if (tk
== STRING_INDEXOF
|| tk
== STRING_INDEXOF_RE
)
94 // (or (= (f x y n) (- 1)) (>= (f x y n) n))
95 // (<= (f x y n) (str.len x)))
97 // where f in { str.indexof, str.indexof_re }
98 Node l
= nm
->mkNode(STRING_LENGTH
, t
[0]);
102 OR
, nm
->mkConst(Rational(-1)).eqNode(t
), nm
->mkNode(GEQ
, t
, t
[2])),
103 nm
->mkNode(LEQ
, t
, l
));
105 else if (tk
== STRING_STOI
)
107 // (>= (str.to_int x) (- 1))
108 lemma
= nm
->mkNode(GEQ
, t
, nm
->mkConst(Rational(-1)));
110 else if (tk
== STRING_CONTAINS
)
112 // ite( (str.contains s r), (= s (str.++ sk1 r sk2)), (not (= s r)))
114 sc
->mkSkolemCached(t
[0], t
[1], SkolemCache::SK_FIRST_CTN_PRE
, "sc1");
116 sc
->mkSkolemCached(t
[0], t
[1], SkolemCache::SK_FIRST_CTN_POST
, "sc2");
117 lemma
= t
[0].eqNode(utils::mkNConcat(sk1
, t
[1], sk2
));
118 lemma
= nm
->mkNode(ITE
, t
, lemma
, t
[0].eqNode(t
[1]).notNode());
123 Node
TermRegistry::lengthPositive(Node t
)
125 NodeManager
* nm
= NodeManager::currentNM();
126 Node zero
= nm
->mkConst(Rational(0));
127 Node emp
= Word::mkEmptyWord(t
.getType());
128 Node tlen
= nm
->mkNode(STRING_LENGTH
, t
);
129 Node tlenEqZero
= tlen
.eqNode(zero
);
130 Node tEqEmp
= t
.eqNode(emp
);
131 Node caseEmpty
= nm
->mkNode(AND
, tlenEqZero
, tEqEmp
);
132 Node caseNEmpty
= nm
->mkNode(GT
, tlen
, zero
);
133 // (or (and (= (str.len t) 0) (= t "")) (> (str.len t) 0))
134 return nm
->mkNode(OR
, caseEmpty
, caseNEmpty
);
137 void TermRegistry::preRegisterTerm(TNode n
)
139 if (d_preregisteredTerms
.find(n
) != d_preregisteredTerms
.end())
143 eq::EqualityEngine
* ee
= d_state
.getEqualityEngine();
144 d_preregisteredTerms
.insert(n
);
145 Trace("strings-preregister")
146 << "TheoryString::preregister : " << n
<< std::endl
;
147 // check for logic exceptions
148 Kind k
= n
.getKind();
149 if (!options::stringExp())
151 if (k
== STRING_INDEXOF
|| k
== STRING_INDEXOF_RE
|| k
== STRING_ITOS
152 || k
== STRING_STOI
|| k
== STRING_REPLACE
|| k
== STRING_SUBSTR
153 || k
== STRING_REPLACE_ALL
|| k
== SEQ_NTH
|| k
== STRING_REPLACE_RE
154 || k
== STRING_REPLACE_RE_ALL
|| k
== STRING_CONTAINS
|| k
== STRING_LEQ
155 || k
== STRING_TOLOWER
|| k
== STRING_TOUPPER
|| k
== STRING_REV
156 || k
== STRING_UPDATE
)
158 std::stringstream ss
;
159 ss
<< "Term of kind " << k
160 << " not supported in default mode, try --strings-exp";
161 throw LogicException(ss
.str());
166 if (n
[0].getType().isRegExp())
168 std::stringstream ss
;
169 ss
<< "Equality between regular expressions is not supported";
170 throw LogicException(ss
.str());
172 ee
->addTriggerPredicate(n
);
175 else if (k
== STRING_IN_REGEXP
)
177 d_im
->requirePhase(n
, true);
178 ee
->addTriggerPredicate(n
);
183 else if (k
== STRING_TO_CODE
)
187 else if (k
== REGEXP_RANGE
)
189 for (const Node
& nc
: n
)
193 throw LogicException(
194 "expecting a constant string term in regexp range");
196 if (nc
.getConst
<String
>().size() != 1)
198 throw LogicException(
199 "expecting a single constant string term in regexp range");
204 TypeNode tn
= n
.getType();
205 if (tn
.isRegExp() && n
.isVar())
207 std::stringstream ss
;
208 ss
<< "Regular expression variables are not supported.";
209 throw LogicException(ss
.str());
211 if (tn
.isString()) // string-only
213 // all characters of constants should fall in the alphabet
216 std::vector
<unsigned> vec
= n
.getConst
<String
>().getVec();
217 for (unsigned u
: vec
)
221 std::stringstream ss
;
222 ss
<< "Characters in string \"" << n
223 << "\" are outside of the given alphabet.";
224 throw LogicException(ss
.str());
230 else if (tn
.isBoolean())
232 // All kinds that we do congruence over that may return a Boolean go here
233 if (k
== STRING_CONTAINS
|| k
== STRING_LEQ
|| k
== SEQ_NTH
)
235 // Get triggered for both equal and dis-equal
236 ee
->addTriggerPredicate(n
);
241 // Function applications/predicates
244 // Set d_functionsTerms stores all function applications that are
245 // relevant to theory combination. Notice that this is a subset of
246 // the applications whose kinds are function kinds in the equality
247 // engine. This means it does not include applications of operators
248 // like re.++, which is not a function kind in the equality engine.
249 // Concatenation terms do not need to be considered here because
250 // their arguments have string type and do not introduce any shared
252 if (n
.hasOperator() && ee
->isFunctionKind(k
)
253 && kindToTheoryId(k
) == THEORY_STRINGS
&& k
!= STRING_CONCAT
)
255 d_functionsTerms
.push_back(n
);
257 if (options::stringFMF())
259 if (tn
.isStringLike())
261 // Our decision strategy will minimize the length of this term if it is a
262 // variable but not an internally generated Skolem, or a term that does
263 // not belong to this theory.
264 if (n
.isVar() ? !d_skCache
.isSkolem(n
)
265 : kindToTheoryId(k
) != THEORY_STRINGS
)
267 d_inputVars
.insert(n
);
268 Trace("strings-preregister") << "input variable: " << n
<< std::endl
;
274 void TermRegistry::registerTerm(Node n
, int effort
)
276 Trace("strings-register") << "TheoryStrings::registerTerm() " << n
277 << ", effort = " << effort
<< std::endl
;
278 if (d_registeredTerms
.find(n
) != d_registeredTerms
.end())
280 Trace("strings-register") << "...already registered" << std::endl
;
283 bool do_register
= true;
284 TypeNode tn
= n
.getType();
285 if (!tn
.isStringLike())
287 if (options::stringEagerLen())
289 do_register
= effort
== 0;
293 do_register
= effort
> 0 || n
.getKind() != STRING_CONCAT
;
298 Trace("strings-register") << "...do not register" << std::endl
;
301 Trace("strings-register") << "...register" << std::endl
;
302 d_registeredTerms
.insert(n
);
303 // ensure the type is registered
305 TrustNode regTermLem
;
306 if (tn
.isStringLike())
308 // register length information:
309 // for variables, split on empty vs positive length
310 // for concat/const/replace, introduce proxy var and state length relation
311 regTermLem
= getRegisterTermLemma(n
);
313 else if (n
.getKind() != STRING_CONTAINS
)
315 // we don't send out eager reduction lemma for str.contains currently
316 Node eagerRedLemma
= eagerReduce(n
, &d_skCache
);
317 if (!eagerRedLemma
.isNull())
319 // if there was an eager reduction, we make the trust node for it
320 if (d_epg
!= nullptr)
322 regTermLem
= d_epg
->mkTrustNode(
323 eagerRedLemma
, PfRule::STRING_EAGER_REDUCTION
, {}, {n
});
327 regTermLem
= TrustNode::mkTrustLemma(eagerRedLemma
, nullptr);
331 if (!regTermLem
.isNull())
333 Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
335 Trace("strings-assert")
336 << "(assert " << regTermLem
.getNode() << ")" << std::endl
;
337 d_im
->trustedLemma(regTermLem
, InferenceId::STRINGS_REGISTER_TERM
);
341 void TermRegistry::registerType(TypeNode tn
)
343 if (d_registeredTypes
.find(tn
) != d_registeredTypes
.end())
347 d_registeredTypes
.insert(tn
);
348 if (tn
.isStringLike())
350 // preregister the empty word for the type
351 Node emp
= Word::mkEmptyWord(tn
);
352 if (!d_state
.hasTerm(emp
))
354 preRegisterTerm(emp
);
359 TrustNode
TermRegistry::getRegisterTermLemma(Node n
)
361 Assert(n
.getType().isStringLike());
362 NodeManager
* nm
= NodeManager::currentNM();
363 // register length information:
364 // for variables, split on empty vs positive length
365 // for concat/const/replace, introduce proxy var and state length relation
367 if (n
.getKind() != STRING_CONCAT
&& !n
.isConst())
369 Node lsumb
= nm
->mkNode(STRING_LENGTH
, n
);
370 lsum
= Rewriter::rewrite(lsumb
);
371 // can register length term if it does not rewrite
374 registerTermAtomic(n
, LENGTH_SPLIT
);
375 return TrustNode::null();
378 Node sk
= d_skCache
.mkSkolemCached(n
, SkolemCache::SK_PURIFY
, "lsym");
379 StringsProxyVarAttribute spva
;
380 sk
.setAttribute(spva
, true);
381 Node eq
= Rewriter::rewrite(sk
.eqNode(n
));
383 // If we are introducing a proxy for a constant or concat term, we do not
384 // need to send lemmas about its length, since its length is already
386 if (n
.isConst() || n
.getKind() == STRING_CONCAT
)
388 // do not send length lemma for sk.
389 registerTermAtomic(sk
, LENGTH_IGNORE
);
391 Node skl
= nm
->mkNode(STRING_LENGTH
, sk
);
392 if (n
.getKind() == STRING_CONCAT
)
394 std::vector
<Node
> nodeVec
;
395 for (const Node
& nc
: n
)
397 if (nc
.getAttribute(StringsProxyVarAttribute()))
399 Assert(d_proxyVarToLength
.find(nc
) != d_proxyVarToLength
.end());
400 nodeVec
.push_back(d_proxyVarToLength
[nc
]);
404 Node lni
= nm
->mkNode(STRING_LENGTH
, nc
);
405 nodeVec
.push_back(lni
);
408 lsum
= nm
->mkNode(PLUS
, nodeVec
);
409 lsum
= Rewriter::rewrite(lsum
);
411 else if (n
.isConst())
413 lsum
= nm
->mkConst(Rational(Word::getLength(n
)));
415 Assert(!lsum
.isNull());
416 d_proxyVarToLength
[sk
] = lsum
;
417 Node ceq
= Rewriter::rewrite(skl
.eqNode(lsum
));
419 Node ret
= nm
->mkNode(AND
, eq
, ceq
);
421 // it is a simple rewrite to justify this
422 if (d_epg
!= nullptr)
424 return d_epg
->mkTrustNode(ret
, PfRule::MACRO_SR_PRED_INTRO
, {}, {ret
});
426 return TrustNode::mkTrustLemma(ret
, nullptr);
429 void TermRegistry::registerTermAtomic(Node n
, LengthStatus s
)
431 if (d_lengthLemmaTermsCache
.find(n
) != d_lengthLemmaTermsCache
.end())
435 d_lengthLemmaTermsCache
.insert(n
);
437 if (s
== LENGTH_IGNORE
)
442 std::map
<Node
, bool> reqPhase
;
443 TrustNode lenLem
= getRegisterTermAtomicLemma(n
, s
, reqPhase
);
444 if (!lenLem
.isNull())
446 Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
448 Trace("strings-assert")
449 << "(assert " << lenLem
.getNode() << ")" << std::endl
;
450 d_im
->trustedLemma(lenLem
, InferenceId::STRINGS_REGISTER_TERM_ATOMIC
);
452 for (const std::pair
<const Node
, bool>& rp
: reqPhase
)
454 d_im
->requirePhase(rp
.first
, rp
.second
);
458 SkolemCache
* TermRegistry::getSkolemCache() { return &d_skCache
; }
460 const context::CDList
<TNode
>& TermRegistry::getFunctionTerms() const
462 return d_functionsTerms
;
465 const context::CDHashSet
<Node
>& TermRegistry::getInputVars() const
470 bool TermRegistry::hasStringCode() const { return d_hasStrCode
; }
472 TrustNode
TermRegistry::getRegisterTermAtomicLemma(
473 Node n
, LengthStatus s
, std::map
<Node
, bool>& reqPhase
)
477 // No need to send length for constant terms. This case may be triggered
478 // for cases where the skolem cache automatically replaces a skolem by
480 return TrustNode::null();
482 Assert(n
.getType().isStringLike());
483 NodeManager
* nm
= NodeManager::currentNM();
484 Node n_len
= nm
->mkNode(kind::STRING_LENGTH
, n
);
485 Node emp
= Word::mkEmptyWord(n
.getType());
486 if (s
== LENGTH_GEQ_ONE
)
488 Node neq_empty
= n
.eqNode(emp
).negate();
489 Node len_n_gt_z
= nm
->mkNode(GT
, n_len
, d_zero
);
490 Node len_geq_one
= nm
->mkNode(AND
, neq_empty
, len_n_gt_z
);
491 Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
493 Trace("strings-assert") << "(assert " << len_geq_one
<< ")" << std::endl
;
494 return TrustNode::mkTrustLemma(len_geq_one
, nullptr);
499 Node len_one
= n_len
.eqNode(d_one
);
500 Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
502 Trace("strings-assert") << "(assert " << len_one
<< ")" << std::endl
;
503 return TrustNode::mkTrustLemma(len_one
, nullptr);
505 Assert(s
== LENGTH_SPLIT
);
507 // get the positive length lemma
508 Node lenLemma
= lengthPositive(n
);
509 // split whether the string is empty
510 Node n_len_eq_z
= n_len
.eqNode(d_zero
);
511 Node n_len_eq_z_2
= n
.eqNode(emp
);
512 Node case_empty
= nm
->mkNode(AND
, n_len_eq_z
, n_len_eq_z_2
);
513 Node case_emptyr
= Rewriter::rewrite(case_empty
);
514 if (!case_emptyr
.isConst())
516 // prefer trying the empty case first
517 // notice that requirePhase must only be called on rewritten literals that
518 // occur in the CNF stream.
519 n_len_eq_z
= Rewriter::rewrite(n_len_eq_z
);
520 Assert(!n_len_eq_z
.isConst());
521 reqPhase
[n_len_eq_z
] = true;
522 n_len_eq_z_2
= Rewriter::rewrite(n_len_eq_z_2
);
523 Assert(!n_len_eq_z_2
.isConst());
524 reqPhase
[n_len_eq_z_2
] = true;
528 // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
529 // n ---> "". Since this method is only called on non-constants n, it must
530 // be that n = "" ^ len( n ) = 0 does not rewrite to true.
531 Assert(!case_emptyr
.getConst
<bool>());
534 if (d_epg
!= nullptr)
536 return d_epg
->mkTrustNode(lenLemma
, PfRule::STRING_LENGTH_POS
, {}, {n
});
538 return TrustNode::mkTrustLemma(lenLemma
, nullptr);
541 Node
TermRegistry::getSymbolicDefinition(Node n
, std::vector
<Node
>& exp
) const
543 if (n
.getNumChildren() == 0)
545 Node pn
= getProxyVariableFor(n
);
550 Node eq
= n
.eqNode(pn
);
551 eq
= Rewriter::rewrite(eq
);
552 if (std::find(exp
.begin(), exp
.end(), eq
) == exp
.end())
558 std::vector
<Node
> children
;
559 if (n
.getMetaKind() == metakind::PARAMETERIZED
)
561 children
.push_back(n
.getOperator());
563 for (const Node
& nc
: n
)
565 if (n
.getType().isRegExp())
567 children
.push_back(nc
);
571 Node ns
= getSymbolicDefinition(nc
, exp
);
578 children
.push_back(ns
);
582 return NodeManager::currentNM()->mkNode(n
.getKind(), children
);
585 Node
TermRegistry::getProxyVariableFor(Node n
) const
587 NodeNodeMap::const_iterator it
= d_proxyVar
.find(n
);
588 if (it
!= d_proxyVar
.end())
595 Node
TermRegistry::ensureProxyVariableFor(Node n
)
597 Node proxy
= getProxyVariableFor(n
);
601 proxy
= getProxyVariableFor(n
);
603 Assert(!proxy
.isNull());
607 void TermRegistry::removeProxyEqs(Node n
, std::vector
<Node
>& unproc
) const
609 if (n
.getKind() == AND
)
611 for (const Node
& nc
: n
)
613 removeProxyEqs(nc
, unproc
);
617 Trace("strings-subs-proxy") << "Input : " << n
<< std::endl
;
618 Node ns
= Rewriter::rewrite(n
);
619 if (ns
.getKind() == EQUAL
)
621 for (size_t i
= 0; i
< 2; i
++)
623 // determine whether this side has a proxy variable
624 if (ns
[i
].getAttribute(StringsProxyVarAttribute()))
626 if (getProxyVariableFor(ns
[1 - i
]) == ns
[i
])
628 Trace("strings-subs-proxy")
629 << "...trivial definition via " << ns
[i
] << std::endl
;
630 // it is a trivial equality, e.g. between a proxy variable
631 // and its definition
637 if (!n
.isConst() || !n
.getConst
<bool>())
639 Trace("strings-subs-proxy") << "...unprocessed" << std::endl
;
644 } // namespace strings
645 } // namespace theory