1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Andres Noetzli, Tianyi Liang
5 * This file is part of the cvc5 project.
7 * Copyright (c) 2009-2022 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
13 * Implementation of the theory of strings.
16 #include "theory/strings/theory_strings.h"
18 #include "expr/attribute.h"
19 #include "expr/bound_var_manager.h"
20 #include "expr/kind.h"
21 #include "expr/sequence.h"
22 #include "options/smt_options.h"
23 #include "options/strings_options.h"
24 #include "options/theory_options.h"
25 #include "smt/logic_exception.h"
26 #include "theory/decision_manager.h"
27 #include "theory/ext_theory.h"
28 #include "theory/rewriter.h"
29 #include "theory/strings/theory_strings_utils.h"
30 #include "theory/strings/type_enumerator.h"
31 #include "theory/strings/word.h"
32 #include "theory/theory_model.h"
33 #include "theory/valuation.h"
36 using namespace cvc5::context
;
37 using namespace cvc5::internal::kind
;
39 namespace cvc5::internal
{
44 * Attribute used for making unique (bound variables) which correspond to
45 * unique element values used in sequence models. See use in collectModelValues
48 struct SeqModelVarAttributeId
51 using SeqModelVarAttribute
= expr::Attribute
<SeqModelVarAttributeId
, Node
>;
53 TheoryStrings::TheoryStrings(Env
& env
, OutputChannel
& out
, Valuation valuation
)
54 : Theory(THEORY_STRINGS
, env
, out
, valuation
),
57 d_state(env
, d_valuation
),
58 d_termReg(env
, *this, d_state
, d_statistics
, d_pnm
),
59 d_rewriter(env
.getRewriter(),
60 &d_statistics
.d_rewrites
,
61 d_termReg
.getAlphabetCardinality()),
62 d_eagerSolver(options().strings
.stringEagerSolver
63 ? new EagerSolver(env
, d_state
, d_termReg
)
66 d_im(env
, *this, d_state
, d_termReg
, d_extTheory
, d_statistics
),
67 d_extTheory(env
, d_extTheoryCb
, d_im
),
68 // the checker depends on the cardinality of the alphabet
69 d_checker(d_termReg
.getAlphabetCardinality()),
70 d_bsolver(env
, d_state
, d_im
, d_termReg
),
71 d_csolver(env
, d_state
, d_im
, d_termReg
, d_bsolver
),
82 env
, d_state
, d_im
, d_termReg
, d_csolver
, d_esolver
, d_extTheory
),
84 env
, d_state
, d_im
, d_termReg
, d_csolver
, d_esolver
, d_statistics
),
86 options().strings
.regExpElim
== options::RegExpElimMode::AGG
,
89 d_stringsFmf(env
, valuation
, d_termReg
),
94 d_termReg
.finishInit(&d_im
);
96 d_zero
= NodeManager::currentNM()->mkConstInt(Rational(0));
97 d_one
= NodeManager::currentNM()->mkConstInt(Rational(1));
98 d_neg_one
= NodeManager::currentNM()->mkConstInt(Rational(-1));
99 d_true
= NodeManager::currentNM()->mkConst( true );
100 d_false
= NodeManager::currentNM()->mkConst( false );
102 // set up the extended function callback
103 d_extTheoryCb
.d_esolver
= &d_esolver
;
105 // use the state object as the official theory state
106 d_theoryState
= &d_state
;
107 // use the inference manager as the official inference manager
108 d_inferManager
= &d_im
;
111 TheoryStrings::~TheoryStrings() {
115 TheoryRewriter
* TheoryStrings::getTheoryRewriter() { return &d_rewriter
; }
117 ProofRuleChecker
* TheoryStrings::getProofChecker() { return &d_checker
; }
119 bool TheoryStrings::needsEqualityEngine(EeSetupInfo
& esi
)
121 esi
.d_notify
= &d_notify
;
122 esi
.d_name
= "theory::strings::ee";
123 esi
.d_notifyNewClass
= true;
124 esi
.d_notifyMerge
= true;
125 esi
.d_notifyDisequal
= true;
129 void TheoryStrings::finishInit()
131 Assert(d_equalityEngine
!= nullptr);
133 // witness is used to eliminate str.from_code
134 d_valuation
.setUnevaluatedKind(WITNESS
);
136 bool eagerEval
= options().strings
.stringEagerEval
;
137 // The kinds we are treating as function application in congruence
138 d_equalityEngine
->addFunctionKind(kind::STRING_LENGTH
, eagerEval
);
139 d_equalityEngine
->addFunctionKind(kind::STRING_CONCAT
, eagerEval
);
140 d_equalityEngine
->addFunctionKind(kind::STRING_IN_REGEXP
, eagerEval
);
141 d_equalityEngine
->addFunctionKind(kind::STRING_TO_CODE
, eagerEval
);
142 d_equalityEngine
->addFunctionKind(kind::SEQ_UNIT
, eagerEval
);
143 // `seq.nth` is not always defined, and so we do not evaluate it eagerly.
144 d_equalityEngine
->addFunctionKind(kind::SEQ_NTH
, false);
145 // extended functions
146 d_equalityEngine
->addFunctionKind(kind::STRING_CONTAINS
, eagerEval
);
147 d_equalityEngine
->addFunctionKind(kind::STRING_LEQ
, eagerEval
);
148 d_equalityEngine
->addFunctionKind(kind::STRING_SUBSTR
, eagerEval
);
149 d_equalityEngine
->addFunctionKind(kind::STRING_UPDATE
, eagerEval
);
150 d_equalityEngine
->addFunctionKind(kind::STRING_ITOS
, eagerEval
);
151 d_equalityEngine
->addFunctionKind(kind::STRING_STOI
, eagerEval
);
152 d_equalityEngine
->addFunctionKind(kind::STRING_INDEXOF
, eagerEval
);
153 d_equalityEngine
->addFunctionKind(kind::STRING_INDEXOF_RE
, eagerEval
);
154 d_equalityEngine
->addFunctionKind(kind::STRING_REPLACE
, eagerEval
);
155 d_equalityEngine
->addFunctionKind(kind::STRING_REPLACE_ALL
, eagerEval
);
156 d_equalityEngine
->addFunctionKind(kind::STRING_REPLACE_RE
, eagerEval
);
157 d_equalityEngine
->addFunctionKind(kind::STRING_REPLACE_RE_ALL
, eagerEval
);
158 d_equalityEngine
->addFunctionKind(kind::STRING_REPLACE_ALL
, eagerEval
);
159 d_equalityEngine
->addFunctionKind(kind::STRING_TO_LOWER
, eagerEval
);
160 d_equalityEngine
->addFunctionKind(kind::STRING_TO_UPPER
, eagerEval
);
161 d_equalityEngine
->addFunctionKind(kind::STRING_REV
, eagerEval
);
163 // memberships are not relevant for model building
164 d_valuation
.setIrrelevantKind(kind::STRING_IN_REGEXP
);
167 std::string
TheoryStrings::identify() const
169 return std::string("TheoryStrings");
172 bool TheoryStrings::propagateLit(TNode literal
)
174 return d_im
.propagateLit(literal
);
177 TrustNode
TheoryStrings::explain(TNode literal
)
179 Trace("strings-explain") << "explain called on " << literal
<< std::endl
;
180 return d_im
.explainLit(literal
);
183 void TheoryStrings::presolve() {
184 Trace("strings-presolve")
185 << "TheoryStrings::Presolving : get fmf options "
186 << (options().strings
.stringFMF
? "true" : "false") << std::endl
;
187 d_strat
.initializeStrategy();
189 // if strings fmf is enabled, register the strategy
190 if (options().strings
.stringFMF
)
192 d_stringsFmf
.presolve();
193 // This strategy is local to a check-sat call, since we refresh the strategy
194 // on every call to presolve.
195 d_im
.getDecisionManager()->registerStrategy(
196 DecisionManager::STRAT_STRINGS_SUM_LENGTHS
,
197 d_stringsFmf
.getDecisionStrategy(),
198 DecisionManager::STRAT_SCOPE_LOCAL_SOLVE
);
200 Trace("strings-presolve") << "Finished presolve" << std::endl
;
203 /////////////////////////////////////////////////////////////////////////////
205 /////////////////////////////////////////////////////////////////////////////
207 bool TheoryStrings::collectModelValues(TheoryModel
* m
,
208 const std::set
<Node
>& termSet
)
210 if (TraceIsOn("strings-debug-model"))
212 Trace("strings-debug-model")
213 << "TheoryStrings::collectModelValues" << std::endl
;
214 Trace("strings-debug-model") << "Equivalence classes are:" << std::endl
;
215 Trace("strings-debug-model") << debugPrintStringsEqc() << std::endl
;
216 Trace("strings-debug-model") << "Extended functions are:" << std::endl
;
217 Trace("strings-debug-model") << d_esolver
.debugPrintModel() << std::endl
;
219 Trace("strings-model") << "TheoryStrings::collectModelValues" << std::endl
;
220 // Collects representatives by types and orders sequence types by how nested
222 std::map
<TypeNode
, std::unordered_set
<Node
>> repSet
;
223 std::unordered_set
<TypeNode
> toProcess
;
225 // get the relevant string equivalence classes
226 for (const Node
& s
: termSet
)
228 TypeNode tn
= s
.getType();
229 if (tn
.isStringLike())
231 Node r
= d_state
.getRepresentative(s
);
232 repSet
[tn
].insert(r
);
233 toProcess
.insert(tn
);
237 while (!toProcess
.empty())
239 // Pick one of the remaining types to collect model values for
240 TypeNode tn
= *toProcess
.begin();
241 if (!collectModelInfoType(tn
, toProcess
, repSet
, m
))
251 * Object to sort by the value of pairs in the write model returned by the
252 * sequences array solver.
257 /** the comparison */
258 bool operator()(const std::pair
<Node
, Node
>& i
,
259 const std::pair
<Node
, Node
>& j
)
261 Assert(i
.first
.isConst() && i
.first
.getType().isInteger()
262 && j
.first
.isConst() && j
.first
.getType().isInteger());
263 Assert(i
.first
!= j
.first
);
264 return i
.first
.getConst
<Rational
>() < j
.first
.getConst
<Rational
>();
268 bool TheoryStrings::collectModelInfoType(
270 std::unordered_set
<TypeNode
>& toProcess
,
271 const std::map
<TypeNode
, std::unordered_set
<Node
> >& repSet
,
274 // Make sure that the model values for the element type of sequences are
276 if (tn
.isSequence() && tn
.getSequenceElementType().isSequence())
278 TypeNode tnElem
= tn
.getSequenceElementType();
279 if (toProcess
.find(tnElem
) != toProcess
.end()
280 && !collectModelInfoType(tnElem
, toProcess
, repSet
, m
))
288 // get partition of strings of equal lengths for the representatives of the
290 std::map
<TypeNode
, std::vector
<std::vector
<Node
> > > colT
;
291 std::map
<TypeNode
, std::vector
<Node
> > ltsT
;
292 const std::vector
<Node
> repVec(repSet
.at(tn
).begin(), repSet
.at(tn
).end());
293 d_state
.separateByLength(repVec
, colT
, ltsT
);
294 const std::vector
<std::vector
<Node
> >& col
= colT
[tn
];
295 const std::vector
<Node
>& lts
= ltsT
[tn
];
296 // indices in col that have lengths that are too big to represent
297 std::unordered_set
<size_t> oobIndices
;
299 NodeManager
* nm
= NodeManager::currentNM();
300 std::map
< Node
, Node
> processed
;
301 //step 1 : get all values for known lengths
302 std::vector
< Node
> lts_values
;
303 std::map
<std::size_t, Node
> values_used
;
304 std::vector
<Node
> len_splits
;
305 for (size_t i
= 0, csize
= col
.size(); i
< csize
; i
++)
307 Trace("strings-model") << "Checking length for { " << col
[i
];
308 Trace("strings-model") << " } (length is " << lts
[i
] << ")" << std::endl
;
310 if( lts
[i
].isConst() ) {
313 else if (!lts
[i
].isNull())
315 // get the model value for lts[i]
316 len_value
= d_valuation
.getModelValue(lts
[i
]);
318 if (len_value
.isNull())
320 lts_values
.push_back(Node::null());
322 else if (len_value
.getConst
<Rational
>() > options().strings
.stringsModelMaxLength
)
324 // note that we give a warning instead of throwing logic exception if we
325 // cannot construct the string, these are then assigned witness terms
327 warning() << "The model was computed to have strings of length "
328 << len_value
<< ". Based on the current value of option --strings-model-max-len, we only allow strings up to length "
329 << options().strings
.stringsModelMaxLength
<< std::endl
;
330 oobIndices
.insert(i
);
331 lts_values
.push_back(len_value
);
336 len_value
.getConst
<Rational
>().getNumerator().toUnsignedInt();
337 auto itvu
= values_used
.find(lvalue
);
338 if (itvu
== values_used
.end())
340 values_used
[lvalue
] = lts
[i
];
344 len_splits
.push_back(lts
[i
].eqNode(itvu
->second
));
346 lts_values
.push_back(len_value
);
349 ////step 2 : assign arbitrary values for unknown lengths?
350 // confirmed by calculus invariant, see paper
351 Trace("strings-model") << "Assign to equivalence classes..." << std::endl
;
352 std::map
<Node
, Node
> pure_eq_assign
;
353 // if we are using the sequences array solver, get the connected sequences
354 const std::map
<Node
, Node
>* conSeq
= nullptr;
355 std::map
<Node
, Node
>::const_iterator itcs
;
356 if (options().strings
.seqArray
!= options::SeqArrayMode::NONE
)
358 conSeq
= &d_asolver
.getConnectedSequences();
360 //step 3 : assign values to equivalence classes that are pure variables
361 for (size_t i
= 0, csize
= col
.size(); i
< csize
; i
++)
363 bool wasOob
= (oobIndices
.find(i
) != oobIndices
.end());
364 std::vector
< Node
> pure_eq
;
365 Node lenValue
= lts_values
[i
];
366 Trace("strings-model") << "Considering (" << col
[i
].size()
367 << ") equivalence classes for length " << lenValue
369 for (const Node
& eqc
: col
[i
])
371 Trace("strings-model") << "- eqc: " << eqc
<< std::endl
;
372 //check if col[i][j] has only variables
375 processed
[eqc
] = eqc
;
376 // Make sure that constants are asserted to the theory model that we
377 // are building. It is possible that new constants were introduced by
378 // the eager evaluation in the equality engine. These terms are missing
379 // in the term set and, as a result, are skipped when the equality
380 // engine is asserted to the theory model.
381 m
->getEqualityEngine()->addTerm(eqc
);
383 // For sequences constants, also add the elements (expanding elements
385 if (eqc
.getType().isSequence())
387 const std::vector
<Node
> elems
= eqc
.getConst
<Sequence
>().getVec();
388 std::vector
<TNode
> visit(elems
.begin(), elems
.end());
389 for (size_t j
= 0; j
< visit
.size(); j
++)
392 Assert(se
.isConst());
393 if (se
.getType().isSequence())
395 const std::vector
<Node
> selems
= se
.getConst
<Sequence
>().getVec();
396 visit
.insert(visit
.end(), selems
.begin(), selems
.end());
398 m
->getEqualityEngine()->addTerm(se
);
402 Trace("strings-model") << "-> constant" << std::endl
;
405 NormalForm
& nfe
= d_csolver
.getNormalForm(eqc
);
406 if (nfe
.d_nf
.size() != 1)
408 // will be assigned via a concatenation of normal form eqc
411 // check if the length is too big to represent
414 processed
[eqc
] = eqc
;
415 Assert(!lenValue
.isNull() && lenValue
.isConst());
416 // make the abstract value (witness ((x String)) (= (str.len x)
418 Node w
= utils::mkAbstractStringValueForLength(
419 eqc
, lenValue
, d_absModelCounter
);
421 Trace("strings-model")
422 << "-> length out of bounds, assign abstract " << w
<< std::endl
;
423 if (!m
->assertEquality(eqc
, w
, true))
425 Unreachable() << "TheoryStrings::collectModelInfoType: Inconsistent "
432 // ensure we have decided on length value at this point
433 if (lenValue
.isNull())
435 // start with length two (other lengths have special precendence)
437 while (values_used
.find(lvalue
) != values_used
.end())
441 Trace("strings-model")
442 << "*** Decide to make length of " << lvalue
<< std::endl
;
443 lenValue
= nm
->mkConstInt(Rational(lvalue
));
444 values_used
[lvalue
] = Node::null();
446 // is it an equivalence class with a seq.unit term?
448 if (nfe
.d_nf
[0].getKind() == SEQ_UNIT
)
451 if (nfe
.d_nf
[0][0].getType().isStringLike())
453 // By this point, we should have assigned model values for the
454 // elements of this sequence type because of the check in the
455 // beginning of this method
456 argVal
= m
->getRepresentative(nfe
.d_nf
[0][0]);
460 // Otherwise, we use the term itself. We cannot get the model
461 // value of this term, since it might not be available yet, as
462 // it may belong to a theory that has not built its model yet.
463 // Hence, we assign a (non-constant) skeleton (seq.unit argVal).
464 argVal
= nfe
.d_nf
[0][0];
466 Assert(!argVal
.isNull()) << "No value for " << nfe
.d_nf
[0][0];
467 assignedValue
= rewrite(
468 nm
->mkSeqUnit(eqc
.getType().getSequenceElementType(), argVal
));
469 Trace("strings-model")
470 << "-> assign via seq.unit: " << assignedValue
<< std::endl
;
472 else if (d_termReg
.hasStringCode() && lenValue
== d_one
)
474 // It has a code and the length of these equivalence classes are one.
475 // Note this code is solely for strings, not sequences.
476 EqcInfo
* eip
= d_state
.getOrMakeEqcInfo(eqc
, false);
477 if (eip
&& !eip
->d_codeTerm
.get().isNull())
479 // its value must be equal to its code
480 Node ct
= nm
->mkNode(kind::STRING_TO_CODE
, eip
->d_codeTerm
.get());
481 Node ctv
= d_valuation
.getModelValue(ct
);
483 ctv
.getConst
<Rational
>().getNumerator().toUnsignedInt();
484 Trace("strings-model") << "(code: " << cvalue
<< ") ";
485 std::vector
<unsigned> vec
;
486 vec
.push_back(cvalue
);
487 assignedValue
= nm
->mkConst(String(vec
));
488 Trace("strings-model")
489 << "-> assign via str.code: " << assignedValue
<< std::endl
;
492 else if (options().strings
.seqArray
!= options::SeqArrayMode::NONE
)
494 TypeNode etype
= eqc
.getType().getSequenceElementType();
495 // determine skeleton based on the write model, if it exists
496 const std::map
<Node
, Node
>& writeModel
= d_asolver
.getWriteModel(eqc
);
497 Trace("strings-model")
498 << "write model size " << writeModel
.size() << std::endl
;
499 if (!writeModel
.empty())
501 Trace("strings-model")
502 << "Write model for " << tn
<< " is:" << std::endl
;
503 std::vector
<std::pair
<Node
, Node
>> writes
;
504 std::unordered_set
<Node
> usedWrites
;
505 for (const std::pair
<const Node
, Node
>& w
: writeModel
)
507 Trace("strings-model")
508 << " " << w
.first
<< " -> " << w
.second
<< std::endl
;
509 Node ivalue
= d_valuation
.getModelValue(w
.first
);
510 Assert(ivalue
.isConst() && ivalue
.getType().isInteger());
511 // ignore if out of bounds
512 Rational irat
= ivalue
.getConst
<Rational
>();
513 if (irat
.sgn() == -1 || irat
>= lenValue
.getConst
<Rational
>())
517 if (usedWrites
.find(ivalue
) != usedWrites
.end())
521 usedWrites
.insert(ivalue
);
522 Node wsunit
= nm
->mkSeqUnit(etype
, w
.second
);
523 writes
.emplace_back(ivalue
, wsunit
);
525 // sort based on index value
527 std::sort(writes
.begin(), writes
.end(), ssi
);
528 std::vector
<Node
> cc
;
529 uint32_t currIndex
= 0;
530 for (size_t w
= 0, wsize
= writes
.size(); w
<= wsize
; w
++)
533 if (w
== writes
.size())
536 lenValue
.getConst
<Rational
>().getNumerator().toUnsignedInt();
540 Node windex
= writes
[w
].first
;
541 Assert(windex
.getConst
<Rational
>()
542 <= Rational(String::maxSize()));
544 windex
.getConst
<Rational
>().getNumerator().toUnsignedInt();
545 Assert(nextIndex
>= currIndex
);
547 if (nextIndex
> currIndex
)
549 // allocate arbitrary value to fill gap
550 Assert(conSeq
!= nullptr);
552 itcs
= conSeq
->find(eqc
);
553 if (itcs
!= conSeq
->end())
557 // use a skeleton for the gap and not a concrete value, as we
558 // do not know how which values from the element type are
559 // allowable (i.e. unconstrained) to assign to the gap
560 Node cgap
= mkSkeletonFromBase(base
, currIndex
, nextIndex
);
566 cc
.push_back(writes
[w
].second
);
568 currIndex
= nextIndex
+ 1;
570 assignedValue
= utils::mkConcat(cc
, tn
);
571 Trace("strings-model")
572 << "-> assign via seq.update/nth eqc: " << assignedValue
576 if (!assignedValue
.isNull())
578 pure_eq_assign
[eqc
] = assignedValue
;
579 m
->getEqualityEngine()->addTerm(assignedValue
);
583 Trace("strings-model") << "-> no assignment" << std::endl
;
585 pure_eq
.push_back(eqc
);
588 //assign a new length if necessary
589 if( !pure_eq
.empty() ){
590 Trace("strings-model") << "Need to assign values of length " << lenValue
591 << " to equivalence classes ";
592 for( unsigned j
=0; j
<pure_eq
.size(); j
++ ){
593 Trace("strings-model") << pure_eq
[j
] << " ";
595 Trace("strings-model") << std::endl
;
597 //use type enumerator
598 Assert(lenValue
.getConst
<Rational
>() <= Rational(String::maxSize()))
599 << "Exceeded UINT32_MAX in string model";
601 lenValue
.getConst
<Rational
>().getNumerator().toUnsignedInt();
602 Trace("strings-model") << "Cardinality of alphabet is "
603 << d_termReg
.getAlphabetCardinality() << std::endl
;
604 SEnumLen
* sel
= sels
.getEnumerator(currLen
, tn
);
605 for (const Node
& eqc
: pure_eq
)
608 std::map
<Node
, Node
>::iterator itp
= pure_eq_assign
.find(eqc
);
609 if (itp
== pure_eq_assign
.end())
613 if (sel
->isFinished())
615 // We are in a case where model construction is impossible due to
616 // an insufficient number of constants of a given length.
618 // Consider an integer equivalence class E whose value is assigned
619 // n in the model. Let { S_1, ..., S_m } be the set of string
620 // equivalence classes such that len( x ) is a member of E for
621 // some member x of each class S1, ...,Sm. Since our calculus is
622 // saturated with respect to cardinality inference (see Liang
623 // et al, Figure 6, CAV 2014), we have that m <= A^n, where A is
624 // the cardinality of our alphabet.
626 // Now, consider the case where there exists two integer
627 // equivalence classes E1 and E2 that are assigned n, and moreover
628 // we did not received notification from arithmetic that E1 = E2.
629 // This typically should never happen, but assume in the following
632 // Now, it may be the case that there are string equivalence
633 // classes { S_1, ..., S_m1 } whose lengths are in E1,
634 // and classes { S'_1, ..., S'_m2 } whose lengths are in E2, where
635 // m1 + m2 > A^n. In this case, we have insufficient strings to
636 // assign to { S_1, ..., S_m1, S'_1, ..., S'_m2 }. If this
637 // happens, we add a split on len( u1 ) = len( u2 ) for some
638 // len( u1 ) in E1, len( u2 ) in E2. We do this for each pair of
639 // integer equivalence classes that are assigned to the same value
641 AlwaysAssert(!len_splits
.empty());
642 for (const Node
& sl
: len_splits
)
644 Node spl
= nm
->mkNode(OR
, sl
, sl
.negate());
645 d_im
.lemma(spl
, InferenceId::STRINGS_CMI_SPLIT
);
646 Trace("strings-lemma")
647 << "Strings::CollectModelInfoSplit: " << spl
<< std::endl
;
649 // we added a lemma, so can return here
652 c
= sel
->getCurrent();
653 // if we are a sequence with infinite element type
655 && !d_env
.isFiniteType(tn
.getSequenceElementType()))
657 // Make a skeleton instead.
658 c
= mkSkeletonFor(c
);
662 } while (m
->hasTerm(c
));
668 Trace("strings-model") << "*** Assigned constant " << c
<< " for "
671 if (!m
->assertEquality(eqc
, c
, true))
673 // this should never happen due to the model soundness argument
676 << "TheoryStrings::collectModelInfoType: Inconsistent equality"
683 Trace("strings-model") << "String Model : Pure Assigned." << std::endl
;
684 //step 4 : assign constants to all other equivalence classes
685 for (const Node
& rn
: repVec
)
687 if (processed
.find(rn
) == processed
.end())
689 NormalForm
& nf
= d_csolver
.getNormalForm(rn
);
690 if (TraceIsOn("strings-model"))
692 Trace("strings-model")
693 << "Construct model for " << rn
<< " based on normal form ";
694 for (unsigned j
= 0, size
= nf
.d_nf
.size(); j
< size
; j
++)
699 Trace("strings-model") << " ++ ";
701 Trace("strings-model") << n
;
702 Node r
= d_state
.getRepresentative(n
);
703 if (!r
.isConst() && processed
.find(r
) == processed
.end())
705 Trace("strings-model") << "(UNPROCESSED)";
709 Trace("strings-model") << std::endl
;
710 std::vector
< Node
> nc
;
711 for (const Node
& n
: nf
.d_nf
)
713 Node r
= d_state
.getRepresentative(n
);
714 Assert(r
.isConst() || processed
.find(r
) != processed
.end());
715 nc
.push_back(r
.isConst() ? r
: processed
[r
]);
717 Node cc
= d_termReg
.mkNConcat(nc
, tn
);
718 Trace("strings-model")
719 << "*** Determined constant " << cc
<< " for " << rn
<< std::endl
;
721 if (!m
->assertEquality(rn
, cc
, true))
723 // this should never happen due to the model soundness argument
725 Unreachable() << "TheoryStrings::collectModelInfoType: "
726 "Inconsistent equality (unprocessed eqc)"
730 else if (!cc
.isConst())
732 // the value may be specified by seq.unit components, ensure this
733 // is marked as the skeleton for constructing values in this class.
734 m
->assertSkeleton(cc
);
738 //Trace("strings-model") << "String Model : Assigned." << std::endl;
739 Trace("strings-model") << "String Model : Finished." << std::endl
;
743 Node
TheoryStrings::mkSkeletonFor(Node c
)
745 NodeManager
* nm
= NodeManager::currentNM();
746 SkolemManager
* sm
= nm
->getSkolemManager();
747 BoundVarManager
* bvm
= nm
->getBoundVarManager();
748 Assert(c
.getKind() == CONST_SEQUENCE
);
749 const Sequence
& sn
= c
.getConst
<Sequence
>();
750 const std::vector
<Node
>& snvec
= sn
.getVec();
751 std::vector
<Node
> skChildren
;
752 TypeNode etn
= c
.getType().getSequenceElementType();
753 for (const Node
& snv
: snvec
)
755 Assert(snv
.getType().isSubtypeOf(etn
));
756 Node v
= bvm
->mkBoundVar
<SeqModelVarAttribute
>(snv
, etn
);
757 // use a skolem, not a bound variable
758 Node kv
= sm
->mkPurifySkolem(v
, "smv");
759 skChildren
.push_back(nm
->mkSeqUnit(etn
, kv
));
761 return utils::mkConcat(skChildren
, c
.getType());
764 Node
TheoryStrings::mkSkeletonFromBase(Node r
,
769 Assert(r
.getType().isSequence());
770 NodeManager
* nm
= NodeManager::currentNM();
771 SkolemManager
* sm
= nm
->getSkolemManager();
772 std::vector
<Node
> cacheVals
;
773 cacheVals
.push_back(r
);
774 std::vector
<Node
> skChildren
;
775 TypeNode etn
= r
.getType().getSequenceElementType();
776 for (size_t i
= currIndex
; i
< nextIndex
; i
++)
778 cacheVals
.push_back(nm
->mkConst(CONST_RATIONAL
, Rational(currIndex
)));
779 Node kv
= sm
->mkSkolemFunction(
780 SkolemFunId::SEQ_MODEL_BASE_ELEMENT
, etn
, cacheVals
);
781 skChildren
.push_back(nm
->mkSeqUnit(etn
, kv
));
782 cacheVals
.pop_back();
784 return utils::mkConcat(skChildren
, r
.getType());
787 /////////////////////////////////////////////////////////////////////////////
789 /////////////////////////////////////////////////////////////////////////////
791 void TheoryStrings::preRegisterTerm(TNode n
)
793 Trace("strings-preregister")
794 << "TheoryStrings::preRegisterTerm: " << n
<< std::endl
;
795 d_termReg
.preRegisterTerm(n
);
796 // Register the term with the extended theory. Notice we do not recurse on
797 // this term here since preRegisterTerm is already called recursively on all
798 // subterms in preregistered literals.
799 d_extTheory
.registerTerm(n
);
802 bool TheoryStrings::preNotifyFact(
803 TNode atom
, bool pol
, TNode fact
, bool isPrereg
, bool isInternal
)
805 if (atom
.getKind() == EQUAL
)
807 // this is only required for internal facts, others are already registered
810 // We must ensure these terms are registered. We register eagerly here for
811 // performance reasons. Alternatively, terms could be registered at full
812 // effort in e.g. BaseSolver::init.
813 for (const Node
& t
: atom
)
815 d_termReg
.registerTerm(t
, 0);
818 // store disequalities between strings that occur as literals
819 if (!pol
&& atom
[0].getType().isStringLike())
821 d_state
.addDisequality(atom
[0], atom
[1]);
827 void TheoryStrings::notifyFact(TNode atom
,
834 d_eagerSolver
->notifyFact(atom
, polarity
, fact
, isInternal
);
836 // process pending conflicts due to reasoning about endpoints
837 if (!d_state
.isInConflict() && d_state
.hasPendingConflict())
839 InferInfo
iiPendingConf(InferenceId::UNKNOWN
);
840 d_state
.getPendingConflict(iiPendingConf
);
841 Trace("strings-pending")
842 << "Process pending conflict " << iiPendingConf
.d_premises
<< std::endl
;
843 Trace("strings-conflict")
844 << "CONFLICT: Eager : " << iiPendingConf
.d_premises
<< std::endl
;
845 ++(d_statistics
.d_conflictsEager
);
846 // call the inference manager to send the conflict
847 d_im
.processConflict(iiPendingConf
);
850 Trace("strings-pending-debug") << " Now collect terms" << std::endl
;
851 Trace("strings-pending-debug") << " Finished collect terms" << std::endl
;
854 void TheoryStrings::postCheck(Effort e
)
856 d_im
.doPendingFacts();
858 Assert(d_strat
.isStrategyInit());
859 if (!d_state
.isInConflict() && !d_valuation
.needCheck()
860 && d_strat
.hasStrategyEffort(e
))
862 Trace("strings-check-debug")
863 << "Theory of strings " << e
<< " effort check " << std::endl
;
864 if (TraceIsOn("strings-eqc"))
866 Trace("strings-eqc") << debugPrintStringsEqc() << std::endl
;
868 ++(d_statistics
.d_checkRuns
);
869 bool sentLemma
= false;
870 bool hadPending
= false;
871 Trace("strings-check") << "Full effort check..." << std::endl
;
874 ++(d_statistics
.d_strategyRuns
);
875 Trace("strings-check") << " * Run strategy..." << std::endl
;
877 // remember if we had pending facts or lemmas
878 hadPending
= d_im
.hasPending();
879 // Send the facts *and* the lemmas. We send lemmas regardless of whether
880 // we send facts since some lemmas cannot be dropped. Other lemmas are
881 // otherwise avoided by aborting the strategy when a fact is ready.
883 // Did we successfully send a lemma? Notice that if hasPending = true
884 // and sentLemma = false, then the above call may have:
885 // (1) had no pending lemmas, but successfully processed pending facts,
886 // (2) unsuccessfully processed pending lemmas.
887 // In either case, we repeat the strategy if we are not in conflict.
888 sentLemma
= d_im
.hasSentLemma();
889 if (TraceIsOn("strings-check"))
891 Trace("strings-check") << " ...finish run strategy: ";
892 Trace("strings-check") << (hadPending
? "hadPending " : "");
893 Trace("strings-check") << (sentLemma
? "sentLemma " : "");
894 Trace("strings-check") << (d_state
.isInConflict() ? "conflict " : "");
895 if (!hadPending
&& !sentLemma
&& !d_state
.isInConflict())
897 Trace("strings-check") << "(none)";
899 Trace("strings-check") << std::endl
;
901 // repeat if we did not add a lemma or conflict, and we had pending
903 } while (!d_state
.isInConflict() && !sentLemma
&& hadPending
);
905 Trace("strings-check") << "Theory of strings, done check : " << e
<< std::endl
;
906 Assert(!d_im
.hasPendingFact());
907 Assert(!d_im
.hasPendingLemma());
910 bool TheoryStrings::needsCheckLastEffort() {
911 if (options().strings
.stringModelBasedReduction
)
913 bool hasExtf
= d_esolver
.hasExtendedFunctions();
914 Trace("strings-process")
915 << "needsCheckLastEffort: hasExtf = " << hasExtf
<< std::endl
;
921 /** Conflict when merging two constants */
922 void TheoryStrings::conflict(TNode a
, TNode b
){
923 if (d_state
.isInConflict())
925 // already in conflict
928 d_im
.conflictEqConstantMerge(a
, b
);
929 Trace("strings-conflict") << "CONFLICT: Eq engine conflict" << std::endl
;
930 ++(d_statistics
.d_conflictsEqEngine
);
933 void TheoryStrings::eqNotifyNewClass(TNode t
){
934 Kind k
= t
.getKind();
935 if (k
== STRING_LENGTH
|| k
== STRING_TO_CODE
)
937 Trace("strings-debug") << "New length eqc : " << t
<< std::endl
;
938 //we care about the length of this string
939 d_termReg
.registerTerm(t
[0], 1);
941 eq::EqualityEngine
* ee
= d_state
.getEqualityEngine();
942 Node r
= ee
->getRepresentative(t
[0]);
943 EqcInfo
* ei
= d_state
.getOrMakeEqcInfo(r
);
944 if (k
== STRING_LENGTH
)
946 ei
->d_lengthTerm
= t
;
950 ei
->d_codeTerm
= t
[0];
955 d_eagerSolver
->eqNotifyNewClass(t
);
959 void TheoryStrings::eqNotifyMerge(TNode t1
, TNode t2
)
961 EqcInfo
* e2
= d_state
.getOrMakeEqcInfo(t2
, false);
966 // always create it if e2 was non-null
967 EqcInfo
* e1
= d_state
.getOrMakeEqcInfo(t1
);
971 d_eagerSolver
->eqNotifyMerge(e1
, t1
, e2
, t2
);
974 // add information from e2 to e1
975 if (!e2
->d_lengthTerm
.get().isNull())
977 e1
->d_lengthTerm
.set(e2
->d_lengthTerm
);
979 if (!e2
->d_codeTerm
.get().isNull())
981 e1
->d_codeTerm
.set(e2
->d_codeTerm
);
983 if (e2
->d_cardinalityLemK
.get() > e1
->d_cardinalityLemK
.get())
985 e1
->d_cardinalityLemK
.set(e2
->d_cardinalityLemK
);
987 if (!e2
->d_normalizedLength
.get().isNull())
989 e1
->d_normalizedLength
.set(e2
->d_normalizedLength
);
993 void TheoryStrings::computeCareGraph(){
994 //computing the care graph here is probably still necessary, due to operators that take non-string arguments TODO: verify
995 Trace("strings-cg") << "TheoryStrings::computeCareGraph(): Build term indices..." << std::endl
;
996 // Term index for each (type, operator) pair. We require the operator here
997 // since operators are polymorphic, taking strings/sequences.
998 std::map
<std::pair
<TypeNode
, Node
>, TNodeTrie
> index
;
999 std::map
< Node
, unsigned > arity
;
1000 const context::CDList
<TNode
>& fterms
= d_termReg
.getFunctionTerms();
1001 size_t functionTerms
= fterms
.size();
1002 for (unsigned i
= 0; i
< functionTerms
; ++ i
) {
1003 TNode f1
= fterms
[i
];
1004 Trace("strings-cg") << "...build for " << f1
<< std::endl
;
1005 Node op
= f1
.getOperator();
1006 std::vector
< TNode
> reps
;
1007 bool has_trigger_arg
= false;
1008 for( unsigned j
=0; j
<f1
.getNumChildren(); j
++ ){
1009 reps
.push_back(d_equalityEngine
->getRepresentative(f1
[j
]));
1010 if (d_equalityEngine
->isTriggerTerm(f1
[j
], THEORY_STRINGS
))
1012 has_trigger_arg
= true;
1015 if( has_trigger_arg
){
1016 TypeNode ft
= utils::getOwnerStringType(f1
);
1017 std::pair
<TypeNode
, Node
> ikey
= std::pair
<TypeNode
, Node
>(ft
, op
);
1018 index
[ikey
].addTerm(f1
, reps
);
1019 arity
[op
] = reps
.size();
1023 for (std::pair
<const std::pair
<TypeNode
, Node
>, TNodeTrie
>& ti
: index
)
1025 Trace("strings-cg") << "TheoryStrings::computeCareGraph(): Process index "
1026 << ti
.first
<< "..." << std::endl
;
1027 Node op
= ti
.first
.second
;
1028 nodeTriePathPairProcess(&ti
.second
, arity
[op
], d_cpacb
);
1032 void TheoryStrings::checkRegisterTermsPreNormalForm()
1034 const std::vector
<Node
>& seqc
= d_bsolver
.getStringLikeEqc();
1035 for (const Node
& eqc
: seqc
)
1037 eq::EqClassIterator eqc_i
= eq::EqClassIterator(eqc
, d_equalityEngine
);
1038 while (!eqc_i
.isFinished())
1041 if (!d_bsolver
.isCongruent(n
))
1043 d_termReg
.registerTerm(n
, 2);
1050 void TheoryStrings::checkCodes()
1052 // ensure that lemmas regarding str.code been added for each constant string
1054 if (d_termReg
.hasStringCode())
1056 NodeManager
* nm
= NodeManager::currentNM();
1057 // str.code applied to the code term for each equivalence class that has a
1058 // code term but is not a constant
1059 std::vector
<Node
> nconst_codes
;
1060 // str.code applied to the proxy variables for each equivalence classes that
1061 // are constants of size one
1062 std::vector
<Node
> const_codes
;
1063 const std::vector
<Node
>& seqc
= d_bsolver
.getStringLikeEqc();
1064 for (const Node
& eqc
: seqc
)
1066 if (!eqc
.getType().isString())
1071 NormalForm
& nfe
= d_csolver
.getNormalForm(eqc
);
1072 if (nfe
.d_nf
.size() == 1 && nfe
.d_nf
[0].isConst())
1074 Node c
= nfe
.d_nf
[0];
1075 Trace("strings-code-debug") << "Get proxy variable for " << c
1077 Node cc
= nm
->mkNode(kind::STRING_TO_CODE
, c
);
1079 Assert(cc
.isConst());
1080 Node cp
= d_termReg
.ensureProxyVariableFor(c
);
1081 Node vc
= nm
->mkNode(STRING_TO_CODE
, cp
);
1082 if (!d_state
.areEqual(cc
, vc
))
1084 std::vector
<Node
> emptyVec
;
1085 d_im
.sendInference(emptyVec
, cc
.eqNode(vc
), InferenceId::STRINGS_CODE_PROXY
);
1087 const_codes
.push_back(vc
);
1091 EqcInfo
* ei
= d_state
.getOrMakeEqcInfo(eqc
, false);
1092 if (ei
&& !ei
->d_codeTerm
.get().isNull())
1094 Node vc
= nm
->mkNode(kind::STRING_TO_CODE
, ei
->d_codeTerm
.get());
1095 nconst_codes
.push_back(vc
);
1099 if (d_im
.hasProcessed())
1103 // now, ensure that str.code is injective
1104 std::vector
<Node
> cmps
;
1105 cmps
.insert(cmps
.end(), const_codes
.rbegin(), const_codes
.rend());
1106 cmps
.insert(cmps
.end(), nconst_codes
.rbegin(), nconst_codes
.rend());
1107 for (unsigned i
= 0, num_ncc
= nconst_codes
.size(); i
< num_ncc
; i
++)
1109 Node c1
= nconst_codes
[i
];
1111 for (const Node
& c2
: cmps
)
1113 Trace("strings-code-debug")
1114 << "Compare codes : " << c1
<< " " << c2
<< std::endl
;
1115 if (!d_state
.areDisequal(c1
, c2
) && !d_state
.areEqual(c1
, d_neg_one
))
1117 Node eq_no
= c1
.eqNode(d_neg_one
);
1118 Node deq
= c1
.eqNode(c2
).negate();
1119 Node eqn
= c1
[0].eqNode(c2
[0]);
1120 // str.code(x)==-1 V str.code(x)!=str.code(y) V x==y
1121 Node inj_lem
= nm
->mkNode(kind::OR
, eq_no
, deq
, eqn
);
1123 d_im
.addPendingPhaseRequirement(deq
, false);
1124 std::vector
<Node
> emptyVec
;
1125 d_im
.sendInference(emptyVec
, inj_lem
, InferenceId::STRINGS_CODE_INJ
);
1132 void TheoryStrings::checkRegisterTermsNormalForms()
1134 const std::vector
<Node
>& seqc
= d_bsolver
.getStringLikeEqc();
1135 for (const Node
& eqc
: seqc
)
1137 NormalForm
& nfi
= d_csolver
.getNormalForm(eqc
);
1138 // check if there is a length term for this equivalence class
1139 EqcInfo
* ei
= d_state
.getOrMakeEqcInfo(eqc
, false);
1140 Node lt
= ei
? ei
->d_lengthTerm
: Node::null();
1143 Node c
= d_termReg
.mkNConcat(nfi
.d_nf
, eqc
.getType());
1144 d_termReg
.registerTerm(c
, 3);
1149 TrustNode
TheoryStrings::ppRewrite(TNode atom
, std::vector
<SkolemLemma
>& lems
)
1151 Trace("strings-ppr") << "TheoryStrings::ppRewrite " << atom
<< std::endl
;
1152 if (atom
.getKind() == EQUAL
)
1154 // always apply aggressive equality rewrites here
1155 Node ret
= d_rewriter
.rewriteEqualityExt(atom
);
1158 return TrustNode::mkTrustRewrite(atom
, ret
, nullptr);
1161 if (atom
.getKind() == STRING_FROM_CODE
)
1163 // str.from_code(t) ---> ite(0 <= t < |A|, t = str.to_code(k), k = "")
1164 NodeManager
* nm
= NodeManager::currentNM();
1165 SkolemCache
* sc
= d_termReg
.getSkolemCache();
1166 Node k
= sc
->mkSkolemCached(atom
, SkolemCache::SK_PURIFY
, "kFromCode");
1168 Node card
= nm
->mkConstInt(Rational(d_termReg
.getAlphabetCardinality()));
1170 nm
->mkNode(AND
, nm
->mkNode(LEQ
, d_zero
, t
), nm
->mkNode(LT
, t
, card
));
1171 Node emp
= Word::mkEmptyWord(atom
.getType());
1172 Node pred
= nm
->mkNode(
1173 ITE
, cond
, t
.eqNode(nm
->mkNode(STRING_TO_CODE
, k
)), k
.eqNode(emp
));
1174 TrustNode tnk
= TrustNode::mkTrustLemma(pred
);
1175 lems
.push_back(SkolemLemma(tnk
, k
));
1176 return TrustNode::mkTrustRewrite(atom
, k
, nullptr);
1179 Node atomRet
= atom
;
1180 if (options().strings
.regExpElim
!= options::RegExpElimMode::OFF
1181 && atom
.getKind() == STRING_IN_REGEXP
)
1183 // aggressive elimination of regular expression membership
1184 ret
= d_regexp_elim
.eliminateTrusted(atomRet
);
1187 Trace("strings-ppr") << " rewrote " << atom
<< " -> " << ret
.getNode()
1188 << " via regular expression elimination."
1190 atomRet
= ret
.getNode();
1196 /** run the given inference step */
1197 void TheoryStrings::runInferStep(InferStep s
, int effort
)
1199 Trace("strings-process") << "Run " << s
;
1202 Trace("strings-process") << ", effort = " << effort
;
1204 Trace("strings-process") << "..." << std::endl
;
1207 case CHECK_INIT
: d_bsolver
.checkInit(); break;
1208 case CHECK_CONST_EQC
: d_bsolver
.checkConstantEquivalenceClasses(); break;
1209 case CHECK_EXTF_EVAL
: d_esolver
.checkExtfEval(effort
); break;
1210 case CHECK_CYCLES
: d_csolver
.checkCycles(); break;
1211 case CHECK_FLAT_FORMS
: d_csolver
.checkFlatForms(); break;
1212 case CHECK_REGISTER_TERMS_PRE_NF
: checkRegisterTermsPreNormalForm(); break;
1213 case CHECK_NORMAL_FORMS_EQ
: d_csolver
.checkNormalFormsEq(); break;
1214 case CHECK_NORMAL_FORMS_DEQ
: d_csolver
.checkNormalFormsDeq(); break;
1215 case CHECK_CODES
: checkCodes(); break;
1216 case CHECK_LENGTH_EQC
: d_csolver
.checkLengthsEqc(); break;
1217 case CHECK_SEQUENCES_ARRAY_CONCAT
: d_asolver
.checkArrayConcat(); break;
1218 case CHECK_SEQUENCES_ARRAY
: d_asolver
.checkArray(); break;
1219 case CHECK_SEQUENCES_ARRAY_EAGER
: d_asolver
.checkArrayEager(); break;
1220 case CHECK_REGISTER_TERMS_NF
: checkRegisterTermsNormalForms(); break;
1221 case CHECK_EXTF_REDUCTION
: d_esolver
.checkExtfReductions(effort
); break;
1222 case CHECK_MEMBERSHIP
: d_rsolver
.checkMemberships(effort
); break;
1223 case CHECK_CARDINALITY
: d_bsolver
.checkCardinality(); break;
1224 default: Unreachable(); break;
1226 Trace("strings-process") << "Done " << s
1227 << ", addedFact = " << d_im
.hasPendingFact()
1228 << ", addedLemma = " << d_im
.hasPendingLemma()
1229 << ", conflict = " << d_state
.isInConflict()
1233 void TheoryStrings::runStrategy(Theory::Effort e
)
1235 std::vector
<std::pair
<InferStep
, int> >::iterator it
= d_strat
.stepBegin(e
);
1236 std::vector
<std::pair
<InferStep
, int> >::iterator stepEnd
=
1239 Trace("strings-process") << "----check, next round---" << std::endl
;
1240 while (it
!= stepEnd
)
1242 InferStep curr
= it
->first
;
1245 if (d_im
.hasProcessed())
1252 runInferStep(curr
, it
->second
);
1253 if (d_state
.isInConflict())
1260 Trace("strings-process") << "----finished round---" << std::endl
;
1263 std::string
TheoryStrings::debugPrintStringsEqc()
1265 std::stringstream ss
;
1266 for (unsigned t
= 0; t
< 2; t
++)
1268 eq::EqClassesIterator eqcs2_i
= eq::EqClassesIterator(d_equalityEngine
);
1269 ss
<< (t
== 0 ? "STRINGS:" : "OTHER:") << std::endl
;
1270 while (!eqcs2_i
.isFinished())
1272 Node eqc
= (*eqcs2_i
);
1273 bool print
= (t
== 0 && eqc
.getType().isStringLike())
1274 || (t
== 1 && !eqc
.getType().isStringLike());
1277 eq::EqClassIterator eqc2_i
= eq::EqClassIterator(eqc
, d_equalityEngine
);
1278 ss
<< "Eqc( " << eqc
<< " ) : { ";
1279 while (!eqc2_i
.isFinished())
1281 if ((*eqc2_i
) != eqc
&& (*eqc2_i
).getKind() != kind::EQUAL
)
1283 ss
<< (*eqc2_i
) << " ";
1287 ss
<< " } " << std::endl
;
1288 EqcInfo
* ei
= d_state
.getOrMakeEqcInfo(eqc
, false);
1291 Trace("strings-eqc-debug")
1292 << "* Length term : " << ei
->d_lengthTerm
.get() << std::endl
;
1293 Trace("strings-eqc-debug")
1294 << "* Cardinality lemma k : " << ei
->d_cardinalityLemK
.get()
1296 Trace("strings-eqc-debug")
1297 << "* Normalization length lemma : "
1298 << ei
->d_normalizedLength
.get() << std::endl
;
1309 } // namespace strings
1310 } // namespace theory
1311 } // namespace cvc5::internal