6e638e445a63a54ebe7615a90f795a3da99dc6f0
[cvc5.git] / src / theory / strings / theory_strings.h
1 /********************* */
2 /*! \file theory_strings.h
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Tianyi Liang, Tim King
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Theory of strings
13 **
14 ** Theory of strings.
15 **/
16
17 #include "cvc4_private.h"
18
19 #ifndef __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
20 #define __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
21
22 #include "context/cdhashset.h"
23 #include "context/cdlist.h"
24 #include "expr/attribute.h"
25 #include "theory/strings/regexp_operation.h"
26 #include "theory/strings/theory_strings_preprocess.h"
27 #include "theory/theory.h"
28 #include "theory/uf/equality_engine.h"
29
30 #include <climits>
31 #include <deque>
32
33 namespace CVC4 {
34 namespace theory {
35
36 namespace quantifiers{
37 class TermArgTrie;
38 }
39
40 namespace strings {
41
42 /**
43 * Decision procedure for strings.
44 *
45 */
46
47 /** Types of inferences used in the procedure
48 *
49 * These are variants of the inference rules in Figures 3-5 of Liang et al.
50 * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014.
51 */
52 enum Inference
53 {
54 INFER_NONE,
55 // string split constant propagation, for example:
56 // x = y, x = "abc", y = y1 ++ "b" ++ y2
57 // implies y1 = "a" ++ y1'
58 INFER_SSPLIT_CST_PROP,
59 // string split variable propagation, for example:
60 // x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 )
61 // implies x1 = y1 ++ x1'
62 // This is inspired by Zheng et al CAV 2015.
63 INFER_SSPLIT_VAR_PROP,
64 // length split, for example:
65 // len( x1 ) = len( y1 ) V len( x1 ) != len( y1 )
66 // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2.
67 INFER_LEN_SPLIT,
68 // length split empty, for example:
69 // z = "" V z != ""
70 // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z
71 INFER_LEN_SPLIT_EMP,
72 // string split constant binary, for example:
73 // x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1'
74 // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2.
75 // This inference is disabled by default and is enabled by stringBinaryCsp().
76 INFER_SSPLIT_CST_BINARY,
77 // string split constant
78 // x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != ""
79 // implies y1 = "c" ++ y1'
80 // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014.
81 INFER_SSPLIT_CST,
82 // string split variable, for example:
83 // x = y, x = x1 ++ x2, y = y1 ++ y2
84 // implies x1 = y1 ++ x1' V y1 = x1 ++ y1'
85 // This is rule F-Split in Figure 5 of Liang et al CAV 2014.
86 INFER_SSPLIT_VAR,
87 // flat form loop, for example:
88 // x = y, x = x1 ++ z, y = z ++ y2
89 // implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1
90 // for fresh u, u1, u2.
91 // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014.
92 INFER_FLOOP,
93 };
94 std::ostream& operator<<(std::ostream& out, Inference i);
95
96 /** inference steps
97 *
98 * Corresponds to a step in the overall strategy of the strings solver. For
99 * details on the individual steps, see documentation on the inference schemas
100 * within TheoryStrings.
101 */
102 enum InferStep
103 {
104 // indicates that the strategy should break if lemmas or facts are added
105 BREAK,
106 // check initial
107 CHECK_INIT,
108 // check constant equivalence classes
109 CHECK_CONST_EQC,
110 // check extended function evaluation
111 CHECK_EXTF_EVAL,
112 // check cycles
113 CHECK_CYCLES,
114 // check flat forms
115 CHECK_FLAT_FORMS,
116 // check normal forms equalities
117 CHECK_NORMAL_FORMS_EQ,
118 // check normal forms disequalities
119 CHECK_NORMAL_FORMS_DEQ,
120 // check codes
121 CHECK_CODES,
122 // check lengths for equivalence classes
123 CHECK_LENGTH_EQC,
124 // check extended function reductions
125 CHECK_EXTF_REDUCTION,
126 // check regular expression memberships
127 CHECK_MEMBERSHIP,
128 // check cardinality
129 CHECK_CARDINALITY,
130 };
131 std::ostream& operator<<(std::ostream& out, Inference i);
132
133 struct StringsProxyVarAttributeId {};
134 typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute;
135
136 class TheoryStrings : public Theory {
137 typedef context::CDList<Node> NodeList;
138 typedef context::CDHashMap<Node, bool, NodeHashFunction> NodeBoolMap;
139 typedef context::CDHashMap<Node, int, NodeHashFunction> NodeIntMap;
140 typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap;
141 typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
142
143 public:
144 TheoryStrings(context::Context* c, context::UserContext* u,
145 OutputChannel& out, Valuation valuation,
146 const LogicInfo& logicInfo);
147 ~TheoryStrings();
148
149 void setMasterEqualityEngine(eq::EqualityEngine* eq) override;
150
151 std::string identify() const override { return std::string("TheoryStrings"); }
152
153 public:
154 void propagate(Effort e) override;
155 bool propagate(TNode literal);
156 void explain( TNode literal, std::vector<TNode>& assumptions );
157 Node explain(TNode literal) override;
158 eq::EqualityEngine* getEqualityEngine() override { return &d_equalityEngine; }
159 bool getCurrentSubstitution(int effort,
160 std::vector<Node>& vars,
161 std::vector<Node>& subs,
162 std::map<Node, std::vector<Node> >& exp) override;
163 int getReduction(int effort, Node n, Node& nr) override;
164
165 // NotifyClass for equality engine
166 class NotifyClass : public eq::EqualityEngineNotify {
167 TheoryStrings& d_str;
168 public:
169 NotifyClass(TheoryStrings& t_str): d_str(t_str) {}
170 bool eqNotifyTriggerEquality(TNode equality, bool value) override
171 {
172 Debug("strings") << "NotifyClass::eqNotifyTriggerEquality(" << equality << ", " << (value ? "true" : "false" )<< ")" << std::endl;
173 if (value) {
174 return d_str.propagate(equality);
175 } else {
176 // We use only literal triggers so taking not is safe
177 return d_str.propagate(equality.notNode());
178 }
179 }
180 bool eqNotifyTriggerPredicate(TNode predicate, bool value) override
181 {
182 Debug("strings") << "NotifyClass::eqNotifyTriggerPredicate(" << predicate << ", " << (value ? "true" : "false") << ")" << std::endl;
183 if (value) {
184 return d_str.propagate(predicate);
185 } else {
186 return d_str.propagate(predicate.notNode());
187 }
188 }
189 bool eqNotifyTriggerTermEquality(TheoryId tag,
190 TNode t1,
191 TNode t2,
192 bool value) override
193 {
194 Debug("strings") << "NotifyClass::eqNotifyTriggerTermMerge(" << tag << ", " << t1 << ", " << t2 << ")" << std::endl;
195 if (value) {
196 return d_str.propagate(t1.eqNode(t2));
197 } else {
198 return d_str.propagate(t1.eqNode(t2).notNode());
199 }
200 }
201 void eqNotifyConstantTermMerge(TNode t1, TNode t2) override
202 {
203 Debug("strings") << "NotifyClass::eqNotifyConstantTermMerge(" << t1 << ", " << t2 << ")" << std::endl;
204 d_str.conflict(t1, t2);
205 }
206 void eqNotifyNewClass(TNode t) override
207 {
208 Debug("strings") << "NotifyClass::eqNotifyNewClass(" << t << std::endl;
209 d_str.eqNotifyNewClass(t);
210 }
211 void eqNotifyPreMerge(TNode t1, TNode t2) override
212 {
213 Debug("strings") << "NotifyClass::eqNotifyPreMerge(" << t1 << ", " << t2 << std::endl;
214 d_str.eqNotifyPreMerge(t1, t2);
215 }
216 void eqNotifyPostMerge(TNode t1, TNode t2) override
217 {
218 Debug("strings") << "NotifyClass::eqNotifyPostMerge(" << t1 << ", " << t2 << std::endl;
219 d_str.eqNotifyPostMerge(t1, t2);
220 }
221 void eqNotifyDisequal(TNode t1, TNode t2, TNode reason) override
222 {
223 Debug("strings") << "NotifyClass::eqNotifyDisequal(" << t1 << ", " << t2 << ", " << reason << std::endl;
224 d_str.eqNotifyDisequal(t1, t2, reason);
225 }
226 };/* class TheoryStrings::NotifyClass */
227
228 private:
229 // Constants
230 Node d_emptyString;
231 Node d_emptyRegexp;
232 Node d_true;
233 Node d_false;
234 Node d_zero;
235 Node d_one;
236 Node d_neg_one;
237 CVC4::Rational RMAXINT;
238 unsigned d_card_size;
239 // Helper functions
240 Node getRepresentative( Node t );
241 bool hasTerm( Node a );
242 bool areEqual( Node a, Node b );
243 bool areDisequal( Node a, Node b );
244 bool areCareDisequal( TNode x, TNode y );
245 // t is representative, te = t, add lt = te to explanation exp
246 Node getLengthExp( Node t, std::vector< Node >& exp, Node te );
247 Node getLength( Node t, std::vector< Node >& exp );
248
249 private:
250 /** The notify class */
251 NotifyClass d_notify;
252 /** Equaltity engine */
253 eq::EqualityEngine d_equalityEngine;
254 /** Are we in conflict */
255 context::CDO<bool> d_conflict;
256 //list of pairs of nodes to merge
257 std::map< Node, Node > d_pending_exp;
258 std::vector< Node > d_pending;
259 std::vector< Node > d_lemma_cache;
260 std::map< Node, bool > d_pending_req_phase;
261 /** inferences: maintained to ensure ref count for internally introduced nodes */
262 NodeList d_infer;
263 NodeList d_infer_exp;
264 /** normal forms */
265 std::map< Node, Node > d_normal_forms_base;
266 std::map< Node, std::vector< Node > > d_normal_forms;
267 std::map< Node, std::vector< Node > > d_normal_forms_exp;
268 std::map< Node, std::map< Node, std::map< bool, int > > > d_normal_forms_exp_depend;
269 //map of pairs of terms that have the same normal form
270 NodeIntMap d_nf_pairs;
271 std::map< Node, std::vector< Node > > d_nf_pairs_data;
272 void addNormalFormPair( Node n1, Node n2 );
273 bool isNormalFormPair( Node n1, Node n2 );
274 bool isNormalFormPair2( Node n1, Node n2 );
275 // preReg cache
276 NodeSet d_pregistered_terms_cache;
277 NodeSet d_registered_terms_cache;
278 NodeSet d_length_lemma_terms_cache;
279 NodeSet d_skolem_ne_reg_cache;
280 // preprocess cache
281 StringsPreprocess d_preproc;
282 NodeBoolMap d_preproc_cache;
283 // extended functions inferences cache
284 NodeSet d_extf_infer_cache;
285 NodeSet d_extf_infer_cache_u;
286 std::vector< Node > d_empty_vec;
287 //
288 NodeList d_ee_disequalities;
289 private:
290 NodeSet d_congruent;
291 std::map< Node, Node > d_eqc_to_const;
292 std::map< Node, Node > d_eqc_to_const_base;
293 std::map< Node, Node > d_eqc_to_const_exp;
294 Node getConstantEqc( Node eqc );
295
296 std::map< Node, Node > d_eqc_to_len_term;
297 std::vector< Node > d_strings_eqc;
298 Node d_emptyString_r;
299 class TermIndex {
300 public:
301 Node d_data;
302 std::map< TNode, TermIndex > d_children;
303 Node add( TNode n, unsigned index, TheoryStrings* t, Node er, std::vector< Node >& c );
304 void clear(){ d_children.clear(); }
305 };
306 std::map< Kind, TermIndex > d_term_index;
307 //list of non-congruent concat terms in each eqc
308 std::map< Node, std::vector< Node > > d_eqc;
309 std::map< Node, std::vector< Node > > d_flat_form;
310 std::map< Node, std::vector< int > > d_flat_form_index;
311
312 void debugPrintFlatForms( const char * tc );
313 void debugPrintNormalForms( const char * tc );
314 /////////////////////////////////////////////////////////////////////////////
315 // MODEL GENERATION
316 /////////////////////////////////////////////////////////////////////////////
317 public:
318 bool collectModelInfo(TheoryModel* m) override;
319
320 /////////////////////////////////////////////////////////////////////////////
321 // NOTIFICATIONS
322 /////////////////////////////////////////////////////////////////////////////
323 public:
324 void presolve() override;
325 void shutdown() override {}
326
327 /////////////////////////////////////////////////////////////////////////////
328 // MAIN SOLVER
329 /////////////////////////////////////////////////////////////////////////////
330 private:
331 void addSharedTerm(TNode n) override;
332 EqualityStatus getEqualityStatus(TNode a, TNode b) override;
333
334 private:
335 /** SAT-context-dependent information about an equivalence class */
336 class EqcInfo {
337 public:
338 EqcInfo( context::Context* c );
339 ~EqcInfo(){}
340 /**
341 * If non-null, this is a term x from this eq class such that str.len( x )
342 * occurs as a term in this SAT context.
343 */
344 context::CDO< Node > d_length_term;
345 /**
346 * If non-null, this is a term x from this eq class such that str.code( x )
347 * occurs as a term in this SAT context.
348 */
349 context::CDO<Node> d_code_term;
350 context::CDO< unsigned > d_cardinality_lem_k;
351 context::CDO< Node > d_normalized_length;
352 };
353 /** map from representatives to information necessary for equivalence classes */
354 std::map< Node, EqcInfo* > d_eqc_info;
355 /**
356 * Get the above information for equivalence class eqc. If doMake is true,
357 * we construct a new information class if one does not exist. The term eqc
358 * should currently be a representative of the equality engine of this class.
359 */
360 EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true );
361 //maintain which concat terms have the length lemma instantiated
362 NodeNodeMap d_proxy_var;
363 NodeNodeMap d_proxy_var_to_length;
364 /** All the function terms that the theory has seen */
365 context::CDList<TNode> d_functionsTerms;
366 private:
367 //any non-reduced extended functions exist
368 context::CDO< bool > d_has_extf;
369 /** have we asserted any str.code terms? */
370 bool d_has_str_code;
371 // static information about extf
372 class ExtfInfo {
373 public:
374 //all variables in this term
375 std::vector< Node > d_vars;
376 };
377 // non-static information about extf
378 class ExtfInfoTmp {
379 public:
380 void init(){
381 d_pol = 0;
382 d_model_active = true;
383 }
384 // list of terms that something (does not) contain and their explanation
385 std::map< bool, std::vector< Node > > d_ctn;
386 std::map< bool, std::vector< Node > > d_ctn_from;
387 //polarity
388 int d_pol;
389 //explanation
390 std::vector< Node > d_exp;
391 //false if it is reduced in the model
392 bool d_model_active;
393 };
394 std::map< Node, ExtfInfoTmp > d_extf_info_tmp;
395 private:
396 class InferInfo {
397 public:
398 unsigned d_i;
399 unsigned d_j;
400 bool d_rev;
401 std::vector< Node > d_ant;
402 std::vector< Node > d_antn;
403 std::map< int, std::vector< Node > > d_new_skolem;
404 Node d_conc;
405 Inference d_id;
406 std::map< Node, bool > d_pending_phase;
407 unsigned d_index;
408 Node d_nf_pair[2];
409 bool sendAsLemma();
410 };
411 void checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc );
412 void checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int effort );
413 Node getSymbolicDefinition( Node n, std::vector< Node >& exp );
414
415 //--------------------------for checkFlatForm
416 /**
417 * This checks whether there are flat form inferences between eqc[start] and
418 * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
419 * interferences in the reverse direction of the flat forms. For more details,
420 * see checkFlatForms below.
421 */
422 void checkFlatForm(std::vector<Node>& eqc, unsigned start, bool isRev);
423 //--------------------------end for checkFlatForm
424
425 //--------------------------for checkCycles
426 Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp );
427 //--------------------------end for checkCycles
428
429 //--------------------------for checkNormalFormsEq
430 void normalizeEquivalenceClass( Node n );
431 void getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
432 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
433 bool detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc );
434 bool processLoop( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
435 int i, int j, int loop_n_index, int other_n_index,int loop_index, int index, InferInfo& info );
436 void processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
437 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
438 void processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
439 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
440 unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer );
441 void processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
442 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
443 unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer );
444 //--------------------------end for checkNormalFormsEq
445
446 //--------------------------for checkNormalFormsDeq
447 void processDeq( Node n1, Node n2 );
448 int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj );
449 int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev );
450 void getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
451 unsigned i, int index, bool isRev, std::vector< Node >& curr_exp );
452 void getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
453 std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
454 unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp );
455 //--------------------------end for checkNormalFormsDeq
456
457 //--------------------------------for checkMemberships
458 // check membership constraints
459 Node mkRegExpAntec(Node atom, Node ant);
460 bool applyRConsume( CVC4::String &s, Node &r );
461 Node applyRSplit( Node s1, Node s2, Node r );
462 bool applyRLen( std::map< Node, std::vector< Node > > &XinR_with_exps );
463 bool checkPDerivative( Node x, Node r, Node atom, bool &addedLemma, std::vector< Node > &nf_exp);
464 //check contains
465 void checkPosContains( std::vector< Node >& posContains );
466 void checkNegContains( std::vector< Node >& negContains );
467 //--------------------------------end for checkMemberships
468
469 private:
470 void addCarePairs( quantifiers::TermArgTrie * t1, quantifiers::TermArgTrie * t2, unsigned arity, unsigned depth );
471
472 public:
473 /** preregister term */
474 void preRegisterTerm(TNode n) override;
475 /** Expand definition */
476 Node expandDefinition(LogicRequest& logicRequest, Node n) override;
477 /** Check at effort e */
478 void check(Effort e) override;
479 /** needs check last effort */
480 bool needsCheckLastEffort() override;
481 /** Conflict when merging two constants */
482 void conflict(TNode a, TNode b);
483 /** called when a new equivalence class is created */
484 void eqNotifyNewClass(TNode t);
485 /** called when two equivalence classes will merge */
486 void eqNotifyPreMerge(TNode t1, TNode t2);
487 /** called when two equivalence classes have merged */
488 void eqNotifyPostMerge(TNode t1, TNode t2);
489 /** called when two equivalence classes are made disequal */
490 void eqNotifyDisequal(TNode t1, TNode t2, TNode reason);
491 /** get preprocess */
492 StringsPreprocess* getPreprocess() { return &d_preproc; }
493
494 protected:
495 /** compute care graph */
496 void computeCareGraph() override;
497
498 // do pending merges
499 void assertPendingFact(Node atom, bool polarity, Node exp);
500 void doPendingFacts();
501 void doPendingLemmas();
502 bool hasProcessed();
503 void addToExplanation(Node a, Node b, std::vector<Node>& exp);
504 void addToExplanation(Node lit, std::vector<Node>& exp);
505
506 /** Register term
507 *
508 * This performs SAT-context-independent registration for a term n, which
509 * may cause lemmas to be sent on the output channel that involve
510 * "initial refinement lemmas" for n. This includes introducing proxy
511 * variables for string terms and asserting that str.code terms are within
512 * proper bounds.
513 *
514 * Effort is one of the following (TODO make enum #1881):
515 * 0 : upon preregistration or internal assertion
516 * 1 : upon occurrence in length term
517 * 2 : before normal form computation
518 * 3 : called on normal form terms
519 *
520 * Based on the strategy, we may choose to add these initial refinement
521 * lemmas at one of the following efforts, where if it is not the given
522 * effort, the call to this method does nothing.
523 */
524 void registerTerm(Node n, int effort);
525 // send lemma
526 void sendInference(std::vector<Node>& exp,
527 std::vector<Node>& exp_n,
528 Node eq,
529 const char* c,
530 bool asLemma = false);
531 void sendInference(std::vector<Node>& exp,
532 Node eq,
533 const char* c,
534 bool asLemma = false);
535 void sendLemma(Node ant, Node conc, const char* c);
536 void sendInfer(Node eq_exp, Node eq, const char* c);
537 bool sendSplit(Node a, Node b, const char* c, bool preq = true);
538 void sendLengthLemma(Node n);
539 /** mkConcat **/
540 inline Node mkConcat(Node n1, Node n2);
541 inline Node mkConcat(Node n1, Node n2, Node n3);
542 inline Node mkConcat(const std::vector<Node>& c);
543 inline Node mkLength(Node n);
544 // mkSkolem
545 enum
546 {
547 sk_id_c_spt,
548 sk_id_vc_spt,
549 sk_id_vc_bin_spt,
550 sk_id_v_spt,
551 sk_id_c_spt_rev,
552 sk_id_vc_spt_rev,
553 sk_id_vc_bin_spt_rev,
554 sk_id_v_spt_rev,
555 sk_id_ctn_pre,
556 sk_id_ctn_post,
557 sk_id_dc_spt,
558 sk_id_dc_spt_rem,
559 sk_id_deq_x,
560 sk_id_deq_y,
561 sk_id_deq_z,
562 };
563 std::map<Node, std::map<Node, std::map<int, Node> > > d_skolem_cache;
564 /** the set of all skolems we have generated */
565 std::unordered_set<Node, NodeHashFunction> d_all_skolems;
566 Node mkSkolemCached(
567 Node a, Node b, int id, const char* c, int isLenSplit = 0);
568 inline Node mkSkolemS(const char* c, int isLenSplit = 0);
569 void registerNonEmptySkolem(Node sk);
570 // inline Node mkSkolemI(const char * c);
571 /** mkExplain **/
572 Node mkExplain(std::vector<Node>& a);
573 Node mkExplain(std::vector<Node>& a, std::vector<Node>& an);
574 /** mkAnd **/
575 Node mkAnd(std::vector<Node>& a);
576 /** get concat vector */
577 void getConcatVec(Node n, std::vector<Node>& c);
578
579 // get equivalence classes
580 void getEquivalenceClasses(std::vector<Node>& eqcs);
581
582 // separate into collections with equal length
583 void separateByLength(std::vector<Node>& n,
584 std::vector<std::vector<Node> >& col,
585 std::vector<Node>& lts);
586 void printConcat(std::vector<Node>& n, const char* c);
587
588 void inferSubstitutionProxyVars(Node n,
589 std::vector<Node>& vars,
590 std::vector<Node>& subs,
591 std::vector<Node>& unproc);
592
593 // Symbolic Regular Expression
594 private:
595 // regular expression memberships
596 NodeList d_regexp_memberships;
597 NodeSet d_regexp_ucached;
598 NodeSet d_regexp_ccached;
599 // stored assertions
600 NodeIntMap d_pos_memberships;
601 std::map< Node, std::vector< Node > > d_pos_memberships_data;
602 NodeIntMap d_neg_memberships;
603 std::map< Node, std::vector< Node > > d_neg_memberships_data;
604 unsigned getNumMemberships( Node n, bool isPos );
605 Node getMembership( Node n, bool isPos, unsigned i );
606 // semi normal forms for symbolic expression
607 std::map< Node, Node > d_nf_regexps;
608 std::map< Node, std::vector< Node > > d_nf_regexps_exp;
609 // intersection
610 NodeNodeMap d_inter_cache;
611 NodeIntMap d_inter_index;
612 // processed memberships
613 NodeSet d_processed_memberships;
614 // antecedant for why regexp membership must be true
615 NodeNodeMap d_regexp_ant;
616 // membership length
617 //std::map< Node, bool > d_membership_length;
618 // regular expression operations
619 RegExpOpr d_regexp_opr;
620
621 CVC4::String getHeadConst( Node x );
622 bool deriveRegExp( Node x, Node r, Node ant );
623 void addMembership(Node assertion);
624 Node getNormalString(Node x, std::vector<Node> &nf_exp);
625 Node getNormalSymRegExp(Node r, std::vector<Node> &nf_exp);
626
627
628 // Finite Model Finding
629 private:
630 NodeSet d_input_vars;
631 context::CDO< Node > d_input_var_lsum;
632 context::CDHashMap< int, Node > d_cardinality_lits;
633 context::CDO< int > d_curr_cardinality;
634
635 public:
636 //for finite model finding
637 Node getNextDecisionRequest(unsigned& priority) override;
638 // ppRewrite
639 Node ppRewrite(TNode atom) override;
640
641 public:
642 /** statistics class */
643 class Statistics {
644 public:
645 IntStat d_splits;
646 IntStat d_eq_splits;
647 IntStat d_deq_splits;
648 IntStat d_loop_lemmas;
649 IntStat d_new_skolems;
650 Statistics();
651 ~Statistics();
652 };/* class TheoryStrings::Statistics */
653 Statistics d_statistics;
654
655 private:
656 //-----------------------inference steps
657 /** check initial
658 *
659 * This function initializes term indices for each strings function symbol.
660 * One key aspect of this construction is that concat terms are indexed by
661 * their list of non-empty components. For example, if x = "" is an equality
662 * asserted in this SAT context, then y ++ x ++ z may be indexed by (y,z).
663 * This method may infer various facts while building these term indices, for
664 * instance, based on congruence. An example would be inferring:
665 * y ++ x ++ z = y ++ z
666 * if both terms are registered in this SAT context.
667 *
668 * This function should be called as a first step of any strategy.
669 */
670 void checkInit();
671 /** check constant equivalence classes
672 *
673 * This function infers whether CONCAT terms can be simplified to constants.
674 * For example, if x = "a" and y = "b" are equalities in the current SAT
675 * context, then we may infer x ++ "c" ++ y is equivalent to "acb". In this
676 * case, we infer the fact x ++ "c" ++ y = "acb".
677 */
678 void checkConstantEquivalenceClasses();
679 /** check extended functions evaluation
680 *
681 * This applies "context-dependent simplification" for all active extended
682 * function terms in this SAT context. This infers facts of the form:
683 * x = c => f( t1 ... tn ) = c'
684 * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c
685 * is a (tuple of) equalities that are asserted in this SAT context, and
686 * f( t1 ... tn ) is a term from this SAT context.
687 *
688 * For more details, this is steps 4 when effort=0 and step 6 when
689 * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String
690 * Solvers using Context-Dependent Simplification", CAV 2017. When called with
691 * effort=3, we apply context-dependent simplification based on model values.
692 */
693 void checkExtfEval(int effort);
694 /** check cycles
695 *
696 * This inference schema ensures that a containment ordering < over the
697 * string equivalence classes is acyclic. We define this ordering < such that
698 * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
699 * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
700 * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
701 * form components that do not constitute this chain, e.g. (w1...wk) \ sj
702 * in the flat form above, must be empty.
703 *
704 * For more details, see the inference S-Cycle in Liang et al CAV 2014.
705 */
706 void checkCycles();
707 /** check flat forms
708 *
709 * This applies an inference schema based on "flat forms". The flat form of a
710 * string term t is a vector of representative terms [r1, ..., rn] such that
711 * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
712 * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
713 * the equivalence class containing t1. For example, if t is y ++ z ++ z,
714 * E is { y = "", w = z }, and w is the representative of the equivalence
715 * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
716 * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
717 * We may infer various facts based on this pair of terms. For example:
718 * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
719 * rn = sn, if n=m and rj == sj for each j < n,
720 * ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
721 * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
722 * respectively.
723 *
724 * Notice that this inference scheme is an optimization and not needed for
725 * model-soundness. The motivation for this schema is that it is simpler than
726 * checkNormalFormsEq, which can be seen as a recursive version of this
727 * schema (see difference of "normal form" vs "flat form" below), and
728 * checkNormalFormsEq is complete, in the sense that if it passes with no
729 * inferences, we are ensured that all string equalities in the current
730 * context are satisfied.
731 *
732 * Must call checkCycles before this function in a strategy.
733 */
734 void checkFlatForms();
735 /** check normal forms equalities
736 *
737 * This applies an inference schema based on "normal forms". The normal form
738 * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
739 * where t1...tn are concatenation terms is a vector of representative terms
740 * [r1, ..., rm] such that:
741 * (1) if n=0, then m=1 and r1 is the representative of e,
742 * (2) if n>0, say
743 * t1 = t^1_1 ++ ... ++ t^1_m_1
744 * ...
745 * tn = t^1_n ++ ... ++ t^_m_n
746 * for *each* i=1, ..., n, the result of concenating the normal forms of
747 * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
748 * can be assigned a normal form, then all equalities between ti and tj are
749 * satisfied by all models that correspond to extensions of the current
750 * assignment. For further detail on this terminology, see Liang et al
751 * CAV 2014.
752 *
753 * Notice that all constant words are implicitly considered concatentation
754 * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
755 *
756 * At a high level, we build normal forms for equivalence classes bottom-up,
757 * starting with equivalence classes that are minimal with respect to the
758 * containment ordering < computed during checkCycles. While computing a
759 * normal form for an equivalence class, we may infer equalities between
760 * components of strings that must be equal (e.g. x=y when x++z == y++w when
761 * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
762 * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
763 * string terms into smaller components using fresh skolem variables (see
764 * Inference values with names "SPLIT"). We also may introduce regular
765 * expression constraints in this method for looping word equations (see
766 * the Inference INFER_FLOOP).
767 *
768 * If this inference schema returns no facts, lemmas, or conflicts, then
769 * we have successfully assigned normal forms for all equivalence classes, as
770 * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
771 * conflict based on inferences in the Inference enumeration above.
772 */
773 void checkNormalFormsEq();
774 /** check normal forms disequalities
775 *
776 * This inference schema can be seen as the converse of the above schema. In
777 * particular, it ensures that each pair of distinct equivalence classes
778 * e1 and e2 have distinct normal forms.
779 *
780 * This method considers all pairs of distinct equivalence classes (e1,e2)
781 * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
782 * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
783 * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
784 * disequal and have the same length, then x1 and x2 have distinct normal
785 * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
786 * or split on an equality between ri and si.
787 *
788 * If this inference schema returns no facts, lemmas, or conflicts, then all
789 * disequalities between string terms are satisfied by all models that are
790 * extensions of the current assignment.
791 */
792 void checkNormalFormsDeq();
793 /** check codes
794 *
795 * This inference schema ensures that constraints between str.code terms
796 * are satisfied by models that correspond to extensions of the current
797 * assignment. In particular, this method ensures that str.code can be
798 * given an interpretation that is injective for string arguments with length
799 * one. It may add lemmas of the form:
800 * str.code(x) == -1 V str.code(x) != str.code(y) V x == y
801 */
802 void checkCodes();
803 /** check lengths for equivalence classes
804 *
805 * This inference schema adds lemmas of the form:
806 * E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
807 * where [r1, ..., rn] is the normal form of the equivalence class containing
808 * x. This schema is not required for correctness but experimentally has
809 * shown to be helpful.
810 */
811 void checkLengthsEqc();
812 /** check extended function reductions
813 *
814 * This adds "reduction" lemmas for each active extended function in this SAT
815 * context. These are generally lemmas of the form:
816 * F[t1...tn,k] ^ f( t1 ... tn ) = k
817 * where f( t1 ... tn ) is an active extended function, k is a fresh constant
818 * and F is a formula that constrains k based on the definition of f.
819 *
820 * For more details, this is step 7 from Strategy 1 in Reynolds et al,
821 * CAV 2017. We stratify this in practice, where calling this with effort=1
822 * reduces some of the "easier" extended functions, and effort=2 reduces
823 * the rest.
824 */
825 void checkExtfReductions(int effort);
826 /** check regular expression memberships
827 *
828 * This checks the satisfiability of all regular expression memberships
829 * of the form (not) s in R. We use various heuristic techniques based on
830 * unrolling, combined with techniques from Liang et al, "A Decision Procedure
831 * for Regular Membership and Length Constraints over Unbounded Strings",
832 * FroCoS 2015.
833 */
834 void checkMemberships();
835 /** check cardinality
836 *
837 * This function checks whether a cardinality inference needs to be applied
838 * to a set of equivalence classes. For details, see Step 5 of the proof
839 * procedure from Liang et al, CAV 2014.
840 */
841 void checkCardinality();
842 //-----------------------end inference steps
843
844 //-----------------------representation of the strategy
845 /** is strategy initialized */
846 bool d_strategy_init;
847 /** run the given inference step */
848 void runInferStep(InferStep s, int effort);
849 /** the strategy */
850 std::vector<InferStep> d_infer_steps;
851 /** the effort levels */
852 std::vector<int> d_infer_step_effort;
853 /** the range (begin, end) of steps to run at given efforts */
854 std::map<Effort, std::pair<unsigned, unsigned> > d_strat_steps;
855 /** do we have a strategy for effort e? */
856 bool hasStrategyEffort(Effort e) const;
857 /** initialize the strategy
858 *
859 * This adds (s,effort) as a strategy step to the vectors d_infer_steps and
860 * d_infer_step_effort. This indicates that a call to runInferStep should
861 * be run as the next step in the strategy. If addBreak is true, we add
862 * a BREAK to the strategy following this step.
863 */
864 void addStrategyStep(InferStep s, int effort = 0, bool addBreak = true);
865 /** initialize the strategy
866 *
867 * This initializes the above information based on the options. This makes
868 * a series of calls to addStrategyStep above.
869 */
870 void initializeStrategy();
871 /** run strategy
872 *
873 * This executes the inference steps starting at index sbegin and ending at
874 * index send. We exit if any step in this sequence adds a lemma or infers a
875 * fact.
876 */
877 void runStrategy(unsigned sbegin, unsigned send);
878 //-----------------------end representation of the strategy
879
880 };/* class TheoryStrings */
881
882 }/* CVC4::theory::strings namespace */
883 }/* CVC4::theory namespace */
884 }/* CVC4 namespace */
885
886 #endif /* __CVC4__THEORY__STRINGS__THEORY_STRINGS_H */