1 /********************* */
2 /*! \file theory_strings.h
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Tianyi Liang, Tim King
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
12 ** \brief Theory of strings
17 #include "cvc4_private.h"
19 #ifndef __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
20 #define __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
22 #include "context/cdhashset.h"
23 #include "context/cdlist.h"
24 #include "expr/attribute.h"
25 #include "theory/strings/regexp_operation.h"
26 #include "theory/strings/theory_strings_preprocess.h"
27 #include "theory/theory.h"
28 #include "theory/uf/equality_engine.h"
36 namespace quantifiers
{
43 * Decision procedure for strings.
47 /** Types of inferences used in the procedure
49 * These are variants of the inference rules in Figures 3-5 of Liang et al.
50 * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014.
55 // string split constant propagation, for example:
56 // x = y, x = "abc", y = y1 ++ "b" ++ y2
57 // implies y1 = "a" ++ y1'
58 INFER_SSPLIT_CST_PROP
,
59 // string split variable propagation, for example:
60 // x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 )
61 // implies x1 = y1 ++ x1'
62 // This is inspired by Zheng et al CAV 2015.
63 INFER_SSPLIT_VAR_PROP
,
64 // length split, for example:
65 // len( x1 ) = len( y1 ) V len( x1 ) != len( y1 )
66 // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2.
68 // length split empty, for example:
70 // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z
72 // string split constant binary, for example:
73 // x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1'
74 // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2.
75 // This inference is disabled by default and is enabled by stringBinaryCsp().
76 INFER_SSPLIT_CST_BINARY
,
77 // string split constant
78 // x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != ""
79 // implies y1 = "c" ++ y1'
80 // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014.
82 // string split variable, for example:
83 // x = y, x = x1 ++ x2, y = y1 ++ y2
84 // implies x1 = y1 ++ x1' V y1 = x1 ++ y1'
85 // This is rule F-Split in Figure 5 of Liang et al CAV 2014.
87 // flat form loop, for example:
88 // x = y, x = x1 ++ z, y = z ++ y2
89 // implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1
90 // for fresh u, u1, u2.
91 // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014.
94 std::ostream
& operator<<(std::ostream
& out
, Inference i
);
98 * Corresponds to a step in the overall strategy of the strings solver. For
99 * details on the individual steps, see documentation on the inference schemas
100 * within TheoryStrings.
104 // indicates that the strategy should break if lemmas or facts are added
108 // check constant equivalence classes
110 // check extended function evaluation
116 // check normal forms equalities
117 CHECK_NORMAL_FORMS_EQ
,
118 // check normal forms disequalities
119 CHECK_NORMAL_FORMS_DEQ
,
122 // check lengths for equivalence classes
124 // check extended function reductions
125 CHECK_EXTF_REDUCTION
,
126 // check regular expression memberships
131 std::ostream
& operator<<(std::ostream
& out
, Inference i
);
133 struct StringsProxyVarAttributeId
{};
134 typedef expr::Attribute
< StringsProxyVarAttributeId
, bool > StringsProxyVarAttribute
;
136 class TheoryStrings
: public Theory
{
137 typedef context::CDList
<Node
> NodeList
;
138 typedef context::CDHashMap
<Node
, bool, NodeHashFunction
> NodeBoolMap
;
139 typedef context::CDHashMap
<Node
, int, NodeHashFunction
> NodeIntMap
;
140 typedef context::CDHashMap
<Node
, Node
, NodeHashFunction
> NodeNodeMap
;
141 typedef context::CDHashSet
<Node
, NodeHashFunction
> NodeSet
;
144 TheoryStrings(context::Context
* c
, context::UserContext
* u
,
145 OutputChannel
& out
, Valuation valuation
,
146 const LogicInfo
& logicInfo
);
149 void setMasterEqualityEngine(eq::EqualityEngine
* eq
) override
;
151 std::string
identify() const override
{ return std::string("TheoryStrings"); }
154 void propagate(Effort e
) override
;
155 bool propagate(TNode literal
);
156 void explain( TNode literal
, std::vector
<TNode
>& assumptions
);
157 Node
explain(TNode literal
) override
;
158 eq::EqualityEngine
* getEqualityEngine() override
{ return &d_equalityEngine
; }
159 bool getCurrentSubstitution(int effort
,
160 std::vector
<Node
>& vars
,
161 std::vector
<Node
>& subs
,
162 std::map
<Node
, std::vector
<Node
> >& exp
) override
;
163 int getReduction(int effort
, Node n
, Node
& nr
) override
;
165 // NotifyClass for equality engine
166 class NotifyClass
: public eq::EqualityEngineNotify
{
167 TheoryStrings
& d_str
;
169 NotifyClass(TheoryStrings
& t_str
): d_str(t_str
) {}
170 bool eqNotifyTriggerEquality(TNode equality
, bool value
) override
172 Debug("strings") << "NotifyClass::eqNotifyTriggerEquality(" << equality
<< ", " << (value
? "true" : "false" )<< ")" << std::endl
;
174 return d_str
.propagate(equality
);
176 // We use only literal triggers so taking not is safe
177 return d_str
.propagate(equality
.notNode());
180 bool eqNotifyTriggerPredicate(TNode predicate
, bool value
) override
182 Debug("strings") << "NotifyClass::eqNotifyTriggerPredicate(" << predicate
<< ", " << (value
? "true" : "false") << ")" << std::endl
;
184 return d_str
.propagate(predicate
);
186 return d_str
.propagate(predicate
.notNode());
189 bool eqNotifyTriggerTermEquality(TheoryId tag
,
194 Debug("strings") << "NotifyClass::eqNotifyTriggerTermMerge(" << tag
<< ", " << t1
<< ", " << t2
<< ")" << std::endl
;
196 return d_str
.propagate(t1
.eqNode(t2
));
198 return d_str
.propagate(t1
.eqNode(t2
).notNode());
201 void eqNotifyConstantTermMerge(TNode t1
, TNode t2
) override
203 Debug("strings") << "NotifyClass::eqNotifyConstantTermMerge(" << t1
<< ", " << t2
<< ")" << std::endl
;
204 d_str
.conflict(t1
, t2
);
206 void eqNotifyNewClass(TNode t
) override
208 Debug("strings") << "NotifyClass::eqNotifyNewClass(" << t
<< std::endl
;
209 d_str
.eqNotifyNewClass(t
);
211 void eqNotifyPreMerge(TNode t1
, TNode t2
) override
213 Debug("strings") << "NotifyClass::eqNotifyPreMerge(" << t1
<< ", " << t2
<< std::endl
;
214 d_str
.eqNotifyPreMerge(t1
, t2
);
216 void eqNotifyPostMerge(TNode t1
, TNode t2
) override
218 Debug("strings") << "NotifyClass::eqNotifyPostMerge(" << t1
<< ", " << t2
<< std::endl
;
219 d_str
.eqNotifyPostMerge(t1
, t2
);
221 void eqNotifyDisequal(TNode t1
, TNode t2
, TNode reason
) override
223 Debug("strings") << "NotifyClass::eqNotifyDisequal(" << t1
<< ", " << t2
<< ", " << reason
<< std::endl
;
224 d_str
.eqNotifyDisequal(t1
, t2
, reason
);
226 };/* class TheoryStrings::NotifyClass */
237 CVC4::Rational RMAXINT
;
238 unsigned d_card_size
;
240 Node
getRepresentative( Node t
);
241 bool hasTerm( Node a
);
242 bool areEqual( Node a
, Node b
);
243 bool areDisequal( Node a
, Node b
);
244 bool areCareDisequal( TNode x
, TNode y
);
245 // t is representative, te = t, add lt = te to explanation exp
246 Node
getLengthExp( Node t
, std::vector
< Node
>& exp
, Node te
);
247 Node
getLength( Node t
, std::vector
< Node
>& exp
);
250 /** The notify class */
251 NotifyClass d_notify
;
252 /** Equaltity engine */
253 eq::EqualityEngine d_equalityEngine
;
254 /** Are we in conflict */
255 context::CDO
<bool> d_conflict
;
256 //list of pairs of nodes to merge
257 std::map
< Node
, Node
> d_pending_exp
;
258 std::vector
< Node
> d_pending
;
259 std::vector
< Node
> d_lemma_cache
;
260 std::map
< Node
, bool > d_pending_req_phase
;
261 /** inferences: maintained to ensure ref count for internally introduced nodes */
263 NodeList d_infer_exp
;
265 std::map
< Node
, Node
> d_normal_forms_base
;
266 std::map
< Node
, std::vector
< Node
> > d_normal_forms
;
267 std::map
< Node
, std::vector
< Node
> > d_normal_forms_exp
;
268 std::map
< Node
, std::map
< Node
, std::map
< bool, int > > > d_normal_forms_exp_depend
;
269 //map of pairs of terms that have the same normal form
270 NodeIntMap d_nf_pairs
;
271 std::map
< Node
, std::vector
< Node
> > d_nf_pairs_data
;
272 void addNormalFormPair( Node n1
, Node n2
);
273 bool isNormalFormPair( Node n1
, Node n2
);
274 bool isNormalFormPair2( Node n1
, Node n2
);
276 NodeSet d_pregistered_terms_cache
;
277 NodeSet d_registered_terms_cache
;
278 NodeSet d_length_lemma_terms_cache
;
279 NodeSet d_skolem_ne_reg_cache
;
281 StringsPreprocess d_preproc
;
282 NodeBoolMap d_preproc_cache
;
283 // extended functions inferences cache
284 NodeSet d_extf_infer_cache
;
285 NodeSet d_extf_infer_cache_u
;
286 std::vector
< Node
> d_empty_vec
;
288 NodeList d_ee_disequalities
;
291 std::map
< Node
, Node
> d_eqc_to_const
;
292 std::map
< Node
, Node
> d_eqc_to_const_base
;
293 std::map
< Node
, Node
> d_eqc_to_const_exp
;
294 Node
getConstantEqc( Node eqc
);
296 std::map
< Node
, Node
> d_eqc_to_len_term
;
297 std::vector
< Node
> d_strings_eqc
;
298 Node d_emptyString_r
;
302 std::map
< TNode
, TermIndex
> d_children
;
303 Node
add( TNode n
, unsigned index
, TheoryStrings
* t
, Node er
, std::vector
< Node
>& c
);
304 void clear(){ d_children
.clear(); }
306 std::map
< Kind
, TermIndex
> d_term_index
;
307 //list of non-congruent concat terms in each eqc
308 std::map
< Node
, std::vector
< Node
> > d_eqc
;
309 std::map
< Node
, std::vector
< Node
> > d_flat_form
;
310 std::map
< Node
, std::vector
< int > > d_flat_form_index
;
312 void debugPrintFlatForms( const char * tc
);
313 void debugPrintNormalForms( const char * tc
);
314 /////////////////////////////////////////////////////////////////////////////
316 /////////////////////////////////////////////////////////////////////////////
318 bool collectModelInfo(TheoryModel
* m
) override
;
320 /////////////////////////////////////////////////////////////////////////////
322 /////////////////////////////////////////////////////////////////////////////
324 void presolve() override
;
325 void shutdown() override
{}
327 /////////////////////////////////////////////////////////////////////////////
329 /////////////////////////////////////////////////////////////////////////////
331 void addSharedTerm(TNode n
) override
;
332 EqualityStatus
getEqualityStatus(TNode a
, TNode b
) override
;
335 /** SAT-context-dependent information about an equivalence class */
338 EqcInfo( context::Context
* c
);
341 * If non-null, this is a term x from this eq class such that str.len( x )
342 * occurs as a term in this SAT context.
344 context::CDO
< Node
> d_length_term
;
346 * If non-null, this is a term x from this eq class such that str.code( x )
347 * occurs as a term in this SAT context.
349 context::CDO
<Node
> d_code_term
;
350 context::CDO
< unsigned > d_cardinality_lem_k
;
351 context::CDO
< Node
> d_normalized_length
;
353 /** map from representatives to information necessary for equivalence classes */
354 std::map
< Node
, EqcInfo
* > d_eqc_info
;
356 * Get the above information for equivalence class eqc. If doMake is true,
357 * we construct a new information class if one does not exist. The term eqc
358 * should currently be a representative of the equality engine of this class.
360 EqcInfo
* getOrMakeEqcInfo( Node eqc
, bool doMake
= true );
361 //maintain which concat terms have the length lemma instantiated
362 NodeNodeMap d_proxy_var
;
363 NodeNodeMap d_proxy_var_to_length
;
364 /** All the function terms that the theory has seen */
365 context::CDList
<TNode
> d_functionsTerms
;
367 //any non-reduced extended functions exist
368 context::CDO
< bool > d_has_extf
;
369 /** have we asserted any str.code terms? */
371 // static information about extf
374 //all variables in this term
375 std::vector
< Node
> d_vars
;
377 // non-static information about extf
382 d_model_active
= true;
384 // list of terms that something (does not) contain and their explanation
385 std::map
< bool, std::vector
< Node
> > d_ctn
;
386 std::map
< bool, std::vector
< Node
> > d_ctn_from
;
390 std::vector
< Node
> d_exp
;
391 //false if it is reduced in the model
394 std::map
< Node
, ExtfInfoTmp
> d_extf_info_tmp
;
401 std::vector
< Node
> d_ant
;
402 std::vector
< Node
> d_antn
;
403 std::map
< int, std::vector
< Node
> > d_new_skolem
;
406 std::map
< Node
, bool > d_pending_phase
;
411 void checkConstantEquivalenceClasses( TermIndex
* ti
, std::vector
< Node
>& vecc
);
412 void checkExtfInference( Node n
, Node nr
, ExtfInfoTmp
& in
, int effort
);
413 Node
getSymbolicDefinition( Node n
, std::vector
< Node
>& exp
);
415 //--------------------------for checkFlatForm
417 * This checks whether there are flat form inferences between eqc[start] and
418 * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
419 * interferences in the reverse direction of the flat forms. For more details,
420 * see checkFlatForms below.
422 void checkFlatForm(std::vector
<Node
>& eqc
, unsigned start
, bool isRev
);
423 //--------------------------end for checkFlatForm
425 //--------------------------for checkCycles
426 Node
checkCycles( Node eqc
, std::vector
< Node
>& curr
, std::vector
< Node
>& exp
);
427 //--------------------------end for checkCycles
429 //--------------------------for checkNormalFormsEq
430 void normalizeEquivalenceClass( Node n
);
431 void getNormalForms( Node
&eqc
, std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
432 std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
);
433 bool detectLoop( std::vector
< std::vector
< Node
> > &normal_forms
, int i
, int j
, int index
, int &loop_in_i
, int &loop_in_j
, unsigned rproc
);
434 bool processLoop( std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
435 int i
, int j
, int loop_n_index
, int other_n_index
,int loop_index
, int index
, InferInfo
& info
);
436 void processNEqc( std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
437 std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
);
438 void processReverseNEq( std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
439 std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
,
440 unsigned i
, unsigned j
, unsigned& index
, unsigned rproc
, std::vector
< InferInfo
>& pinfer
);
441 void processSimpleNEq( std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
442 std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
,
443 unsigned i
, unsigned j
, unsigned& index
, bool isRev
, unsigned rproc
, std::vector
< InferInfo
>& pinfer
);
444 //--------------------------end for checkNormalFormsEq
446 //--------------------------for checkNormalFormsDeq
447 void processDeq( Node n1
, Node n2
);
448 int processReverseDeq( std::vector
< Node
>& nfi
, std::vector
< Node
>& nfj
, Node ni
, Node nj
);
449 int processSimpleDeq( std::vector
< Node
>& nfi
, std::vector
< Node
>& nfj
, Node ni
, Node nj
, unsigned& index
, bool isRev
);
450 void getExplanationVectorForPrefix( std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
,
451 unsigned i
, int index
, bool isRev
, std::vector
< Node
>& curr_exp
);
452 void getExplanationVectorForPrefixEq( std::vector
< std::vector
< Node
> > &normal_forms
, std::vector
< Node
> &normal_form_src
,
453 std::vector
< std::vector
< Node
> > &normal_forms_exp
, std::vector
< std::map
< Node
, std::map
< bool, int > > >& normal_forms_exp_depend
,
454 unsigned i
, unsigned j
, int index_i
, int index_j
, bool isRev
, std::vector
< Node
>& curr_exp
);
455 //--------------------------end for checkNormalFormsDeq
457 //--------------------------------for checkMemberships
458 // check membership constraints
459 Node
mkRegExpAntec(Node atom
, Node ant
);
460 bool applyRConsume( CVC4::String
&s
, Node
&r
);
461 Node
applyRSplit( Node s1
, Node s2
, Node r
);
462 bool applyRLen( std::map
< Node
, std::vector
< Node
> > &XinR_with_exps
);
463 bool checkPDerivative( Node x
, Node r
, Node atom
, bool &addedLemma
, std::vector
< Node
> &nf_exp
);
465 void checkPosContains( std::vector
< Node
>& posContains
);
466 void checkNegContains( std::vector
< Node
>& negContains
);
467 //--------------------------------end for checkMemberships
470 void addCarePairs( quantifiers::TermArgTrie
* t1
, quantifiers::TermArgTrie
* t2
, unsigned arity
, unsigned depth
);
473 /** preregister term */
474 void preRegisterTerm(TNode n
) override
;
475 /** Expand definition */
476 Node
expandDefinition(LogicRequest
& logicRequest
, Node n
) override
;
477 /** Check at effort e */
478 void check(Effort e
) override
;
479 /** needs check last effort */
480 bool needsCheckLastEffort() override
;
481 /** Conflict when merging two constants */
482 void conflict(TNode a
, TNode b
);
483 /** called when a new equivalence class is created */
484 void eqNotifyNewClass(TNode t
);
485 /** called when two equivalence classes will merge */
486 void eqNotifyPreMerge(TNode t1
, TNode t2
);
487 /** called when two equivalence classes have merged */
488 void eqNotifyPostMerge(TNode t1
, TNode t2
);
489 /** called when two equivalence classes are made disequal */
490 void eqNotifyDisequal(TNode t1
, TNode t2
, TNode reason
);
491 /** get preprocess */
492 StringsPreprocess
* getPreprocess() { return &d_preproc
; }
495 /** compute care graph */
496 void computeCareGraph() override
;
499 void assertPendingFact(Node atom
, bool polarity
, Node exp
);
500 void doPendingFacts();
501 void doPendingLemmas();
503 void addToExplanation(Node a
, Node b
, std::vector
<Node
>& exp
);
504 void addToExplanation(Node lit
, std::vector
<Node
>& exp
);
508 * This performs SAT-context-independent registration for a term n, which
509 * may cause lemmas to be sent on the output channel that involve
510 * "initial refinement lemmas" for n. This includes introducing proxy
511 * variables for string terms and asserting that str.code terms are within
514 * Effort is one of the following (TODO make enum #1881):
515 * 0 : upon preregistration or internal assertion
516 * 1 : upon occurrence in length term
517 * 2 : before normal form computation
518 * 3 : called on normal form terms
520 * Based on the strategy, we may choose to add these initial refinement
521 * lemmas at one of the following efforts, where if it is not the given
522 * effort, the call to this method does nothing.
524 void registerTerm(Node n
, int effort
);
526 void sendInference(std::vector
<Node
>& exp
,
527 std::vector
<Node
>& exp_n
,
530 bool asLemma
= false);
531 void sendInference(std::vector
<Node
>& exp
,
534 bool asLemma
= false);
535 void sendLemma(Node ant
, Node conc
, const char* c
);
536 void sendInfer(Node eq_exp
, Node eq
, const char* c
);
537 bool sendSplit(Node a
, Node b
, const char* c
, bool preq
= true);
538 void sendLengthLemma(Node n
);
540 inline Node
mkConcat(Node n1
, Node n2
);
541 inline Node
mkConcat(Node n1
, Node n2
, Node n3
);
542 inline Node
mkConcat(const std::vector
<Node
>& c
);
543 inline Node
mkLength(Node n
);
553 sk_id_vc_bin_spt_rev
,
563 std::map
<Node
, std::map
<Node
, std::map
<int, Node
> > > d_skolem_cache
;
564 /** the set of all skolems we have generated */
565 std::unordered_set
<Node
, NodeHashFunction
> d_all_skolems
;
567 Node a
, Node b
, int id
, const char* c
, int isLenSplit
= 0);
568 inline Node
mkSkolemS(const char* c
, int isLenSplit
= 0);
569 void registerNonEmptySkolem(Node sk
);
570 // inline Node mkSkolemI(const char * c);
572 Node
mkExplain(std::vector
<Node
>& a
);
573 Node
mkExplain(std::vector
<Node
>& a
, std::vector
<Node
>& an
);
575 Node
mkAnd(std::vector
<Node
>& a
);
576 /** get concat vector */
577 void getConcatVec(Node n
, std::vector
<Node
>& c
);
579 // get equivalence classes
580 void getEquivalenceClasses(std::vector
<Node
>& eqcs
);
582 // separate into collections with equal length
583 void separateByLength(std::vector
<Node
>& n
,
584 std::vector
<std::vector
<Node
> >& col
,
585 std::vector
<Node
>& lts
);
586 void printConcat(std::vector
<Node
>& n
, const char* c
);
588 void inferSubstitutionProxyVars(Node n
,
589 std::vector
<Node
>& vars
,
590 std::vector
<Node
>& subs
,
591 std::vector
<Node
>& unproc
);
593 // Symbolic Regular Expression
595 // regular expression memberships
596 NodeList d_regexp_memberships
;
597 NodeSet d_regexp_ucached
;
598 NodeSet d_regexp_ccached
;
600 NodeIntMap d_pos_memberships
;
601 std::map
< Node
, std::vector
< Node
> > d_pos_memberships_data
;
602 NodeIntMap d_neg_memberships
;
603 std::map
< Node
, std::vector
< Node
> > d_neg_memberships_data
;
604 unsigned getNumMemberships( Node n
, bool isPos
);
605 Node
getMembership( Node n
, bool isPos
, unsigned i
);
606 // semi normal forms for symbolic expression
607 std::map
< Node
, Node
> d_nf_regexps
;
608 std::map
< Node
, std::vector
< Node
> > d_nf_regexps_exp
;
610 NodeNodeMap d_inter_cache
;
611 NodeIntMap d_inter_index
;
612 // processed memberships
613 NodeSet d_processed_memberships
;
614 // antecedant for why regexp membership must be true
615 NodeNodeMap d_regexp_ant
;
617 //std::map< Node, bool > d_membership_length;
618 // regular expression operations
619 RegExpOpr d_regexp_opr
;
621 CVC4::String
getHeadConst( Node x
);
622 bool deriveRegExp( Node x
, Node r
, Node ant
);
623 void addMembership(Node assertion
);
624 Node
getNormalString(Node x
, std::vector
<Node
> &nf_exp
);
625 Node
getNormalSymRegExp(Node r
, std::vector
<Node
> &nf_exp
);
628 // Finite Model Finding
630 NodeSet d_input_vars
;
631 context::CDO
< Node
> d_input_var_lsum
;
632 context::CDHashMap
< int, Node
> d_cardinality_lits
;
633 context::CDO
< int > d_curr_cardinality
;
636 //for finite model finding
637 Node
getNextDecisionRequest(unsigned& priority
) override
;
639 Node
ppRewrite(TNode atom
) override
;
642 /** statistics class */
647 IntStat d_deq_splits
;
648 IntStat d_loop_lemmas
;
649 IntStat d_new_skolems
;
652 };/* class TheoryStrings::Statistics */
653 Statistics d_statistics
;
656 //-----------------------inference steps
659 * This function initializes term indices for each strings function symbol.
660 * One key aspect of this construction is that concat terms are indexed by
661 * their list of non-empty components. For example, if x = "" is an equality
662 * asserted in this SAT context, then y ++ x ++ z may be indexed by (y,z).
663 * This method may infer various facts while building these term indices, for
664 * instance, based on congruence. An example would be inferring:
665 * y ++ x ++ z = y ++ z
666 * if both terms are registered in this SAT context.
668 * This function should be called as a first step of any strategy.
671 /** check constant equivalence classes
673 * This function infers whether CONCAT terms can be simplified to constants.
674 * For example, if x = "a" and y = "b" are equalities in the current SAT
675 * context, then we may infer x ++ "c" ++ y is equivalent to "acb". In this
676 * case, we infer the fact x ++ "c" ++ y = "acb".
678 void checkConstantEquivalenceClasses();
679 /** check extended functions evaluation
681 * This applies "context-dependent simplification" for all active extended
682 * function terms in this SAT context. This infers facts of the form:
683 * x = c => f( t1 ... tn ) = c'
684 * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c
685 * is a (tuple of) equalities that are asserted in this SAT context, and
686 * f( t1 ... tn ) is a term from this SAT context.
688 * For more details, this is steps 4 when effort=0 and step 6 when
689 * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String
690 * Solvers using Context-Dependent Simplification", CAV 2017. When called with
691 * effort=3, we apply context-dependent simplification based on model values.
693 void checkExtfEval(int effort
);
696 * This inference schema ensures that a containment ordering < over the
697 * string equivalence classes is acyclic. We define this ordering < such that
698 * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
699 * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
700 * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
701 * form components that do not constitute this chain, e.g. (w1...wk) \ sj
702 * in the flat form above, must be empty.
704 * For more details, see the inference S-Cycle in Liang et al CAV 2014.
709 * This applies an inference schema based on "flat forms". The flat form of a
710 * string term t is a vector of representative terms [r1, ..., rn] such that
711 * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
712 * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
713 * the equivalence class containing t1. For example, if t is y ++ z ++ z,
714 * E is { y = "", w = z }, and w is the representative of the equivalence
715 * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
716 * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
717 * We may infer various facts based on this pair of terms. For example:
718 * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
719 * rn = sn, if n=m and rj == sj for each j < n,
720 * ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
721 * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
724 * Notice that this inference scheme is an optimization and not needed for
725 * model-soundness. The motivation for this schema is that it is simpler than
726 * checkNormalFormsEq, which can be seen as a recursive version of this
727 * schema (see difference of "normal form" vs "flat form" below), and
728 * checkNormalFormsEq is complete, in the sense that if it passes with no
729 * inferences, we are ensured that all string equalities in the current
730 * context are satisfied.
732 * Must call checkCycles before this function in a strategy.
734 void checkFlatForms();
735 /** check normal forms equalities
737 * This applies an inference schema based on "normal forms". The normal form
738 * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
739 * where t1...tn are concatenation terms is a vector of representative terms
740 * [r1, ..., rm] such that:
741 * (1) if n=0, then m=1 and r1 is the representative of e,
743 * t1 = t^1_1 ++ ... ++ t^1_m_1
745 * tn = t^1_n ++ ... ++ t^_m_n
746 * for *each* i=1, ..., n, the result of concenating the normal forms of
747 * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
748 * can be assigned a normal form, then all equalities between ti and tj are
749 * satisfied by all models that correspond to extensions of the current
750 * assignment. For further detail on this terminology, see Liang et al
753 * Notice that all constant words are implicitly considered concatentation
754 * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
756 * At a high level, we build normal forms for equivalence classes bottom-up,
757 * starting with equivalence classes that are minimal with respect to the
758 * containment ordering < computed during checkCycles. While computing a
759 * normal form for an equivalence class, we may infer equalities between
760 * components of strings that must be equal (e.g. x=y when x++z == y++w when
761 * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
762 * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
763 * string terms into smaller components using fresh skolem variables (see
764 * Inference values with names "SPLIT"). We also may introduce regular
765 * expression constraints in this method for looping word equations (see
766 * the Inference INFER_FLOOP).
768 * If this inference schema returns no facts, lemmas, or conflicts, then
769 * we have successfully assigned normal forms for all equivalence classes, as
770 * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
771 * conflict based on inferences in the Inference enumeration above.
773 void checkNormalFormsEq();
774 /** check normal forms disequalities
776 * This inference schema can be seen as the converse of the above schema. In
777 * particular, it ensures that each pair of distinct equivalence classes
778 * e1 and e2 have distinct normal forms.
780 * This method considers all pairs of distinct equivalence classes (e1,e2)
781 * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
782 * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
783 * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
784 * disequal and have the same length, then x1 and x2 have distinct normal
785 * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
786 * or split on an equality between ri and si.
788 * If this inference schema returns no facts, lemmas, or conflicts, then all
789 * disequalities between string terms are satisfied by all models that are
790 * extensions of the current assignment.
792 void checkNormalFormsDeq();
795 * This inference schema ensures that constraints between str.code terms
796 * are satisfied by models that correspond to extensions of the current
797 * assignment. In particular, this method ensures that str.code can be
798 * given an interpretation that is injective for string arguments with length
799 * one. It may add lemmas of the form:
800 * str.code(x) == -1 V str.code(x) != str.code(y) V x == y
803 /** check lengths for equivalence classes
805 * This inference schema adds lemmas of the form:
806 * E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
807 * where [r1, ..., rn] is the normal form of the equivalence class containing
808 * x. This schema is not required for correctness but experimentally has
809 * shown to be helpful.
811 void checkLengthsEqc();
812 /** check extended function reductions
814 * This adds "reduction" lemmas for each active extended function in this SAT
815 * context. These are generally lemmas of the form:
816 * F[t1...tn,k] ^ f( t1 ... tn ) = k
817 * where f( t1 ... tn ) is an active extended function, k is a fresh constant
818 * and F is a formula that constrains k based on the definition of f.
820 * For more details, this is step 7 from Strategy 1 in Reynolds et al,
821 * CAV 2017. We stratify this in practice, where calling this with effort=1
822 * reduces some of the "easier" extended functions, and effort=2 reduces
825 void checkExtfReductions(int effort
);
826 /** check regular expression memberships
828 * This checks the satisfiability of all regular expression memberships
829 * of the form (not) s in R. We use various heuristic techniques based on
830 * unrolling, combined with techniques from Liang et al, "A Decision Procedure
831 * for Regular Membership and Length Constraints over Unbounded Strings",
834 void checkMemberships();
835 /** check cardinality
837 * This function checks whether a cardinality inference needs to be applied
838 * to a set of equivalence classes. For details, see Step 5 of the proof
839 * procedure from Liang et al, CAV 2014.
841 void checkCardinality();
842 //-----------------------end inference steps
844 //-----------------------representation of the strategy
845 /** is strategy initialized */
846 bool d_strategy_init
;
847 /** run the given inference step */
848 void runInferStep(InferStep s
, int effort
);
850 std::vector
<InferStep
> d_infer_steps
;
851 /** the effort levels */
852 std::vector
<int> d_infer_step_effort
;
853 /** the range (begin, end) of steps to run at given efforts */
854 std::map
<Effort
, std::pair
<unsigned, unsigned> > d_strat_steps
;
855 /** do we have a strategy for effort e? */
856 bool hasStrategyEffort(Effort e
) const;
857 /** initialize the strategy
859 * This adds (s,effort) as a strategy step to the vectors d_infer_steps and
860 * d_infer_step_effort. This indicates that a call to runInferStep should
861 * be run as the next step in the strategy. If addBreak is true, we add
862 * a BREAK to the strategy following this step.
864 void addStrategyStep(InferStep s
, int effort
= 0, bool addBreak
= true);
865 /** initialize the strategy
867 * This initializes the above information based on the options. This makes
868 * a series of calls to addStrategyStep above.
870 void initializeStrategy();
873 * This executes the inference steps starting at index sbegin and ending at
874 * index send. We exit if any step in this sequence adds a lemma or infers a
877 void runStrategy(unsigned sbegin
, unsigned send
);
878 //-----------------------end representation of the strategy
880 };/* class TheoryStrings */
882 }/* CVC4::theory::strings namespace */
883 }/* CVC4::theory namespace */
884 }/* CVC4 namespace */
886 #endif /* __CVC4__THEORY__STRINGS__THEORY_STRINGS_H */