src/theory/strings/theory_strings.h

   1 /*********************                                                        */
   2 /*! \file theory_strings.h
   3  ** \verbatim
   4  ** Top contributors (to current version):
   5  **   Andrew Reynolds, Tianyi Liang, Tim King
   6  ** This file is part of the CVC4 project.
   7  ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
   8  ** in the top-level source directory) and their institutional affiliations.
   9  ** All rights reserved.  See the file COPYING in the top-level source
  10  ** directory for licensing information.\endverbatim
  11  **
  12  ** \brief Theory of strings
  13  **
  14  ** Theory of strings.
  15  **/
  16
  17 #include "cvc4_private.h"
  18
  19 #ifndef __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
  20 #define __CVC4__THEORY__STRINGS__THEORY_STRINGS_H
  21
  22 #include "context/cdhashset.h"
  23 #include "context/cdlist.h"
  24 #include "expr/attribute.h"
  25 #include "theory/strings/regexp_operation.h"
  26 #include "theory/strings/theory_strings_preprocess.h"
  27 #include "theory/theory.h"
  28 #include "theory/uf/equality_engine.h"
  29
  30 #include <climits>
  31 #include <deque>
  32
  33 namespace CVC4 {
  34 namespace theory {
  35
  36 namespace quantifiers{
  37   class TermArgTrie;
  38 }
  39
  40 namespace strings {
  41
  42 /**
  43  * Decision procedure for strings.
  44  *
  45  */
  46
  47 /** Types of inferences used in the procedure
  48  *
  49  * These are variants of the inference rules in Figures 3-5 of Liang et al.
  50  * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014.
  51  */
  52 enum Inference
  53 {
  54   INFER_NONE,
  55   // string split constant propagation, for example:
  56   //     x = y, x = "abc", y = y1 ++ "b" ++ y2
  57   //       implies y1 = "a" ++ y1'
  58   INFER_SSPLIT_CST_PROP,
  59   // string split variable propagation, for example:
  60   //     x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 )
  61   //       implies x1 = y1 ++ x1'
  62   // This is inspired by Zheng et al CAV 2015.
  63   INFER_SSPLIT_VAR_PROP,
  64   // length split, for example:
  65   //     len( x1 ) = len( y1 ) V len( x1 ) != len( y1 )
  66   // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2.
  67   INFER_LEN_SPLIT,
  68   // length split empty, for example:
  69   //     z = "" V z != ""
  70   // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z
  71   INFER_LEN_SPLIT_EMP,
  72   // string split constant binary, for example:
  73   //     x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1'
  74   // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2.
  75   // This inference is disabled by default and is enabled by stringBinaryCsp().
  76   INFER_SSPLIT_CST_BINARY,
  77   // string split constant
  78   //    x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != ""
  79   //      implies y1 = "c" ++ y1'
  80   // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014.
  81   INFER_SSPLIT_CST,
  82   // string split variable, for example:
  83   //    x = y, x = x1 ++ x2, y = y1 ++ y2
  84   //      implies x1 = y1 ++ x1' V y1 = x1 ++ y1'
  85   // This is rule F-Split in Figure 5 of Liang et al CAV 2014.
  86   INFER_SSPLIT_VAR,
  87   // flat form loop, for example:
  88   //    x = y, x = x1 ++ z, y = z ++ y2
  89   //      implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1
  90   //        for fresh u, u1, u2.
  91   // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014.
  92   INFER_FLOOP,
  93 };
  94 std::ostream& operator<<(std::ostream& out, Inference i);
  95
  96 /** inference steps
  97  *
  98  * Corresponds to a step in the overall strategy of the strings solver. For
  99  * details on the individual steps, see documentation on the inference schemas
 100  * within TheoryStrings.
 101  */
 102 enum InferStep
 103 {
 104   // indicates that the strategy should break if lemmas or facts are added
 105   BREAK,
 106   // check initial
 107   CHECK_INIT,
 108   // check constant equivalence classes
 109   CHECK_CONST_EQC,
 110   // check extended function evaluation
 111   CHECK_EXTF_EVAL,
 112   // check cycles
 113   CHECK_CYCLES,
 114   // check flat forms
 115   CHECK_FLAT_FORMS,
 116   // check normal forms equalities
 117   CHECK_NORMAL_FORMS_EQ,
 118   // check normal forms disequalities
 119   CHECK_NORMAL_FORMS_DEQ,
 120   // check codes
 121   CHECK_CODES,
 122   // check lengths for equivalence classes
 123   CHECK_LENGTH_EQC,
 124   // check extended function reductions
 125   CHECK_EXTF_REDUCTION,
 126   // check regular expression memberships
 127   CHECK_MEMBERSHIP,
 128   // check cardinality
 129   CHECK_CARDINALITY,
 130 };
 131 std::ostream& operator<<(std::ostream& out, Inference i);
 132
 133 struct StringsProxyVarAttributeId {};
 134 typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute;
 135
 136 class TheoryStrings : public Theory {
 137   typedef context::CDList<Node> NodeList;
 138   typedef context::CDHashMap<Node, bool, NodeHashFunction> NodeBoolMap;
 139   typedef context::CDHashMap<Node, int, NodeHashFunction> NodeIntMap;
 140   typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap;
 141   typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
 142
 143  public:
 144   TheoryStrings(context::Context* c, context::UserContext* u,
 145                 OutputChannel& out, Valuation valuation,
 146                 const LogicInfo& logicInfo);
 147   ~TheoryStrings();
 148
 149   void setMasterEqualityEngine(eq::EqualityEngine* eq) override;
 150
 151   std::string identify() const override { return std::string("TheoryStrings"); }
 152
 153  public:
 154   void propagate(Effort e) override;
 155   bool propagate(TNode literal);
 156   void explain( TNode literal, std::vector<TNode>& assumptions );
 157   Node explain(TNode literal) override;
 158   eq::EqualityEngine* getEqualityEngine() override { return &d_equalityEngine; }
 159   bool getCurrentSubstitution(int effort,
 160                               std::vector<Node>& vars,
 161                               std::vector<Node>& subs,
 162                               std::map<Node, std::vector<Node> >& exp) override;
 163   int getReduction(int effort, Node n, Node& nr) override;
 164
 165   // NotifyClass for equality engine
 166   class NotifyClass : public eq::EqualityEngineNotify {
 167     TheoryStrings& d_str;
 168   public:
 169     NotifyClass(TheoryStrings& t_str): d_str(t_str) {}
 170     bool eqNotifyTriggerEquality(TNode equality, bool value) override
 171     {
 172       Debug("strings") << "NotifyClass::eqNotifyTriggerEquality(" << equality << ", " << (value ? "true" : "false" )<< ")" << std::endl;
 173       if (value) {
 174         return d_str.propagate(equality);
 175       } else {
 176         // We use only literal triggers so taking not is safe
 177         return d_str.propagate(equality.notNode());
 178       }
 179     }
 180     bool eqNotifyTriggerPredicate(TNode predicate, bool value) override
 181     {
 182       Debug("strings") << "NotifyClass::eqNotifyTriggerPredicate(" << predicate << ", " << (value ? "true" : "false") << ")" << std::endl;
 183       if (value) {
 184         return d_str.propagate(predicate);
 185       } else {
 186          return d_str.propagate(predicate.notNode());
 187       }
 188     }
 189     bool eqNotifyTriggerTermEquality(TheoryId tag,
 190                                      TNode t1,
 191                                      TNode t2,
 192                                      bool value) override
 193     {
 194       Debug("strings") << "NotifyClass::eqNotifyTriggerTermMerge(" << tag << ", " << t1 << ", " << t2 << ")" << std::endl;
 195       if (value) {
 196       return d_str.propagate(t1.eqNode(t2));
 197       } else {
 198       return d_str.propagate(t1.eqNode(t2).notNode());
 199       }
 200     }
 201     void eqNotifyConstantTermMerge(TNode t1, TNode t2) override
 202     {
 203       Debug("strings") << "NotifyClass::eqNotifyConstantTermMerge(" << t1 << ", " << t2 << ")" << std::endl;
 204       d_str.conflict(t1, t2);
 205     }
 206     void eqNotifyNewClass(TNode t) override
 207     {
 208       Debug("strings") << "NotifyClass::eqNotifyNewClass(" << t << std::endl;
 209       d_str.eqNotifyNewClass(t);
 210     }
 211     void eqNotifyPreMerge(TNode t1, TNode t2) override
 212     {
 213       Debug("strings") << "NotifyClass::eqNotifyPreMerge(" << t1 << ", " << t2 << std::endl;
 214       d_str.eqNotifyPreMerge(t1, t2);
 215     }
 216     void eqNotifyPostMerge(TNode t1, TNode t2) override
 217     {
 218       Debug("strings") << "NotifyClass::eqNotifyPostMerge(" << t1 << ", " << t2 << std::endl;
 219       d_str.eqNotifyPostMerge(t1, t2);
 220     }
 221     void eqNotifyDisequal(TNode t1, TNode t2, TNode reason) override
 222     {
 223       Debug("strings") << "NotifyClass::eqNotifyDisequal(" << t1 << ", " << t2 << ", " << reason << std::endl;
 224       d_str.eqNotifyDisequal(t1, t2, reason);
 225     }
 226   };/* class TheoryStrings::NotifyClass */
 227
 228 private:
 229   // Constants
 230   Node d_emptyString;
 231   Node d_emptyRegexp;
 232   Node d_true;
 233   Node d_false;
 234   Node d_zero;
 235   Node d_one;
 236   Node d_neg_one;
 237   CVC4::Rational RMAXINT;
 238   unsigned d_card_size;
 239   // Helper functions
 240   Node getRepresentative( Node t );
 241   bool hasTerm( Node a );
 242   bool areEqual( Node a, Node b );
 243   bool areDisequal( Node a, Node b );
 244   bool areCareDisequal( TNode x, TNode y );
 245   // t is representative, te = t, add lt = te to explanation exp
 246   Node getLengthExp( Node t, std::vector< Node >& exp, Node te );
 247   Node getLength( Node t, std::vector< Node >& exp );
 248
 249 private:
 250   /** The notify class */
 251   NotifyClass d_notify;
 252   /** Equaltity engine */
 253   eq::EqualityEngine d_equalityEngine;
 254   /** Are we in conflict */
 255   context::CDO<bool> d_conflict;
 256   //list of pairs of nodes to merge
 257   std::map< Node, Node > d_pending_exp;
 258   std::vector< Node > d_pending;
 259   std::vector< Node > d_lemma_cache;
 260   std::map< Node, bool > d_pending_req_phase;
 261   /** inferences: maintained to ensure ref count for internally introduced nodes */
 262   NodeList d_infer;
 263   NodeList d_infer_exp;
 264   /** normal forms */
 265   std::map< Node, Node > d_normal_forms_base;
 266   std::map< Node, std::vector< Node > > d_normal_forms;
 267   std::map< Node, std::vector< Node > > d_normal_forms_exp;
 268   std::map< Node, std::map< Node, std::map< bool, int > > > d_normal_forms_exp_depend;
 269   //map of pairs of terms that have the same normal form
 270   NodeIntMap d_nf_pairs;
 271   std::map< Node, std::vector< Node > > d_nf_pairs_data;
 272   void addNormalFormPair( Node n1, Node n2 );
 273   bool isNormalFormPair( Node n1, Node n2 );
 274   bool isNormalFormPair2( Node n1, Node n2 );
 275   // preReg cache
 276   NodeSet d_pregistered_terms_cache;
 277   NodeSet d_registered_terms_cache;
 278   NodeSet d_length_lemma_terms_cache;
 279   NodeSet d_skolem_ne_reg_cache;
 280   // preprocess cache
 281   StringsPreprocess d_preproc;
 282   NodeBoolMap d_preproc_cache;
 283   // extended functions inferences cache
 284   NodeSet d_extf_infer_cache;
 285   NodeSet d_extf_infer_cache_u;
 286   std::vector< Node > d_empty_vec;
 287   //
 288   NodeList d_ee_disequalities;
 289 private:
 290   NodeSet d_congruent;
 291   std::map< Node, Node > d_eqc_to_const;
 292   std::map< Node, Node > d_eqc_to_const_base;
 293   std::map< Node, Node > d_eqc_to_const_exp;
 294   Node getConstantEqc( Node eqc );
 295
 296   std::map< Node, Node > d_eqc_to_len_term;
 297   std::vector< Node > d_strings_eqc;
 298   Node d_emptyString_r;
 299   class TermIndex {
 300   public:
 301     Node d_data;
 302     std::map< TNode, TermIndex > d_children;
 303     Node add( TNode n, unsigned index, TheoryStrings* t, Node er, std::vector< Node >& c );
 304     void clear(){ d_children.clear(); }
 305   };
 306   std::map< Kind, TermIndex > d_term_index;
 307   //list of non-congruent concat terms in each eqc
 308   std::map< Node, std::vector< Node > > d_eqc;
 309   std::map< Node, std::vector< Node > > d_flat_form;
 310   std::map< Node, std::vector< int > > d_flat_form_index;
 311
 312   void debugPrintFlatForms( const char * tc );
 313   void debugPrintNormalForms( const char * tc );
 314   /////////////////////////////////////////////////////////////////////////////
 315   // MODEL GENERATION
 316   /////////////////////////////////////////////////////////////////////////////
 317  public:
 318   bool collectModelInfo(TheoryModel* m) override;
 319
 320   /////////////////////////////////////////////////////////////////////////////
 321   // NOTIFICATIONS
 322   /////////////////////////////////////////////////////////////////////////////
 323  public:
 324   void presolve() override;
 325   void shutdown() override {}
 326
 327   /////////////////////////////////////////////////////////////////////////////
 328   // MAIN SOLVER
 329   /////////////////////////////////////////////////////////////////////////////
 330  private:
 331   void addSharedTerm(TNode n) override;
 332   EqualityStatus getEqualityStatus(TNode a, TNode b) override;
 333
 334  private:
 335   /** SAT-context-dependent information about an equivalence class */
 336   class EqcInfo {
 337   public:
 338     EqcInfo( context::Context* c );
 339     ~EqcInfo(){}
 340     /**
 341      * If non-null, this is a term x from this eq class such that str.len( x )
 342      * occurs as a term in this SAT context.
 343      */
 344     context::CDO< Node > d_length_term;
 345     /**
 346      * If non-null, this is a term x from this eq class such that str.code( x )
 347      * occurs as a term in this SAT context.
 348      */
 349     context::CDO<Node> d_code_term;
 350     context::CDO< unsigned > d_cardinality_lem_k;
 351     context::CDO< Node > d_normalized_length;
 352   };
 353   /** map from representatives to information necessary for equivalence classes */
 354   std::map< Node, EqcInfo* > d_eqc_info;
 355   /**
 356    * Get the above information for equivalence class eqc. If doMake is true,
 357    * we construct a new information class if one does not exist. The term eqc
 358    * should currently be a representative of the equality engine of this class.
 359    */
 360   EqcInfo * getOrMakeEqcInfo( Node eqc, bool doMake = true );
 361   //maintain which concat terms have the length lemma instantiated
 362   NodeNodeMap d_proxy_var;
 363   NodeNodeMap d_proxy_var_to_length;
 364   /** All the function terms that the theory has seen */
 365   context::CDList<TNode> d_functionsTerms;
 366 private:
 367   //any non-reduced extended functions exist
 368   context::CDO< bool > d_has_extf;
 369   /** have we asserted any str.code terms? */
 370   bool d_has_str_code;
 371   // static information about extf
 372   class ExtfInfo {
 373   public:
 374     //all variables in this term
 375     std::vector< Node > d_vars;
 376   };
 377   // non-static information about extf
 378   class ExtfInfoTmp {
 379   public:
 380     void init(){
 381       d_pol = 0;
 382       d_model_active = true;
 383     }
 384     // list of terms that something (does not) contain and their explanation
 385     std::map< bool, std::vector< Node > > d_ctn;
 386     std::map< bool, std::vector< Node > > d_ctn_from;
 387     //polarity
 388     int d_pol;
 389     //explanation
 390     std::vector< Node > d_exp;
 391     //false if it is reduced in the model
 392     bool d_model_active;
 393   };
 394   std::map< Node, ExtfInfoTmp > d_extf_info_tmp;
 395 private:
 396   class InferInfo {
 397   public:
 398     unsigned d_i;
 399     unsigned d_j;
 400     bool d_rev;
 401     std::vector< Node > d_ant;
 402     std::vector< Node > d_antn;
 403     std::map< int, std::vector< Node > > d_new_skolem;
 404     Node d_conc;
 405     Inference d_id;
 406     std::map< Node, bool > d_pending_phase;
 407     unsigned d_index;
 408     Node d_nf_pair[2];
 409     bool sendAsLemma();
 410   };
 411   void checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc );
 412   void checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int effort );
 413   Node getSymbolicDefinition( Node n, std::vector< Node >& exp );
 414
 415   //--------------------------for checkFlatForm
 416   /**
 417    * This checks whether there are flat form inferences between eqc[start] and
 418    * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
 419    * interferences in the reverse direction of the flat forms. For more details,
 420    * see checkFlatForms below.
 421    */
 422   void checkFlatForm(std::vector<Node>& eqc, unsigned start, bool isRev);
 423   //--------------------------end for checkFlatForm
 424
 425   //--------------------------for checkCycles
 426   Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp );
 427   //--------------------------end for checkCycles
 428
 429   //--------------------------for checkNormalFormsEq
 430   void normalizeEquivalenceClass( Node n );
 431   void getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 432                        std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
 433   bool detectLoop( std::vector< std::vector< Node > > &normal_forms, int i, int j, int index, int &loop_in_i, int &loop_in_j, unsigned rproc );
 434   bool processLoop( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 435                     int i, int j, int loop_n_index, int other_n_index,int loop_index, int index, InferInfo& info );
 436   void processNEqc( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 437                     std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend );
 438   void processReverseNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 439                           std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
 440                           unsigned i, unsigned j, unsigned& index, unsigned rproc, std::vector< InferInfo >& pinfer );
 441   void processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 442                          std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
 443                          unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer );
 444   //--------------------------end for checkNormalFormsEq
 445
 446   //--------------------------for checkNormalFormsDeq
 447   void processDeq( Node n1, Node n2 );
 448   int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj );
 449   int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev );
 450   void getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
 451                                       unsigned i, int index, bool isRev, std::vector< Node >& curr_exp );
 452   void getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src,
 453                                         std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend,
 454                                         unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp );
 455   //--------------------------end for checkNormalFormsDeq
 456
 457   //--------------------------------for checkMemberships
 458   // check membership constraints
 459   Node mkRegExpAntec(Node atom, Node ant);
 460   bool applyRConsume( CVC4::String &s, Node &r );
 461   Node applyRSplit( Node s1, Node s2, Node r );
 462   bool applyRLen( std::map< Node, std::vector< Node > > &XinR_with_exps );
 463   bool checkPDerivative( Node x, Node r, Node atom, bool &addedLemma, std::vector< Node > &nf_exp);
 464   //check contains
 465   void checkPosContains( std::vector< Node >& posContains );
 466   void checkNegContains( std::vector< Node >& negContains );
 467   //--------------------------------end for checkMemberships
 468
 469  private:
 470   void addCarePairs( quantifiers::TermArgTrie * t1, quantifiers::TermArgTrie * t2, unsigned arity, unsigned depth );
 471
 472  public:
 473   /** preregister term */
 474   void preRegisterTerm(TNode n) override;
 475   /** Expand definition */
 476   Node expandDefinition(LogicRequest& logicRequest, Node n) override;
 477   /** Check at effort e */
 478   void check(Effort e) override;
 479   /** needs check last effort */
 480   bool needsCheckLastEffort() override;
 481   /** Conflict when merging two constants */
 482   void conflict(TNode a, TNode b);
 483   /** called when a new equivalence class is created */
 484   void eqNotifyNewClass(TNode t);
 485   /** called when two equivalence classes will merge */
 486   void eqNotifyPreMerge(TNode t1, TNode t2);
 487   /** called when two equivalence classes have merged */
 488   void eqNotifyPostMerge(TNode t1, TNode t2);
 489   /** called when two equivalence classes are made disequal */
 490   void eqNotifyDisequal(TNode t1, TNode t2, TNode reason);
 491   /** get preprocess */
 492   StringsPreprocess* getPreprocess() { return &d_preproc; }
 493
 494  protected:
 495   /** compute care graph */
 496   void computeCareGraph() override;
 497
 498   // do pending merges
 499   void assertPendingFact(Node atom, bool polarity, Node exp);
 500   void doPendingFacts();
 501   void doPendingLemmas();
 502   bool hasProcessed();
 503   void addToExplanation(Node a, Node b, std::vector<Node>& exp);
 504   void addToExplanation(Node lit, std::vector<Node>& exp);
 505
 506   /** Register term
 507    *
 508    * This performs SAT-context-independent registration for a term n, which
 509    * may cause lemmas to be sent on the output channel that involve
 510    * "initial refinement lemmas" for n. This includes introducing proxy
 511    * variables for string terms and asserting that str.code terms are within
 512    * proper bounds.
 513    *
 514    * Effort is one of the following (TODO make enum #1881):
 515    * 0 : upon preregistration or internal assertion
 516    * 1 : upon occurrence in length term
 517    * 2 : before normal form computation
 518    * 3 : called on normal form terms
 519    *
 520    * Based on the strategy, we may choose to add these initial refinement
 521    * lemmas at one of the following efforts, where if it is not the given
 522    * effort, the call to this method does nothing.
 523    */
 524   void registerTerm(Node n, int effort);
 525   // send lemma
 526   void sendInference(std::vector<Node>& exp,
 527                      std::vector<Node>& exp_n,
 528                      Node eq,
 529                      const char* c,
 530                      bool asLemma = false);
 531   void sendInference(std::vector<Node>& exp,
 532                      Node eq,
 533                      const char* c,
 534                      bool asLemma = false);
 535   void sendLemma(Node ant, Node conc, const char* c);
 536   void sendInfer(Node eq_exp, Node eq, const char* c);
 537   bool sendSplit(Node a, Node b, const char* c, bool preq = true);
 538   void sendLengthLemma(Node n);
 539   /** mkConcat **/
 540   inline Node mkConcat(Node n1, Node n2);
 541   inline Node mkConcat(Node n1, Node n2, Node n3);
 542   inline Node mkConcat(const std::vector<Node>& c);
 543   inline Node mkLength(Node n);
 544   // mkSkolem
 545   enum
 546   {
 547     sk_id_c_spt,
 548     sk_id_vc_spt,
 549     sk_id_vc_bin_spt,
 550     sk_id_v_spt,
 551     sk_id_c_spt_rev,
 552     sk_id_vc_spt_rev,
 553     sk_id_vc_bin_spt_rev,
 554     sk_id_v_spt_rev,
 555     sk_id_ctn_pre,
 556     sk_id_ctn_post,
 557     sk_id_dc_spt,
 558     sk_id_dc_spt_rem,
 559     sk_id_deq_x,
 560     sk_id_deq_y,
 561     sk_id_deq_z,
 562   };
 563   std::map<Node, std::map<Node, std::map<int, Node> > > d_skolem_cache;
 564   /** the set of all skolems we have generated */
 565   std::unordered_set<Node, NodeHashFunction> d_all_skolems;
 566   Node mkSkolemCached(
 567       Node a, Node b, int id, const char* c, int isLenSplit = 0);
 568   inline Node mkSkolemS(const char* c, int isLenSplit = 0);
 569   void registerNonEmptySkolem(Node sk);
 570   // inline Node mkSkolemI(const char * c);
 571   /** mkExplain **/
 572   Node mkExplain(std::vector<Node>& a);
 573   Node mkExplain(std::vector<Node>& a, std::vector<Node>& an);
 574   /** mkAnd **/
 575   Node mkAnd(std::vector<Node>& a);
 576   /** get concat vector */
 577   void getConcatVec(Node n, std::vector<Node>& c);
 578
 579   // get equivalence classes
 580   void getEquivalenceClasses(std::vector<Node>& eqcs);
 581
 582   // separate into collections with equal length
 583   void separateByLength(std::vector<Node>& n,
 584                         std::vector<std::vector<Node> >& col,
 585                         std::vector<Node>& lts);
 586   void printConcat(std::vector<Node>& n, const char* c);
 587
 588   void inferSubstitutionProxyVars(Node n,
 589                                   std::vector<Node>& vars,
 590                                   std::vector<Node>& subs,
 591                                   std::vector<Node>& unproc);
 592
 593   // Symbolic Regular Expression
 594  private:
 595   // regular expression memberships
 596   NodeList d_regexp_memberships;
 597   NodeSet d_regexp_ucached;
 598   NodeSet d_regexp_ccached;
 599   // stored assertions
 600   NodeIntMap d_pos_memberships;
 601   std::map< Node, std::vector< Node > > d_pos_memberships_data;
 602   NodeIntMap d_neg_memberships;
 603   std::map< Node, std::vector< Node > > d_neg_memberships_data;
 604   unsigned getNumMemberships( Node n, bool isPos );
 605   Node getMembership( Node n, bool isPos, unsigned i );
 606   // semi normal forms for symbolic expression
 607   std::map< Node, Node > d_nf_regexps;
 608   std::map< Node, std::vector< Node > > d_nf_regexps_exp;
 609   // intersection
 610   NodeNodeMap d_inter_cache;
 611   NodeIntMap d_inter_index;
 612   // processed memberships
 613   NodeSet d_processed_memberships;
 614   // antecedant for why regexp membership must be true
 615   NodeNodeMap d_regexp_ant;
 616   // membership length
 617   //std::map< Node, bool > d_membership_length;
 618   // regular expression operations
 619   RegExpOpr d_regexp_opr;
 620
 621   CVC4::String getHeadConst( Node x );
 622   bool deriveRegExp( Node x, Node r, Node ant );
 623   void addMembership(Node assertion);
 624   Node getNormalString(Node x, std::vector<Node> &nf_exp);
 625   Node getNormalSymRegExp(Node r, std::vector<Node> &nf_exp);
 626
 627
 628   // Finite Model Finding
 629  private:
 630   NodeSet d_input_vars;
 631   context::CDO< Node > d_input_var_lsum;
 632   context::CDHashMap< int, Node > d_cardinality_lits;
 633   context::CDO< int > d_curr_cardinality;
 634
 635  public:
 636   //for finite model finding
 637   Node getNextDecisionRequest(unsigned& priority) override;
 638   // ppRewrite
 639   Node ppRewrite(TNode atom) override;
 640
 641  public:
 642   /** statistics class */
 643   class Statistics {
 644   public:
 645     IntStat d_splits;
 646     IntStat d_eq_splits;
 647     IntStat d_deq_splits;
 648     IntStat d_loop_lemmas;
 649     IntStat d_new_skolems;
 650     Statistics();
 651     ~Statistics();
 652   };/* class TheoryStrings::Statistics */
 653   Statistics d_statistics;
 654
 655  private:
 656   //-----------------------inference steps
 657   /** check initial
 658    *
 659    * This function initializes term indices for each strings function symbol.
 660    * One key aspect of this construction is that concat terms are indexed by
 661    * their list of non-empty components. For example, if x = "" is an equality
 662    * asserted in this SAT context, then y ++ x ++ z may be indexed by (y,z).
 663    * This method may infer various facts while building these term indices, for
 664    * instance, based on congruence. An example would be inferring:
 665    *   y ++ x ++ z = y ++ z
 666    * if both terms are registered in this SAT context.
 667    *
 668    * This function should be called as a first step of any strategy.
 669    */
 670   void checkInit();
 671   /** check constant equivalence classes
 672    *
 673    * This function infers whether CONCAT terms can be simplified to constants.
 674    * For example, if x = "a" and y = "b" are equalities in the current SAT
 675    * context, then we may infer x ++ "c" ++ y is equivalent to "acb". In this
 676    * case, we infer the fact x ++ "c" ++ y = "acb".
 677    */
 678   void checkConstantEquivalenceClasses();
 679   /** check extended functions evaluation
 680    *
 681    * This applies "context-dependent simplification" for all active extended
 682    * function terms in this SAT context. This infers facts of the form:
 683    *   x = c => f( t1 ... tn ) = c'
 684    * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c
 685    * is a (tuple of) equalities that are asserted in this SAT context, and
 686    * f( t1 ... tn ) is a term from this SAT context.
 687    *
 688    * For more details, this is steps 4 when effort=0 and step 6 when
 689    * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String
 690    * Solvers using Context-Dependent Simplification", CAV 2017. When called with
 691    * effort=3, we apply context-dependent simplification based on model values.
 692    */
 693   void checkExtfEval(int effort);
 694   /** check cycles
 695    *
 696    * This inference schema ensures that a containment ordering < over the
 697    * string equivalence classes is acyclic. We define this ordering < such that
 698    * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
 699    * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
 700    * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
 701    * form components that do not constitute this chain, e.g. (w1...wk) \ sj
 702    * in the flat form above, must be empty.
 703    *
 704    * For more details, see the inference S-Cycle in Liang et al CAV 2014.
 705    */
 706   void checkCycles();
 707   /** check flat forms
 708    *
 709    * This applies an inference schema based on "flat forms". The flat form of a
 710    * string term t is a vector of representative terms [r1, ..., rn] such that
 711    * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
 712    * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
 713    * the equivalence class containing t1. For example, if t is y ++ z ++ z,
 714    * E is { y = "", w = z }, and w is the representative of the equivalence
 715    * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
 716    * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
 717    * We may infer various facts based on this pair of terms. For example:
 718    *   ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
 719    *   rn = sn, if n=m and rj == sj for each j < n,
 720    *   ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
 721    * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
 722    * respectively.
 723    *
 724    * Notice that this inference scheme is an optimization and not needed for
 725    * model-soundness. The motivation for this schema is that it is simpler than
 726    * checkNormalFormsEq, which can be seen as a recursive version of this
 727    * schema (see difference of "normal form" vs "flat form" below), and
 728    * checkNormalFormsEq is complete, in the sense that if it passes with no
 729    * inferences, we are ensured that all string equalities in the current
 730    * context are satisfied.
 731    *
 732    * Must call checkCycles before this function in a strategy.
 733    */
 734   void checkFlatForms();
 735   /** check normal forms equalities
 736    *
 737    * This applies an inference schema based on "normal forms". The normal form
 738    * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
 739    * where t1...tn are concatenation terms is a vector of representative terms
 740    * [r1, ..., rm] such that:
 741    * (1) if n=0, then m=1 and r1 is the representative of e,
 742    * (2) if n>0, say
 743    *   t1 = t^1_1 ++ ... ++ t^1_m_1
 744    *   ...
 745    *   tn = t^1_n ++ ... ++ t^_m_n
 746    * for *each* i=1, ..., n, the result of concenating the normal forms of
 747    * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
 748    * can be assigned a normal form, then all equalities between ti and tj are
 749    * satisfied by all models that correspond to extensions of the current
 750    * assignment. For further detail on this terminology, see Liang et al
 751    * CAV 2014.
 752    *
 753    * Notice that all constant words are implicitly considered concatentation
 754    * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
 755    *
 756    * At a high level, we build normal forms for equivalence classes bottom-up,
 757    * starting with equivalence classes that are minimal with respect to the
 758    * containment ordering < computed during checkCycles. While computing a
 759    * normal form for an equivalence class, we may infer equalities between
 760    * components of strings that must be equal (e.g. x=y when x++z == y++w when
 761    * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
 762    * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
 763    * string terms into smaller components using fresh skolem variables (see
 764    * Inference values with names "SPLIT"). We also may introduce regular
 765    * expression constraints in this method for looping word equations (see
 766    * the Inference INFER_FLOOP).
 767    *
 768    * If this inference schema returns no facts, lemmas, or conflicts, then
 769    * we have successfully assigned normal forms for all equivalence classes, as
 770    * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
 771    * conflict based on inferences in the Inference enumeration above.
 772    */
 773   void checkNormalFormsEq();
 774   /** check normal forms disequalities
 775    *
 776    * This inference schema can be seen as the converse of the above schema. In
 777    * particular, it ensures that each pair of distinct equivalence classes
 778    * e1 and e2 have distinct normal forms.
 779    *
 780    * This method considers all pairs of distinct equivalence classes (e1,e2)
 781    * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
 782    * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
 783    * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
 784    * disequal and have the same length, then x1 and x2 have distinct normal
 785    * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
 786    * or split on an equality between ri and si.
 787    *
 788    * If this inference schema returns no facts, lemmas, or conflicts, then all
 789    * disequalities between string terms are satisfied by all models that are
 790    * extensions of the current assignment.
 791    */
 792   void checkNormalFormsDeq();
 793   /** check codes
 794    *
 795    * This inference schema ensures that constraints between str.code terms
 796    * are satisfied by models that correspond to extensions of the current
 797    * assignment. In particular, this method ensures that str.code can be
 798    * given an interpretation that is injective for string arguments with length
 799    * one. It may add lemmas of the form:
 800    *   str.code(x) == -1 V str.code(x) != str.code(y) V x == y
 801    */
 802   void checkCodes();
 803   /** check lengths for equivalence classes
 804    *
 805    * This inference schema adds lemmas of the form:
 806    *   E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
 807    * where [r1, ..., rn] is the normal form of the equivalence class containing
 808    * x. This schema is not required for correctness but experimentally has
 809    * shown to be helpful.
 810    */
 811   void checkLengthsEqc();
 812   /** check extended function reductions
 813    *
 814    * This adds "reduction" lemmas for each active extended function in this SAT
 815    * context. These are generally lemmas of the form:
 816    *   F[t1...tn,k] ^ f( t1 ... tn ) = k
 817    * where f( t1 ... tn ) is an active extended function, k is a fresh constant
 818    * and F is a formula that constrains k based on the definition of f.
 819    *
 820    * For more details, this is step 7 from Strategy 1 in Reynolds et al,
 821    * CAV 2017. We stratify this in practice, where calling this with effort=1
 822    * reduces some of the "easier" extended functions, and effort=2 reduces
 823    * the rest.
 824    */
 825   void checkExtfReductions(int effort);
 826   /** check regular expression memberships
 827    *
 828    * This checks the satisfiability of all regular expression memberships
 829    * of the form (not) s in R. We use various heuristic techniques based on
 830    * unrolling, combined with techniques from Liang et al, "A Decision Procedure
 831    * for Regular Membership and Length Constraints over Unbounded Strings",
 832    * FroCoS 2015.
 833    */
 834   void checkMemberships();
 835   /** check cardinality
 836    *
 837    * This function checks whether a cardinality inference needs to be applied
 838    * to a set of equivalence classes. For details, see Step 5 of the proof
 839    * procedure from Liang et al, CAV 2014.
 840    */
 841   void checkCardinality();
 842   //-----------------------end inference steps
 843
 844   //-----------------------representation of the strategy
 845   /** is strategy initialized */
 846   bool d_strategy_init;
 847   /** run the given inference step */
 848   void runInferStep(InferStep s, int effort);
 849   /** the strategy */
 850   std::vector<InferStep> d_infer_steps;
 851   /** the effort levels */
 852   std::vector<int> d_infer_step_effort;
 853   /** the range (begin, end) of steps to run at given efforts */
 854   std::map<Effort, std::pair<unsigned, unsigned> > d_strat_steps;
 855   /** do we have a strategy for effort e? */
 856   bool hasStrategyEffort(Effort e) const;
 857   /** initialize the strategy
 858    *
 859    * This adds (s,effort) as a strategy step to the vectors d_infer_steps and
 860    * d_infer_step_effort. This indicates that a call to runInferStep should
 861    * be run as the next step in the strategy. If addBreak is true, we add
 862    * a BREAK to the strategy following this step.
 863    */
 864   void addStrategyStep(InferStep s, int effort = 0, bool addBreak = true);
 865   /** initialize the strategy
 866    *
 867    * This initializes the above information based on the options. This makes
 868    * a series of calls to addStrategyStep above.
 869    */
 870   void initializeStrategy();
 871   /** run strategy
 872    *
 873    * This executes the inference steps starting at index sbegin and ending at
 874    * index send. We exit if any step in this sequence adds a lemma or infers a
 875    * fact.
 876    */
 877   void runStrategy(unsigned sbegin, unsigned send);
 878   //-----------------------end representation of the strategy
 879
 880 };/* class TheoryStrings */
 881
 882 }/* CVC4::theory::strings namespace */
 883 }/* CVC4::theory namespace */
 884 }/* CVC4 namespace */
 885
 886 #endif /* __CVC4__THEORY__STRINGS__THEORY_STRINGS_H */