src/theory/strings/skolem_cache.h

   1 /*********************                                                        */
   2 /*! \file skolem_cache.h
   3  ** \verbatim
   4  ** Top contributors (to current version):
   5  **   Andrew Reynolds, Andres Noetzli, Yoni Zohar
   6  ** This file is part of the CVC4 project.
   7  ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
   8  ** in the top-level source directory and their institutional affiliations.
   9  ** All rights reserved.  See the file COPYING in the top-level source
  10  ** directory for licensing information.\endverbatim
  11  **
  12  ** \brief A cache of skolems for theory of strings.
  13  **/
  14
  15 #include "cvc4_private.h"
  16
  17 #ifndef CVC4__THEORY__STRINGS__SKOLEM_CACHE_H
  18 #define CVC4__THEORY__STRINGS__SKOLEM_CACHE_H
  19
  20 #include <map>
  21 #include <tuple>
  22 #include <unordered_set>
  23
  24 #include "expr/node.h"
  25 #include "expr/skolem_manager.h"
  26
  27 namespace CVC4 {
  28 namespace theory {
  29 namespace strings {
  30
  31 /**
  32  * A cache of all string skolems generated by the TheoryStrings class. This
  33  * cache is used to ensure that duplicate skolems are not generated when
  34  * possible, and helps identify what skolems were allocated in the current run.
  35  */
  36 class SkolemCache
  37 {
  38  public:
  39   /**
  40    * Constructor.
  41    *
  42    * useOpts determines if we aggressively share Skolems or return the constants
  43    * they are entailed to be equal to.
  44    */
  45   SkolemCache(bool useOpts = true);
  46   /** Identifiers for skolem types
  47    *
  48    * The comments below document the properties of each skolem introduced by
  49    * inference in the strings solver, where by skolem we mean the fresh
  50    * string variable that witnesses each of "exists k".
  51    *
  52    * The skolems with _REV suffixes are used for the reverse version of the
  53    * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
  54    *
  55    * All skolems assume a and b are strings unless otherwise stated.
  56    */
  57   enum SkolemId
  58   {
  59     // exists k. k = a
  60     SK_PURIFY,
  61     // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
  62     //    exists k. a = "cccc" ++ k
  63     SK_ID_C_SPT,
  64     SK_ID_C_SPT_REV,
  65     // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
  66     //    exists k. a = "c" ++ k
  67     SK_ID_VC_SPT,
  68     SK_ID_VC_SPT_REV,
  69     // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
  70     //    exists k1 k2. len( k1 )>0 ^ len( k2 )>0 ^
  71     //                  ( a ++ k1 = b OR a = b ++ k2 )
  72     // k1 is the variable for (a,b) and k2 is the skolem for (b,a).
  73     SK_ID_V_SPT,
  74     SK_ID_V_SPT_REV,
  75     // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
  76     //    exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
  77     SK_ID_V_UNIFIED_SPT,
  78     SK_ID_V_UNIFIED_SPT_REV,
  79     // a != ""  ^ b = "c" ^ a ++ a' != b ++ b' =>
  80     //    exists k, k_rem.
  81     //         len( k ) = 1 ^
  82     //         ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
  83     SK_ID_DC_SPT,
  84     SK_ID_DC_SPT_REM,
  85     // a != ""  ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
  86     //    exists k_x k_y k_z.
  87     //         ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0
  88     //           ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
  89     SK_ID_DEQ_X,
  90     SK_ID_DEQ_Y,
  91     // contains( a, b ) =>
  92     //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
  93     //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
  94     //
  95     // As an optimization, these skolems are reused for positive occurrences of
  96     // contains, where they have the semantics:
  97     //
  98     //   contains( a, b ) =>
  99     //      exists k_pre, k_post. a = k_pre ++ b ++ k_post
 100     //
 101     // We reuse them since it is sound to consider w.l.o.g. the first occurrence
 102     // of b in a as the witness for contains( a, b ).
 103     SK_FIRST_CTN_PRE,
 104     SK_FIRST_CTN_POST,
 105     // For sequence a and regular expression b,
 106     // in_re(a, re.++(_*, b, _*)) =>
 107     //    exists k_pre, k_match, k_post.
 108     //       a = k_pre ++ k_match ++ k_post ^
 109     //       ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
 110     //              re.++(_*, b, _*)) ^
 111     //       in_re(k2, y)
 112     SK_FIRST_MATCH_PRE,
 113     SK_FIRST_MATCH,
 114     SK_FIRST_MATCH_POST,
 115     // For integer b,
 116     // len( a ) > b =>
 117     //    exists k. a = k ++ a' ^ len( k ) = b
 118     SK_PREFIX,
 119     // For integer b,
 120     // b > 0 =>
 121     //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
 122     SK_SUFFIX_REM,
 123     // --------------- integer skolems
 124     // exists k. ( b occurs k times in a )
 125     SK_NUM_OCCUR,
 126     // --------------- function skolems
 127     // For function k: Int -> Int
 128     //   exists k.
 129     //     forall 0 <= x <= n,
 130     //       k(x) is the end index of the x^th occurrence of b in a
 131     //   where n is the number of occurrences of b in a, and k(0)=0.
 132     SK_OCCUR_INDEX,
 133     // For function k: Int -> Int
 134     //   exists k.
 135     //     forall 0 <= x < n,
 136     //       k(x) is the length of the x^th occurrence of b in a (excluding
 137     //       matches of empty strings)
 138     //   where b is a regular expression, n is the number of occurrences of b
 139     //   in a, and k(0)=0.
 140     SK_OCCUR_LEN,
 141     // For function k: ((Seq U) x Int) -> U
 142     // exists k.
 143     // forall s, n.
 144     //  k(s, n) is some undefined value of sort U
 145     SK_NTH,
 146   };
 147   /**
 148    * Returns a skolem of type string that is cached for (a,b,id) and has
 149    * name c.
 150    */
 151   Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c);
 152   /**
 153    * Returns a skolem of type string that is cached for (a,[null],id) and has
 154    * name c.
 155    */
 156   Node mkSkolemCached(Node a, SkolemId id, const char* c);
 157   /** Same as above, but the skolem to construct has a custom type tn */
 158   Node mkTypedSkolemCached(
 159       TypeNode tn, Node a, Node b, SkolemId id, const char* c);
 160   /** Same as mkTypedSkolemCached above for (a,[null],id) */
 161   Node mkTypedSkolemCached(TypeNode tn, Node a, SkolemId id, const char* c);
 162   /**
 163    * Specific version for seq.nth, used for cases where the index is out of
 164    * range for sequence type seqType.
 165    */
 166   Node mkSkolemSeqNth(TypeNode seqType, const char* c);
 167   /** Returns a (uncached) skolem of type string with name c */
 168   Node mkSkolem(const char* c);
 169   /** Returns true if n is a skolem allocated by this class */
 170   bool isSkolem(Node n) const;
 171   /** Make index variable
 172    *
 173    * This returns an integer variable of kind BOUND_VARIABLE that is used
 174    * for axiomatizing the behavior of a term or predicate t. Notice that this
 175    * index variable does *not* necessarily refer to indices in the term t
 176    * itself. Instead, it refers to indices in the relevant string in the
 177    * reduction of t. For example, the index variable for the term str.to_int(s)
 178    * is used to quantify over the positions in string term s.
 179    */
 180   static Node mkIndexVar(Node t);
 181
 182  private:
 183   /**
 184    * Simplifies the arguments for a string skolem used for indexing into the
 185    * cache. In certain cases, we can share skolems with similar arguments e.g.
 186    * SK_FIRST_CTN(a, c) can be used instead of SK_FIRST_CTN((str.substr a 0 n),
 187    * c) because the first occurrence of "c" in "(str.substr a 0 n)" is also the
 188    * first occurrence of "c" in "a" (assuming that "c" appears in both and
 189    * otherwise the value of SK_FIRST_CTN does not matter).
 190    *
 191    * @param id The type of skolem
 192    * @param a The first argument used for indexing
 193    * @param b The second argument used for indexing
 194    * @return A tuple with the new skolem id, the new first, and the new second
 195    * argument
 196    */
 197   std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id,
 198                                                          Node a,
 199                                                          Node b);
 200   /** whether we are using optimizations */
 201   bool d_useOpts;
 202   /** string type */
 203   TypeNode d_strType;
 204   /** Constant node zero */
 205   Node d_zero;
 206   /** map from node pairs and identifiers to skolems */
 207   std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache;
 208   /** the set of all skolems we have generated */
 209   std::unordered_set<Node, NodeHashFunction> d_allSkolems;
 210 };
 211
 212 }  // namespace strings
 213 }  // namespace theory
 214 }  // namespace CVC4
 215
 216 #endif /* CVC4__THEORY__STRINGS__SKOLEM_CACHE_H */