Fix corner case of wrongly applied selector as trigger (#5786)
[cvc5.git] / src / theory / strings / skolem_cache.h
1 /********************* */
2 /*! \file skolem_cache.h
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Andres Noetzli, Yoni Zohar
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
8 ** in the top-level source directory and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief A cache of skolems for theory of strings.
13 **/
14
15 #include "cvc4_private.h"
16
17 #ifndef CVC4__THEORY__STRINGS__SKOLEM_CACHE_H
18 #define CVC4__THEORY__STRINGS__SKOLEM_CACHE_H
19
20 #include <map>
21 #include <tuple>
22 #include <unordered_set>
23
24 #include "expr/node.h"
25 #include "expr/skolem_manager.h"
26
27 namespace CVC4 {
28 namespace theory {
29 namespace strings {
30
31 /**
32 * A cache of all string skolems generated by the TheoryStrings class. This
33 * cache is used to ensure that duplicate skolems are not generated when
34 * possible, and helps identify what skolems were allocated in the current run.
35 */
36 class SkolemCache
37 {
38 public:
39 /**
40 * Constructor.
41 *
42 * useOpts determines if we aggressively share Skolems or return the constants
43 * they are entailed to be equal to.
44 */
45 SkolemCache(bool useOpts = true);
46 /** Identifiers for skolem types
47 *
48 * The comments below document the properties of each skolem introduced by
49 * inference in the strings solver, where by skolem we mean the fresh
50 * string variable that witnesses each of "exists k".
51 *
52 * The skolems with _REV suffixes are used for the reverse version of the
53 * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
54 *
55 * All skolems assume a and b are strings unless otherwise stated.
56 */
57 enum SkolemId
58 {
59 // exists k. k = a
60 SK_PURIFY,
61 // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
62 // exists k. a = "cccc" ++ k
63 SK_ID_C_SPT,
64 SK_ID_C_SPT_REV,
65 // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
66 // exists k. a = "c" ++ k
67 SK_ID_VC_SPT,
68 SK_ID_VC_SPT_REV,
69 // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
70 // exists k1 k2. len( k1 )>0 ^ len( k2 )>0 ^
71 // ( a ++ k1 = b OR a = b ++ k2 )
72 // k1 is the variable for (a,b) and k2 is the skolem for (b,a).
73 SK_ID_V_SPT,
74 SK_ID_V_SPT_REV,
75 // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
76 // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
77 SK_ID_V_UNIFIED_SPT,
78 SK_ID_V_UNIFIED_SPT_REV,
79 // a != "" ^ b = "c" ^ a ++ a' != b ++ b' =>
80 // exists k, k_rem.
81 // len( k ) = 1 ^
82 // ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
83 SK_ID_DC_SPT,
84 SK_ID_DC_SPT_REM,
85 // a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
86 // exists k_x k_y k_z.
87 // ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0
88 // ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
89 SK_ID_DEQ_X,
90 SK_ID_DEQ_Y,
91 // contains( a, b ) =>
92 // exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
93 // ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
94 //
95 // As an optimization, these skolems are reused for positive occurrences of
96 // contains, where they have the semantics:
97 //
98 // contains( a, b ) =>
99 // exists k_pre, k_post. a = k_pre ++ b ++ k_post
100 //
101 // We reuse them since it is sound to consider w.l.o.g. the first occurrence
102 // of b in a as the witness for contains( a, b ).
103 SK_FIRST_CTN_PRE,
104 SK_FIRST_CTN_POST,
105 // For sequence a and regular expression b,
106 // in_re(a, re.++(_*, b, _*)) =>
107 // exists k_pre, k_match, k_post.
108 // a = k_pre ++ k_match ++ k_post ^
109 // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
110 // re.++(_*, b, _*)) ^
111 // in_re(k2, y)
112 SK_FIRST_MATCH_PRE,
113 SK_FIRST_MATCH,
114 SK_FIRST_MATCH_POST,
115 // For integer b,
116 // len( a ) > b =>
117 // exists k. a = k ++ a' ^ len( k ) = b
118 SK_PREFIX,
119 // For integer b,
120 // b > 0 =>
121 // exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
122 SK_SUFFIX_REM,
123 // --------------- integer skolems
124 // exists k. ( b occurs k times in a )
125 SK_NUM_OCCUR,
126 // --------------- function skolems
127 // For function k: Int -> Int
128 // exists k.
129 // forall 0 <= x <= n,
130 // k(x) is the end index of the x^th occurrence of b in a
131 // where n is the number of occurrences of b in a, and k(0)=0.
132 SK_OCCUR_INDEX,
133 // For function k: Int -> Int
134 // exists k.
135 // forall 0 <= x < n,
136 // k(x) is the length of the x^th occurrence of b in a (excluding
137 // matches of empty strings)
138 // where b is a regular expression, n is the number of occurrences of b
139 // in a, and k(0)=0.
140 SK_OCCUR_LEN,
141 // For function k: ((Seq U) x Int) -> U
142 // exists k.
143 // forall s, n.
144 // k(s, n) is some undefined value of sort U
145 SK_NTH,
146 };
147 /**
148 * Returns a skolem of type string that is cached for (a,b,id) and has
149 * name c.
150 */
151 Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c);
152 /**
153 * Returns a skolem of type string that is cached for (a,[null],id) and has
154 * name c.
155 */
156 Node mkSkolemCached(Node a, SkolemId id, const char* c);
157 /** Same as above, but the skolem to construct has a custom type tn */
158 Node mkTypedSkolemCached(
159 TypeNode tn, Node a, Node b, SkolemId id, const char* c);
160 /** Same as mkTypedSkolemCached above for (a,[null],id) */
161 Node mkTypedSkolemCached(TypeNode tn, Node a, SkolemId id, const char* c);
162 /**
163 * Specific version for seq.nth, used for cases where the index is out of
164 * range for sequence type seqType.
165 */
166 Node mkSkolemSeqNth(TypeNode seqType, const char* c);
167 /** Returns a (uncached) skolem of type string with name c */
168 Node mkSkolem(const char* c);
169 /** Returns true if n is a skolem allocated by this class */
170 bool isSkolem(Node n) const;
171 /** Make index variable
172 *
173 * This returns an integer variable of kind BOUND_VARIABLE that is used
174 * for axiomatizing the behavior of a term or predicate t. Notice that this
175 * index variable does *not* necessarily refer to indices in the term t
176 * itself. Instead, it refers to indices in the relevant string in the
177 * reduction of t. For example, the index variable for the term str.to_int(s)
178 * is used to quantify over the positions in string term s.
179 */
180 static Node mkIndexVar(Node t);
181
182 private:
183 /**
184 * Simplifies the arguments for a string skolem used for indexing into the
185 * cache. In certain cases, we can share skolems with similar arguments e.g.
186 * SK_FIRST_CTN(a, c) can be used instead of SK_FIRST_CTN((str.substr a 0 n),
187 * c) because the first occurrence of "c" in "(str.substr a 0 n)" is also the
188 * first occurrence of "c" in "a" (assuming that "c" appears in both and
189 * otherwise the value of SK_FIRST_CTN does not matter).
190 *
191 * @param id The type of skolem
192 * @param a The first argument used for indexing
193 * @param b The second argument used for indexing
194 * @return A tuple with the new skolem id, the new first, and the new second
195 * argument
196 */
197 std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id,
198 Node a,
199 Node b);
200 /** whether we are using optimizations */
201 bool d_useOpts;
202 /** string type */
203 TypeNode d_strType;
204 /** Constant node zero */
205 Node d_zero;
206 /** map from node pairs and identifiers to skolems */
207 std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache;
208 /** the set of all skolems we have generated */
209 std::unordered_set<Node, NodeHashFunction> d_allSkolems;
210 };
211
212 } // namespace strings
213 } // namespace theory
214 } // namespace CVC4
215
216 #endif /* CVC4__THEORY__STRINGS__SKOLEM_CACHE_H */