850acfb2a46c3327a277c90d294dc6aaddc2aa0c
[cvc5.git] / src / theory / strings / sequences_rewriter.h
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Andres Noetzli, Yoni Zohar
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * Rewriter for the theory of strings and sequences.
14 */
15
16 #include "cvc5_private.h"
17
18 #ifndef CVC5__THEORY__STRINGS__SEQUENCES_REWRITER_H
19 #define CVC5__THEORY__STRINGS__SEQUENCES_REWRITER_H
20
21 #include <vector>
22
23 #include "expr/node.h"
24 #include "theory/strings/arith_entail.h"
25 #include "theory/strings/rewrites.h"
26 #include "theory/strings/sequences_stats.h"
27 #include "theory/strings/strings_entail.h"
28 #include "theory/theory_rewriter.h"
29
30 namespace cvc5 {
31 namespace theory {
32 namespace strings {
33
34 class SequencesRewriter : public TheoryRewriter
35 {
36 public:
37 SequencesRewriter(Rewriter* r, HistogramStat<Rewrite>* statistics);
38 /** The underlying entailment utilities */
39 ArithEntail& getArithEntail();
40 StringsEntail& getStringsEntail();
41
42 protected:
43 /** rewrite regular expression concatenation
44 *
45 * This is the entry point for post-rewriting applications of re.++.
46 * Returns the rewritten form of node.
47 */
48 Node rewriteConcatRegExp(TNode node);
49 /** rewrite regular expression star
50 *
51 * This is the entry point for post-rewriting applications of re.*.
52 * Returns the rewritten form of node.
53 */
54 Node rewriteStarRegExp(TNode node);
55 /** rewrite regular expression intersection/union
56 *
57 * This is the entry point for post-rewriting applications of re.inter and
58 * re.union. Returns the rewritten form of node.
59 */
60 Node rewriteAndOrRegExp(TNode node);
61 /** rewrite regular expression loop
62 *
63 * This is the entry point for post-rewriting applications of re.loop.
64 * Returns the rewritten form of node.
65 */
66 Node rewriteLoopRegExp(TNode node);
67 /** rewrite regular expression repeat
68 *
69 * This is the entry point for post-rewriting applications of re.repeat.
70 * Returns the rewritten form of node.
71 */
72 Node rewriteRepeatRegExp(TNode node);
73 /** rewrite regular expression option
74 *
75 * This is the entry point for post-rewriting applications of re.opt.
76 * Returns the rewritten form of node.
77 */
78 Node rewriteOptionRegExp(TNode node);
79 /** rewrite regular expression plus
80 *
81 * This is the entry point for post-rewriting applications of re.+.
82 * Returns the rewritten form of node.
83 */
84 Node rewritePlusRegExp(TNode node);
85 /** rewrite regular expression difference
86 *
87 * This is the entry point for post-rewriting applications of re.diff.
88 * Returns the rewritten form of node.
89 */
90 Node rewriteDifferenceRegExp(TNode node);
91 /** rewrite regular expression range
92 *
93 * This is the entry point for post-rewriting applications of re.range.
94 * Returns the rewritten form of node.
95 */
96 Node rewriteRangeRegExp(TNode node);
97
98 /** rewrite regular expression membership
99 *
100 * This is the entry point for post-rewriting applications of str.in.re
101 * Returns the rewritten form of node.
102 */
103 Node rewriteMembership(TNode node);
104
105 /** rewrite string equality extended
106 *
107 * This method returns a formula that is equivalent to the equality between
108 * two strings s = t, given by node. It is called by rewriteEqualityExt.
109 */
110 Node rewriteStrEqualityExt(Node node);
111 /** rewrite arithmetic equality extended
112 *
113 * This method returns a formula that is equivalent to the equality between
114 * two arithmetic string terms s = t, given by node. t is called by
115 * rewriteEqualityExt.
116 */
117 Node rewriteArithEqualityExt(Node node);
118 /**
119 * Called when node rewrites to ret.
120 *
121 * The rewrite r indicates the justification for the rewrite, which is printed
122 * by this function for debugging.
123 */
124 Node returnRewrite(Node node, Node ret, Rewrite r);
125
126 public:
127 RewriteResponse postRewrite(TNode node) override;
128 RewriteResponse preRewrite(TNode node) override;
129 /** Expand definition */
130 TrustNode expandDefinition(Node n) override;
131
132 /** rewrite equality
133 *
134 * This method returns a formula that is equivalent to the equality between
135 * two strings s = t, given by node. The result of rewrite is one of
136 * { s = t, t = s, true, false }.
137 */
138 Node rewriteEquality(Node node);
139 /** rewrite equality extended
140 *
141 * This method returns a formula that is equivalent to the equality between
142 * two terms s = t, given by node, where s and t are terms in the signature
143 * of the theory of strings. Notice that s and t may be of string type or
144 * of Int type.
145 *
146 * Specifically, this function performs rewrites whose conclusion is not
147 * necessarily one of { s = t, t = s, true, false }.
148 */
149 Node rewriteEqualityExt(Node node) override;
150 /** rewrite string length
151 * This is the entry point for post-rewriting terms node of the form
152 * str.len( t )
153 * Returns the rewritten form of node.
154 */
155 Node rewriteLength(Node node);
156 /** rewrite concat
157 * This is the entry point for post-rewriting terms node of the form
158 * str.++( t1, .., tn )
159 * Returns the rewritten form of node.
160 */
161 Node rewriteConcat(Node node);
162 /** rewrite character at
163 * This is the entry point for post-rewriting terms node of the form
164 * str.charat( s, i1 )
165 * Returns the rewritten form of node.
166 */
167 Node rewriteCharAt(Node node);
168 /** rewrite substr
169 * This is the entry point for post-rewriting terms node of the form
170 * str.substr( s, i1, i2 )
171 * Returns the rewritten form of node.
172 */
173 Node rewriteSubstr(Node node);
174 /** rewrite update
175 * This is the entry point for post-rewriting terms node of the form
176 * str.update( s, i1, i2 )
177 * Returns the rewritten form of node.
178 */
179 Node rewriteUpdate(Node node);
180 /** rewrite contains
181 * This is the entry point for post-rewriting terms node of the form
182 * str.contains( t, s )
183 * Returns the rewritten form of node.
184 *
185 * For details on some of the basic rewrites done in this function, see Figure
186 * 7 of Reynolds et al "Scaling Up DPLL(T) String Solvers Using
187 * Context-Dependent Rewriting", CAV 2017.
188 */
189 Node rewriteContains(Node node);
190 /** rewrite indexof
191 * This is the entry point for post-rewriting terms n of the form
192 * str.indexof( s, t, n )
193 * Returns the rewritten form of node.
194 */
195 Node rewriteIndexof(Node node);
196 /** rewrite indexof regular expression match
197 * This is the entry point for post-rewriting terms n of the form
198 * str.indexof_re( s, r, n )
199 * Returns the rewritten form of node.
200 */
201 Node rewriteIndexofRe(Node node);
202 /** rewrite replace
203 * This is the entry point for post-rewriting terms n of the form
204 * str.replace( s, t, r )
205 * Returns the rewritten form of node.
206 */
207 Node rewriteReplace(Node node);
208 /** rewrite replace all
209 * This is the entry point for post-rewriting terms n of the form
210 * str.replaceall( s, t, r )
211 * Returns the rewritten form of node.
212 */
213 Node rewriteReplaceAll(Node node);
214 /** rewrite replace internal
215 *
216 * This method implements rewrite rules that apply to both str.replace and
217 * str.replaceall. If it returns a non-null ret, then node rewrites to ret.
218 */
219 Node rewriteReplaceInternal(Node node);
220 /** rewrite regular expression replace
221 *
222 * This method implements rewrite rules that apply to terms of the form
223 * str.replace_re(s, r, t).
224 *
225 * @param node The node to rewrite
226 * @return The rewritten node
227 */
228 Node rewriteReplaceRe(Node node);
229 /** rewrite regular expression replace
230 *
231 * This method implements rewrite rules that apply to terms of the form
232 * str.replace_re_all(s, r, t).
233 *
234 * @param node The node to rewrite
235 * @return The rewritten node
236 */
237 Node rewriteReplaceReAll(Node node);
238 /**
239 * Returns the first, shortest sequence in n that matches r.
240 *
241 * @param n The constant string or sequence to search in.
242 * @param r The regular expression to search for.
243 * @return A pair holding the start position and the end position of the
244 * match or a pair of string::npos if r does not appear in n.
245 */
246 std::pair<size_t, size_t> firstMatch(Node n, Node r);
247 /** rewrite string reverse
248 *
249 * This is the entry point for post-rewriting terms n of the form
250 * str.rev( s )
251 * Returns the rewritten form of node.
252 */
253 Node rewriteStrReverse(Node node);
254 /** rewrite prefix/suffix
255 * This is the entry point for post-rewriting terms n of the form
256 * str.prefixof( s, t ) / str.suffixof( s, t )
257 * Returns the rewritten form of node.
258 */
259 Node rewritePrefixSuffix(Node node);
260
261 /** rewrite str.to_code
262 * This is the entry point for post-rewriting terms n of the form
263 * str.to_code( t )
264 * Returns the rewritten form of node.
265 */
266 Node rewriteStringToCode(Node node);
267 /** rewrite seq.unit
268 * This is the entry point for post-rewriting terms n of the form
269 * seq.unit( t )
270 * Returns the rewritten form of node.
271 */
272 Node rewriteSeqUnit(Node node);
273
274 /** rewrite seq.nth
275 * This is the entry point for post-rewriting terms n of the form
276 * seq.nth(s, i)
277 * Returns the rewritten form of node.
278 */
279 Node rewriteSeqNth(Node node);
280
281 /** length preserving rewrite
282 *
283 * Given input n, this returns a string n' whose length is equivalent to n.
284 * We apply certain normalizations to n', such as replacing all constants
285 * that are not relevant to length by "A".
286 */
287 Node lengthPreserveRewrite(Node n);
288
289 /**
290 * Given a symbolic length n, returns the canonical string (of type stype)
291 * for that length. For example if n is constant, this function returns a
292 * string consisting of "A" repeated n times. Returns the null node if no such
293 * string exists.
294 */
295 static Node canonicalStrForSymbolicLength(Node n, TypeNode stype);
296
297 /**
298 * post-process rewrite
299 *
300 * If node is not an equality and ret is an equality,
301 * this method applies an additional rewrite step (rewriteEqualityExt) that
302 * performs additional rewrites on ret, after which we return the result of
303 * this call. Otherwise, this method simply returns ret.
304 */
305 Node postProcessRewrite(Node node, Node ret);
306 /** Reference to the rewriter statistics. */
307 HistogramStat<Rewrite>* d_statistics;
308 /**
309 * Pointer to the rewriter. NOTE this is a cyclic dependency, and should
310 * be removed.
311 */
312 Rewriter* d_rr;
313 /** The arithmetic entailment module */
314 ArithEntail d_arithEntail;
315 /** Instance of the entailment checker for strings. */
316 StringsEntail d_stringsEntail;
317 }; /* class SequencesRewriter */
318
319 } // namespace strings
320 } // namespace theory
321 } // namespace cvc5
322
323 #endif /* CVC5__THEORY__STRINGS__SEQUENCES_REWRITER_H */