fix: indexof, replace rewriting
[cvc5.git] / src / theory / strings / theory_strings_preprocess.cpp
1 /********************* */
2 /*! \file theory_strings_preprocess.cpp
3 ** \verbatim
4 ** Original author: Tianyi Liang
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2013 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief Strings Preprocess
13 **
14 ** Strings Preprocess.
15 **/
16
17 #include "theory/strings/theory_strings_preprocess.h"
18 #include "expr/kind.h"
19 #include "theory/strings/options.h"
20 #include "smt/logic_exception.h"
21
22 namespace CVC4 {
23 namespace theory {
24 namespace strings {
25
26 StringsPreprocess::StringsPreprocess() {
27 std::vector< TypeNode > argTypes;
28 argTypes.push_back(NodeManager::currentNM()->stringType());
29 argTypes.push_back(NodeManager::currentNM()->integerType());
30 d_charAtUF = NodeManager::currentNM()->mkSkolem("charAt",
31 NodeManager::currentNM()->mkFunctionType(
32 argTypes,
33 NodeManager::currentNM()->stringType()),
34 "total charat",
35 NodeManager::SKOLEM_EXACT_NAME);
36 }
37
38 void StringsPreprocess::simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ) {
39 int k = r.getKind();
40 switch( k ) {
41 case kind::STRING_TO_REGEXP:
42 {
43 Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] );
44 ret.push_back( eq );
45 }
46 break;
47 case kind::REGEXP_CONCAT:
48 {
49 std::vector< Node > cc;
50 for(unsigned i=0; i<r.getNumChildren(); ++i) {
51 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
52 cc.push_back( r[i][0] );
53 } else {
54 Node sk = NodeManager::currentNM()->mkSkolem( "recsym_$$", s.getType(), "created for regular expression concat" );
55 simplifyRegExp( sk, r[i], ret, nn );
56 cc.push_back( sk );
57 }
58 }
59 Node cc_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s,
60 NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, cc ) );
61 nn.push_back( cc_eq );
62 }
63 break;
64 case kind::REGEXP_OR:
65 {
66 std::vector< Node > c_or;
67 for(unsigned i=0; i<r.getNumChildren(); ++i) {
68 simplifyRegExp( s, r[i], c_or, nn );
69 }
70 Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or );
71 ret.push_back( eq );
72 }
73 break;
74 case kind::REGEXP_INTER:
75 for(unsigned i=0; i<r.getNumChildren(); ++i) {
76 simplifyRegExp( s, r[i], ret, nn );
77 }
78 break;
79 case kind::REGEXP_STAR:
80 {
81 Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
82 ret.push_back( eq );
83 }
84 break;
85 default:
86 //TODO: special sym: sigma, none, all
87 Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl;
88 Assert( false );
89 break;
90 }
91 }
92
93 bool StringsPreprocess::checkStarPlus( Node t ) {
94 if( t.getKind() != kind::REGEXP_STAR && t.getKind() != kind::REGEXP_PLUS ) {
95 for( unsigned i = 0; i<t.getNumChildren(); ++i ) {
96 if( checkStarPlus(t[i]) ) return true;
97 }
98 return false;
99 } else {
100 return true;
101 }
102 }
103 int StringsPreprocess::checkFixLenVar( Node t ) {
104 int ret = 2;
105 if(t.getKind() == kind::EQUAL) {
106 if(t[0].getType().isInteger() && t[0].isConst() && t[1].getKind() == kind::STRING_LENGTH) {
107 if(t[1][0].getKind() == kind::VARIABLE) {
108 ret = 0;
109 }
110 } else if(t[1].getType().isInteger() && t[1].isConst() && t[0].getKind() == kind::STRING_LENGTH) {
111 if(t[0][0].getKind() == kind::VARIABLE) {
112 ret = 1;
113 }
114 }
115 }
116 if(ret != 2) {
117 int len = t[ret].getConst<Rational>().getNumerator().toUnsignedInt();
118 if(len < 2) {
119 ret = 2;
120 }
121 }
122 if(!options::stringExp()) {
123 ret = 2;
124 }
125 return ret;
126 }
127 Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
128 std::hash_map<TNode, Node, TNodeHashFunction>::const_iterator i = d_cache.find(t);
129 if(i != d_cache.end()) {
130 return (*i).second.isNull() ? t : (*i).second;
131 }
132
133 Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << std::endl;
134 Node retNode = t;
135 int c_id = checkFixLenVar(t);
136 if( c_id != 2 ) {
137 int v_id = 1 - c_id;
138 int len = t[c_id].getConst<Rational>().getNumerator().toUnsignedInt();
139 if(len > 1) {
140 Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
141 std::vector< Node > vec;
142 for(int i=0; i<len; i++) {
143 Node num = NodeManager::currentNM()->mkConst( ::CVC4::Rational(i) );
144 Node sk = NodeManager::currentNM()->mkNode(kind::APPLY_UF, d_charAtUF, t[v_id][0], num);
145 vec.push_back(sk);
146 Node lem = one.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk ) );
147 new_nodes.push_back( lem );
148 }
149 Node lem = t[v_id][0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, vec ) );
150 lem = NodeManager::currentNM()->mkNode( kind::IMPLIES, t, lem );
151 new_nodes.push_back( lem );
152 d_cache[t] = t;
153 retNode = t;
154 }
155 } else if( t.getKind() == kind::STRING_IN_REGEXP ) {
156 // t0 in t1
157 Node t0 = simplify( t[0], new_nodes );
158
159 //if(!checkStarPlus( t[1] )) {
160 //rewrite it
161 std::vector< Node > ret;
162 std::vector< Node > nn;
163 simplifyRegExp( t0, t[1], ret, nn );
164 new_nodes.insert( new_nodes.end(), nn.begin(), nn.end() );
165
166 Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret );
167 d_cache[t] = (t == n) ? Node::null() : n;
168 retNode = n;
169 //} else {
170 // TODO
171 // return (t0 == t[0]) ? t : NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, t0, t[1] );
172 //}
173 } else if( t.getKind() == kind::STRING_SUBSTR_TOTAL ){
174 Node sk1 = NodeManager::currentNM()->mkSkolem( "st1_$$", NodeManager::currentNM()->stringType(), "created for substr" );
175 Node sk2 = NodeManager::currentNM()->mkSkolem( "st2_$$", NodeManager::currentNM()->stringType(), "created for substr" );
176 Node sk3 = NodeManager::currentNM()->mkSkolem( "st3_$$", NodeManager::currentNM()->stringType(), "created for substr" );
177 Node x = simplify( t[0], new_nodes );
178 Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ,
179 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, x ),
180 NodeManager::currentNM()->mkNode( kind::PLUS, t[1], t[2] ) );
181 Node x_eq_123 = NodeManager::currentNM()->mkNode( kind::EQUAL,
182 NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3 ), x );
183 Node len_sk1_eq_i = NodeManager::currentNM()->mkNode( kind::EQUAL, t[1],
184 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
185 Node len_sk2_eq_j = NodeManager::currentNM()->mkNode( kind::EQUAL, t[2],
186 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ) );
187
188 Node tp = NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i, len_sk2_eq_j );
189 tp = NodeManager::currentNM()->mkNode( kind::IMPLIES, lenxgti, tp );
190 new_nodes.push_back( tp );
191
192 d_cache[t] = sk2;
193 retNode = sk2;
194 } else if( t.getKind() == kind::STRING_CHARAT_TOTAL ){
195 Node sk1 = NodeManager::currentNM()->mkSkolem( "ca1_$$", NodeManager::currentNM()->stringType(), "created for charat" );
196 Node sk2 = NodeManager::currentNM()->mkSkolem( "ca2_$$", NodeManager::currentNM()->stringType(), "created for charat" );
197 Node sk3 = NodeManager::currentNM()->mkSkolem( "ca3_$$", NodeManager::currentNM()->stringType(), "created for charat" );
198 Node x = simplify( t[0], new_nodes );
199 Node lenxgti = NodeManager::currentNM()->mkNode( kind::GT,
200 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, x ), t[1] );
201 Node x_eq_123 = NodeManager::currentNM()->mkNode( kind::EQUAL,
202 NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3 ), x );
203 Node len_sk1_eq_i = NodeManager::currentNM()->mkNode( kind::EQUAL, t[1],
204 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
205 Node len_sk2_eq_1 = NodeManager::currentNM()->mkNode( kind::EQUAL, NodeManager::currentNM()->mkConst( Rational( 1 ) ),
206 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ) );
207
208 Node tp = NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i, len_sk2_eq_1 );
209 tp = NodeManager::currentNM()->mkNode( kind::IMPLIES, lenxgti, tp );
210 new_nodes.push_back( tp );
211
212 d_cache[t] = sk2;
213 retNode = sk2;
214 } else if( t.getKind() == kind::STRING_STRIDOF ){
215 if(options::stringExp()) {
216 Node sk1 = NodeManager::currentNM()->mkSkolem( "io1_$$", t[0].getType(), "created for indexof" );
217 Node sk2 = NodeManager::currentNM()->mkSkolem( "io2_$$", t[0].getType(), "created for indexof" );
218 Node sk3 = NodeManager::currentNM()->mkSkolem( "io3_$$", t[0].getType(), "created for indexof" );
219 Node sk4 = NodeManager::currentNM()->mkSkolem( "io4_$$", t[0].getType(), "created for indexof" );
220 Node skk = NodeManager::currentNM()->mkSkolem( "iok_$$", t[2].getType(), "created for indexof" );
221 Node eq = t[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3, sk4 ) );
222 new_nodes.push_back( eq );
223 Node negone = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) );
224 Node krange = NodeManager::currentNM()->mkNode( kind::GEQ, skk, negone );
225 new_nodes.push_back( krange );
226 krange = NodeManager::currentNM()->mkNode( kind::GT,
227 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ), skk);
228 new_nodes.push_back( krange );
229
230 //str.len(s1) < y + str.len(s2)
231 Node c1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::GT,
232 NodeManager::currentNM()->mkNode( kind::PLUS, t[2], NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[1] )),
233 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] )));
234 //str.len(t1) = y
235 Node c2 = t[2].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
236 //~contain(t234, s2)
237 Node c3 = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::STRING_STRCTN,
238 NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk2, sk3, sk4), t[1] ).negate());
239 //left
240 Node left = NodeManager::currentNM()->mkNode( kind::OR, c1, NodeManager::currentNM()->mkNode( kind::AND, c2, c3 ) );
241 //t3 = s2
242 Node c4 = t[1].eqNode( sk3 );
243 //~contain(t2, s2)
244 Node c5 = NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, sk2, t[1] ).negate();
245 //k=str.len(s1, s2)
246 Node c6 = skk.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH,
247 NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2 )));
248 //right
249 Node right = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::AND, c2, c4, c5, c6 ));
250 Node cond = skk.eqNode( negone );
251 Node rr = NodeManager::currentNM()->mkNode( kind::ITE, cond, left, right );
252 new_nodes.push_back( rr );
253 d_cache[t] = skk;
254 retNode = skk;
255 } else {
256 throw LogicException("string indexof not supported in this release");
257 }
258 } else if( t.getKind() == kind::STRING_STRREPL ){
259 if(options::stringExp()) {
260 Node zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
261 Node x = t[0];
262 Node y = t[1];
263 Node z = t[2];
264 Node sk1 = NodeManager::currentNM()->mkSkolem( "rp1_$$", t[0].getType(), "created for replace" );
265 Node sk2 = NodeManager::currentNM()->mkSkolem( "rp2_$$", t[0].getType(), "created for replace" );
266 Node skw = NodeManager::currentNM()->mkSkolem( "rpw_$$", t[0].getType(), "created for replace" );
267 Node cond = NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, x, y );
268 Node c1 = x.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, y, sk2 ) );
269 Node c2 = skw.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, z, sk2 ) );
270 Node iof = NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, x, y, zero );
271 Node c3 = iof.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
272 Node rr = NodeManager::currentNM()->mkNode( kind::ITE, cond,
273 NodeManager::currentNM()->mkNode( kind::AND, c1, c2, c3 ),
274 skw.eqNode(x) );
275 new_nodes.push_back( rr );
276 d_cache[t] = skw;
277 retNode = skw;
278 } else {
279 throw LogicException("string replace not supported in this release");
280 }
281 } else if(t.getKind() == kind::STRING_CHARAT) {
282 Node sk2 = NodeManager::currentNM()->mkNode(kind::APPLY_UF, d_charAtUF, t[0], t[1]);
283 Node sk1 = NodeManager::currentNM()->mkSkolem( "ca1_$$", NodeManager::currentNM()->stringType(), "created for charat" );
284 Node sk3 = NodeManager::currentNM()->mkSkolem( "ca3_$$", NodeManager::currentNM()->stringType(), "created for charat" );
285 Node x = simplify( t[0], new_nodes );
286 Node lenxgti = NodeManager::currentNM()->mkNode( kind::GT,
287 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, x ), t[1] );
288 Node x_eq_123 = x.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3 ) );
289 Node len_sk1_eq_i = t[1].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
290 Node tp = NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i );
291 tp = NodeManager::currentNM()->mkNode( kind::IMPLIES, lenxgti, tp );
292 Node len_sk2_eq_1 = NodeManager::currentNM()->mkNode( kind::EQUAL, NodeManager::currentNM()->mkConst( Rational( 1 ) ),
293 NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ) );
294 tp = NodeManager::currentNM()->mkNode( kind::AND, len_sk2_eq_1, tp );
295
296 new_nodes.push_back( tp );
297
298 d_cache[t] = sk2;
299 retNode = sk2;
300 } else if(t.getKind() == kind::STRING_SUBSTR) {
301 InternalError("CharAt and Substr should not be reached here.");
302 } else if( t.getNumChildren()>0 ) {
303 std::vector< Node > cc;
304 if (t.getMetaKind() == kind::metakind::PARAMETERIZED) {
305 cc.push_back(t.getOperator());
306 }
307 bool changed = false;
308 for( unsigned i=0; i<t.getNumChildren(); i++ ){
309 Node tn = simplify( t[i], new_nodes );
310 cc.push_back( tn );
311 changed = changed || tn!=t[i];
312 }
313 if(changed) {
314 Node n = NodeManager::currentNM()->mkNode( t.getKind(), cc );
315 d_cache[t] = n;
316 retNode = n;
317 } else {
318 d_cache[t] = Node::null();
319 retNode = t;
320 }
321 }else{
322 d_cache[t] = Node::null();
323 retNode = t;
324 }
325
326 Trace("strings-preprocess") << "StringsPreprocess::simplify returns: " << retNode << std::endl;
327 if(!new_nodes.empty()) {
328 Trace("strings-preprocess") << " ... new nodes:";
329 for(unsigned int i=0; i<new_nodes.size(); ++i) {
330 Trace("strings-preprocess") << " " << new_nodes[i];
331 }
332 Trace("strings-preprocess") << std::endl;
333 }
334
335 return retNode;
336 }
337
338 void StringsPreprocess::simplify(std::vector< Node > &vec_node) {
339 std::vector< Node > new_nodes;
340 for( unsigned i=0; i<vec_node.size(); i++ ){
341 vec_node[i] = simplify( vec_node[i], new_nodes );
342 }
343 vec_node.insert( vec_node.end(), new_nodes.begin(), new_nodes.end() );
344 }
345
346 }/* CVC4::theory::strings namespace */
347 }/* CVC4::theory namespace */
348 }/* CVC4 namespace */