0d422e823d5022e00fa7c2432edfdcea01118da8
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2 /*! \file regexp_operation.cpp
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Tianyi Liang, Andrew Reynolds, Tim King
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Symbolic Regular Expresion Operations
13 **
14 ** Symbolic Regular Expresion Operations
15 **/
16
17 #include "theory/strings/regexp_operation.h"
18
19 #include "expr/kind.h"
20 #include "options/strings_options.h"
21 #include "theory/strings/theory_strings_rewriter.h"
22
23 using namespace CVC4;
24 using namespace CVC4::kind;
25
26 namespace CVC4 {
27 namespace theory {
28 namespace strings {
29
30 RegExpOpr::RegExpOpr()
31 : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))),
32 d_true(NodeManager::currentNM()->mkConst(true)),
33 d_false(NodeManager::currentNM()->mkConst(false)),
34 d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP,
35 d_emptyString)),
36 d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
37 std::vector<Node>{})),
38 d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))),
39 d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))),
40 d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
41 std::vector<Node>{})),
42 d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
43 {
44 d_lastchar = TheoryStringsRewriter::getAlphabetCardinality()-1;
45 }
46
47 RegExpOpr::~RegExpOpr() {}
48
49 bool RegExpOpr::checkConstRegExp( Node r ) {
50 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl;
51 bool ret = true;
52 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
53 ret = d_cstre_cache[r];
54 } else {
55 if(r.getKind() == kind::STRING_TO_REGEXP) {
56 Node tmp = Rewriter::rewrite( r[0] );
57 ret = tmp.isConst();
58 } else {
59 for(unsigned i=0; i<r.getNumChildren(); ++i) {
60 if(!checkConstRegExp(r[i])) {
61 ret = false; break;
62 }
63 }
64 }
65 d_cstre_cache[r] = ret;
66 }
67 return ret;
68 }
69
70 // 0-unknown, 1-yes, 2-no
71 int RegExpOpr::delta( Node r, Node &exp ) {
72 Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl;
73 int ret = 0;
74 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
75 ret = d_delta_cache[r].first;
76 exp = d_delta_cache[r].second;
77 } else {
78 int k = r.getKind();
79 switch( k ) {
80 case kind::REGEXP_EMPTY: {
81 ret = 2;
82 break;
83 }
84 case kind::REGEXP_SIGMA: {
85 ret = 2;
86 break;
87 }
88 case kind::STRING_TO_REGEXP: {
89 Node tmp = Rewriter::rewrite(r[0]);
90 if(tmp.isConst()) {
91 if(tmp == d_emptyString) {
92 ret = 1;
93 } else {
94 ret = 2;
95 }
96 } else {
97 ret = 0;
98 if(tmp.getKind() == kind::STRING_CONCAT) {
99 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
100 if(tmp[i].isConst()) {
101 ret = 2; break;
102 }
103 }
104
105 }
106 if(ret == 0) {
107 exp = r[0].eqNode(d_emptyString);
108 }
109 }
110 break;
111 }
112 case kind::REGEXP_CONCAT: {
113 bool flag = false;
114 std::vector< Node > vec_nodes;
115 for(unsigned i=0; i<r.getNumChildren(); ++i) {
116 Node exp2;
117 int tmp = delta( r[i], exp2 );
118 if(tmp == 2) {
119 ret = 2;
120 break;
121 } else if(tmp == 0) {
122 vec_nodes.push_back( exp2 );
123 flag = true;
124 }
125 }
126 if(ret != 2) {
127 if(!flag) {
128 ret = 1;
129 } else {
130 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
131 }
132 }
133 break;
134 }
135 case kind::REGEXP_UNION: {
136 bool flag = false;
137 std::vector< Node > vec_nodes;
138 for(unsigned i=0; i<r.getNumChildren(); ++i) {
139 Node exp2;
140 int tmp = delta( r[i], exp2 );
141 if(tmp == 1) {
142 ret = 1;
143 break;
144 } else if(tmp == 0) {
145 vec_nodes.push_back( exp2 );
146 flag = true;
147 }
148 }
149 if(ret != 1) {
150 if(!flag) {
151 ret = 2;
152 } else {
153 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
154 }
155 }
156 break;
157 }
158 case kind::REGEXP_INTER: {
159 bool flag = false;
160 std::vector< Node > vec_nodes;
161 for(unsigned i=0; i<r.getNumChildren(); ++i) {
162 Node exp2;
163 int tmp = delta( r[i], exp2 );
164 if(tmp == 2) {
165 ret = 2;
166 break;
167 } else if(tmp == 0) {
168 vec_nodes.push_back( exp2 );
169 flag = true;
170 }
171 }
172 if(ret != 2) {
173 if(!flag) {
174 ret = 1;
175 } else {
176 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
177 }
178 }
179 break;
180 }
181 case kind::REGEXP_STAR: {
182 ret = 1;
183 break;
184 }
185 case kind::REGEXP_PLUS: {
186 ret = delta( r[0], exp );
187 break;
188 }
189 case kind::REGEXP_OPT: {
190 ret = 1;
191 break;
192 }
193 case kind::REGEXP_RANGE: {
194 ret = 2;
195 break;
196 }
197 case kind::REGEXP_LOOP: {
198 if(r[1] == d_zero) {
199 ret = 1;
200 } else {
201 ret = delta(r[0], exp);
202 }
203 break;
204 }
205 default: {
206 //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
207 Unreachable();
208 }
209 }
210 if(!exp.isNull()) {
211 exp = Rewriter::rewrite(exp);
212 }
213 std::pair< int, Node > p(ret, exp);
214 d_delta_cache[r] = p;
215 }
216 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
217 return ret;
218 }
219
220 // 0-unknown, 1-yes, 2-no
221 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
222 Assert( c.size() < 2 );
223 Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
224
225 int ret = 1;
226 retNode = d_emptyRegexp;
227
228 PairNodeStr dv = std::make_pair( r, c );
229 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
230 retNode = d_deriv_cache[dv].first;
231 ret = d_deriv_cache[dv].second;
232 } else if( c.isEmptyString() ) {
233 Node expNode;
234 ret = delta( r, expNode );
235 if(ret == 0) {
236 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
237 } else if(ret == 1) {
238 retNode = r;
239 }
240 std::pair< Node, int > p(retNode, ret);
241 d_deriv_cache[dv] = p;
242 } else {
243 switch( r.getKind() ) {
244 case kind::REGEXP_EMPTY: {
245 ret = 2;
246 break;
247 }
248 case kind::REGEXP_SIGMA: {
249 retNode = d_emptySingleton;
250 break;
251 }
252 case kind::REGEXP_RANGE: {
253 CVC4::String a = r[0].getConst<String>();
254 CVC4::String b = r[1].getConst<String>();
255 retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
256 break;
257 }
258 case kind::STRING_TO_REGEXP: {
259 Node tmp = Rewriter::rewrite(r[0]);
260 if(tmp.isConst()) {
261 if(tmp == d_emptyString) {
262 ret = 2;
263 } else {
264 if (tmp.getConst<CVC4::String>().front() == c.front())
265 {
266 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
267 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
268 } else {
269 ret = 2;
270 }
271 }
272 } else {
273 ret = 0;
274 Node rest;
275 if(tmp.getKind() == kind::STRING_CONCAT) {
276 Node t2 = tmp[0];
277 if(t2.isConst()) {
278 if (t2.getConst<CVC4::String>().front() == c.front())
279 {
280 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
281 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
282 std::vector< Node > vec_nodes;
283 vec_nodes.push_back(n);
284 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
285 vec_nodes.push_back(tmp[i]);
286 }
287 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
288 ret = 1;
289 } else {
290 ret = 2;
291 }
292 } else {
293 tmp = tmp[0];
294 std::vector< Node > vec_nodes;
295 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
296 vec_nodes.push_back(tmp[i]);
297 }
298 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
299 }
300 }
301 if(ret == 0) {
302 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
303 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
304 if(!rest.isNull()) {
305 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
306 }
307 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
308 NodeManager::currentNM()->mkConst(c), sk));
309 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
310 }
311 }
312 break;
313 }
314 case kind::REGEXP_CONCAT: {
315 std::vector< Node > vec_nodes;
316 std::vector< Node > delta_nodes;
317 Node dnode = d_true;
318 for(unsigned i=0; i<r.getNumChildren(); ++i) {
319 Node dc;
320 Node exp2;
321 int rt = derivativeS(r[i], c, dc);
322 if(rt != 2) {
323 if(rt == 0) {
324 ret = 0;
325 }
326 std::vector< Node > vec_nodes2;
327 if(dc != d_emptySingleton) {
328 vec_nodes2.push_back( dc );
329 }
330 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
331 if(r[j] != d_emptySingleton) {
332 vec_nodes2.push_back( r[j] );
333 }
334 }
335 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
336 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
337 if(dnode != d_true) {
338 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
339 ret = 0;
340 }
341 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
342 vec_nodes.push_back( tmp );
343 }
344 }
345 Node exp3;
346 int rt2 = delta( r[i], exp3 );
347 if( rt2 == 0 ) {
348 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
349 } else if( rt2 == 2 ) {
350 break;
351 }
352 }
353 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
354 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
355 if(retNode == d_emptyRegexp) {
356 ret = 2;
357 }
358 break;
359 }
360 case kind::REGEXP_UNION: {
361 std::vector< Node > vec_nodes;
362 for(unsigned i=0; i<r.getNumChildren(); ++i) {
363 Node dc;
364 int rt = derivativeS(r[i], c, dc);
365 if(rt == 0) {
366 ret = 0;
367 }
368 if(rt != 2) {
369 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
370 vec_nodes.push_back( dc );
371 }
372 }
373 //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
374 }
375 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
376 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
377 if(retNode == d_emptyRegexp) {
378 ret = 2;
379 }
380 break;
381 }
382 case kind::REGEXP_INTER: {
383 bool flag = true;
384 bool flag_sg = false;
385 std::vector< Node > vec_nodes;
386 for(unsigned i=0; i<r.getNumChildren(); ++i) {
387 Node dc;
388 int rt = derivativeS(r[i], c, dc);
389 if(rt == 0) {
390 ret = 0;
391 } else if(rt == 2) {
392 flag = false;
393 break;
394 }
395 if(dc == d_sigma_star) {
396 flag_sg = true;
397 } else {
398 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
399 vec_nodes.push_back( dc );
400 }
401 }
402 }
403 if(flag) {
404 if(vec_nodes.size() == 0 && flag_sg) {
405 retNode = d_sigma_star;
406 } else {
407 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
408 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
409 if(retNode == d_emptyRegexp) {
410 ret = 2;
411 }
412 }
413 } else {
414 retNode = d_emptyRegexp;
415 ret = 2;
416 }
417 break;
418 }
419 case kind::REGEXP_STAR: {
420 Node dc;
421 ret = derivativeS(r[0], c, dc);
422 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
423 break;
424 }
425 case kind::REGEXP_LOOP: {
426 if(r[1] == r[2] && r[1] == d_zero) {
427 ret = 2;
428 //retNode = d_emptyRegexp;
429 } else {
430 Node dc;
431 ret = derivativeS(r[0], c, dc);
432 if(dc==d_emptyRegexp) {
433 unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
434 unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
435 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0],
436 NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))),
437 NodeManager::currentNM()->mkConst(CVC4::Rational(u-1)));
438 retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
439 } else {
440 retNode = d_emptyRegexp;
441 }
442 }
443 break;
444 }
445 default: {
446 //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
447 Unreachable();
448 }
449 }
450 if(retNode != d_emptyRegexp) {
451 retNode = Rewriter::rewrite( retNode );
452 }
453 std::pair< Node, int > p(retNode, ret);
454 d_deriv_cache[dv] = p;
455 }
456
457 Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
458 return ret;
459 }
460
461 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
462 Assert( c.size() < 2 );
463 Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
464 Node retNode = d_emptyRegexp;
465 PairNodeStr dv = std::make_pair( r, c );
466 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
467 retNode = d_dv_cache[dv];
468 } else if( c.isEmptyString() ){
469 Node exp;
470 int tmp = delta( r, exp );
471 if(tmp == 0) {
472 // TODO variable
473 retNode = d_emptyRegexp;
474 } else if(tmp == 1) {
475 retNode = r;
476 } else {
477 retNode = d_emptyRegexp;
478 }
479 } else {
480 int k = r.getKind();
481 switch( k ) {
482 case kind::REGEXP_EMPTY: {
483 retNode = d_emptyRegexp;
484 break;
485 }
486 case kind::REGEXP_SIGMA: {
487 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
488 break;
489 }
490 case kind::REGEXP_RANGE: {
491 CVC4::String a = r[0].getConst<String>();
492 CVC4::String b = r[1].getConst<String>();
493 retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
494 break;
495 }
496 case kind::STRING_TO_REGEXP: {
497 if(r[0].isConst()) {
498 if(r[0] == d_emptyString) {
499 retNode = d_emptyRegexp;
500 } else {
501 if (r[0].getConst<CVC4::String>().front() == c.front())
502 {
503 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
504 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
505 } else {
506 retNode = d_emptyRegexp;
507 }
508 }
509 } else {
510 // TODO variable
511 retNode = d_emptyRegexp;
512 }
513 break;
514 }
515 case kind::REGEXP_CONCAT: {
516 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
517 std::vector< Node > vec_nodes;
518 for(unsigned i=0; i<r.getNumChildren(); ++i) {
519 Node dc = derivativeSingle(r[i], c);
520 if(dc != d_emptyRegexp) {
521 std::vector< Node > vec_nodes2;
522 if(dc != rees) {
523 vec_nodes2.push_back( dc );
524 }
525 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
526 if(r[j] != rees) {
527 vec_nodes2.push_back( r[j] );
528 }
529 }
530 Node tmp = vec_nodes2.size()==0 ? rees :
531 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
532 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
533 vec_nodes.push_back( tmp );
534 }
535 }
536 Node exp;
537 if( delta( r[i], exp ) != 1 ) {
538 break;
539 }
540 }
541 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
542 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
543 break;
544 }
545 case kind::REGEXP_UNION: {
546 std::vector< Node > vec_nodes;
547 for(unsigned i=0; i<r.getNumChildren(); ++i) {
548 Node dc = derivativeSingle(r[i], c);
549 if(dc != d_emptyRegexp) {
550 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
551 vec_nodes.push_back( dc );
552 }
553 }
554 //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
555 }
556 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
557 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
558 break;
559 }
560 case kind::REGEXP_INTER: {
561 bool flag = true;
562 bool flag_sg = false;
563 std::vector< Node > vec_nodes;
564 for(unsigned i=0; i<r.getNumChildren(); ++i) {
565 Node dc = derivativeSingle(r[i], c);
566 if(dc != d_emptyRegexp) {
567 if(dc == d_sigma_star) {
568 flag_sg = true;
569 } else {
570 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
571 vec_nodes.push_back( dc );
572 }
573 }
574 } else {
575 flag = false;
576 break;
577 }
578 }
579 if(flag) {
580 if(vec_nodes.size() == 0 && flag_sg) {
581 retNode = d_sigma_star;
582 } else {
583 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
584 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
585 }
586 } else {
587 retNode = d_emptyRegexp;
588 }
589 break;
590 }
591 case kind::REGEXP_STAR: {
592 Node dc = derivativeSingle(r[0], c);
593 if(dc != d_emptyRegexp) {
594 retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
595 } else {
596 retNode = d_emptyRegexp;
597 }
598 break;
599 }
600 case kind::REGEXP_LOOP: {
601 if(r[1] == r[2] && r[1] == d_zero) {
602 retNode = d_emptyRegexp;
603 } else {
604 Node dc = derivativeSingle(r[0], c);
605 if(dc != d_emptyRegexp) {
606 unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
607 unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
608 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0],
609 NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))),
610 NodeManager::currentNM()->mkConst(CVC4::Rational(u-1)));
611 retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
612 } else {
613 retNode = d_emptyRegexp;
614 }
615 }
616 //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
617 break;
618 }
619 default: {
620 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
621 Unreachable();
622 }
623 }
624 if(retNode != d_emptyRegexp) {
625 retNode = Rewriter::rewrite( retNode );
626 }
627 d_dv_cache[dv] = retNode;
628 }
629 Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
630 return retNode;
631 }
632
633 void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
634 {
635 Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
636 std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
637 d_fset_cache.find(r);
638 if(itr != d_fset_cache.end()) {
639 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
640 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
641 } else {
642 // cset is code points
643 std::set<unsigned> cset;
644 SetNodes vset;
645 int k = r.getKind();
646 switch( k ) {
647 case kind::REGEXP_EMPTY: {
648 break;
649 }
650 case kind::REGEXP_SIGMA: {
651 Assert(d_lastchar < std::numeric_limits<unsigned>::max());
652 for (unsigned i = 0; i <= d_lastchar; i++)
653 {
654 cset.insert(i);
655 }
656 break;
657 }
658 case kind::REGEXP_RANGE: {
659 unsigned a = r[0].getConst<String>().front();
660 a = String::convertUnsignedIntToCode(a);
661 unsigned b = r[1].getConst<String>().front();
662 b = String::convertUnsignedIntToCode(b);
663 Assert(a < b);
664 Assert(b < std::numeric_limits<unsigned>::max());
665 for (unsigned c = a; c <= b; c++)
666 {
667 cset.insert(c);
668 }
669 break;
670 }
671 case kind::STRING_TO_REGEXP: {
672 Node st = Rewriter::rewrite(r[0]);
673 if(st.isConst()) {
674 CVC4::String s = st.getConst< CVC4::String >();
675 if(s.size() != 0) {
676 unsigned sc = s.front();
677 sc = String::convertUnsignedIntToCode(sc);
678 cset.insert(sc);
679 }
680 }
681 else if (st.getKind() == kind::STRING_CONCAT)
682 {
683 if(st[0].isConst()) {
684 CVC4::String s = st[0].getConst<CVC4::String>();
685 unsigned sc = s.front();
686 sc = String::convertUnsignedIntToCode(sc);
687 cset.insert(sc);
688 } else {
689 vset.insert( st[0] );
690 }
691 }
692 else
693 {
694 vset.insert(st);
695 }
696 break;
697 }
698 case kind::REGEXP_CONCAT: {
699 for(unsigned i=0; i<r.getNumChildren(); i++) {
700 firstChars(r[i], cset, vset);
701 Node n = r[i];
702 Node exp;
703 int r = delta( n, exp );
704 if(r != 1) {
705 break;
706 }
707 }
708 break;
709 }
710 case kind::REGEXP_UNION: {
711 for(unsigned i=0; i<r.getNumChildren(); i++) {
712 firstChars(r[i], cset, vset);
713 }
714 break;
715 }
716 case kind::REGEXP_INTER: {
717 //TODO: Overapproximation for now
718 //for(unsigned i=0; i<r.getNumChildren(); i++) {
719 // firstChars(r[i], cset, vset);
720 //}
721 firstChars(r[0], cset, vset);
722 break;
723 }
724 case kind::REGEXP_STAR: {
725 firstChars(r[0], cset, vset);
726 break;
727 }
728 case kind::REGEXP_LOOP: {
729 firstChars(r[0], cset, vset);
730 break;
731 }
732 default: {
733 Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl;
734 Unreachable();
735 }
736 }
737 pcset.insert(cset.begin(), cset.end());
738 pvset.insert(vset.begin(), vset.end());
739 std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
740 d_fset_cache[r] = p;
741 }
742
743 if(Trace.isOn("regexp-fset")) {
744 Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
745 for (std::set<unsigned>::const_iterator itr = pcset.begin();
746 itr != pcset.end();
747 itr++)
748 {
749 if (itr != pcset.begin())
750 {
751 Trace("regexp-fset") << ",";
752 }
753 Trace("regexp-fset") << (*itr);
754 }
755 Trace("regexp-fset") << "}" << std::endl;
756 }
757 }
758
759 //simplify
760 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
761 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
762 Assert(t.getKind() == kind::STRING_IN_REGEXP);
763 Node str = Rewriter::rewrite(t[0]);
764 Node re = Rewriter::rewrite(t[1]);
765 if(polarity) {
766 simplifyPRegExp( str, re, new_nodes );
767 } else {
768 simplifyNRegExp( str, re, new_nodes );
769 }
770 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
771 for(unsigned i=0; i<new_nodes.size(); i++) {
772 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
773 }
774 }
775 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
776 std::pair < Node, Node > p(s, r);
777 NodeManager *nm = NodeManager::currentNM();
778 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
779 if(itr != d_simpl_neg_cache.end()) {
780 new_nodes.push_back( itr->second );
781 } else {
782 int k = r.getKind();
783 Node conc;
784 switch( k ) {
785 case kind::REGEXP_EMPTY: {
786 conc = d_true;
787 break;
788 }
789 case kind::REGEXP_SIGMA: {
790 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
791 break;
792 }
793 case kind::REGEXP_RANGE: {
794 std::vector< Node > vec;
795 unsigned a = r[0].getConst<String>().front();
796 a = String::convertUnsignedIntToCode(a);
797 unsigned b = r[1].getConst<String>().front();
798 b = String::convertUnsignedIntToCode(b);
799 for (unsigned c = a; c <= b; c++)
800 {
801 std::vector<unsigned> tmpVec;
802 tmpVec.push_back(String::convertCodeToUnsignedInt(c));
803 Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate();
804 vec.push_back( tmp );
805 }
806 conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec);
807 break;
808 }
809 case kind::STRING_TO_REGEXP: {
810 conc = s.eqNode(r[0]).negate();
811 break;
812 }
813 case kind::REGEXP_CONCAT: {
814 // The following simplification states that
815 // ~( s in R1 ++ R2 )
816 // is equivalent to
817 // forall x.
818 // 0 <= x <= len(s) =>
819 // ~( substr(s,0,x) in R1 ) OR ~( substr(s,x,len(s)-x) in R2)
820 Node lens = nm->mkNode(STRING_LENGTH, s);
821 // the index we are removing from the RE concatenation
822 unsigned indexRm = 0;
823 Node b1;
824 Node b1v;
825 // As an optimization to the above reduction, if we can determine that
826 // all strings in the language of R1 have the same length, say n,
827 // then the conclusion of the reduction is quantifier-free:
828 // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
829 Node reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[0]);
830 if (reLength.isNull())
831 {
832 // try from the opposite end
833 unsigned indexE = r.getNumChildren() - 1;
834 reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[indexE]);
835 if (!reLength.isNull())
836 {
837 indexRm = indexE;
838 }
839 }
840 Node guard;
841 if (reLength.isNull())
842 {
843 b1 = nm->mkBoundVar(nm->integerType());
844 b1v = nm->mkNode(BOUND_VAR_LIST, b1);
845 guard = nm->mkNode(AND,
846 nm->mkNode(GEQ, b1, d_zero),
847 nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
848 }
849 else
850 {
851 b1 = reLength;
852 }
853 Node s1;
854 Node s2;
855 if (indexRm == 0)
856 {
857 s1 = nm->mkNode(STRING_SUBSTR, s, d_zero, b1);
858 s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
859 }
860 else
861 {
862 s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
863 s2 =
864 nm->mkNode(STRING_SUBSTR, s, d_zero, nm->mkNode(MINUS, lens, b1));
865 }
866 Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[indexRm]).negate();
867 std::vector<Node> nvec;
868 for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
869 {
870 if (i != indexRm)
871 {
872 nvec.push_back( r[i] );
873 }
874 }
875 Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
876 r2 = Rewriter::rewrite(r2);
877 Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
878 conc = nm->mkNode(OR, s1r1, s2r2);
879 if (!b1v.isNull())
880 {
881 conc = nm->mkNode(OR, guard.negate(), conc);
882 conc = nm->mkNode(FORALL, b1v, conc);
883 }
884 break;
885 }
886 case kind::REGEXP_UNION: {
887 std::vector< Node > c_and;
888 for(unsigned i=0; i<r.getNumChildren(); ++i) {
889 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
890 c_and.push_back( r[i][0].eqNode(s).negate() );
891 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
892 continue;
893 } else {
894 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
895 }
896 }
897 conc = c_and.size() == 0 ? d_true :
898 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
899 break;
900 }
901 case kind::REGEXP_INTER: {
902 bool emptyflag = false;
903 std::vector< Node > c_or;
904 for(unsigned i=0; i<r.getNumChildren(); ++i) {
905 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
906 c_or.push_back( r[i][0].eqNode(s).negate() );
907 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
908 emptyflag = true;
909 break;
910 } else {
911 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
912 }
913 }
914 if(emptyflag) {
915 conc = d_true;
916 } else {
917 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
918 }
919 break;
920 }
921 case kind::REGEXP_STAR: {
922 if(s == d_emptyString) {
923 conc = d_false;
924 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
925 conc = s.eqNode(d_emptyString).negate();
926 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
927 conc = d_false;
928 } else {
929 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
930 Node sne = s.eqNode(d_emptyString).negate();
931 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
932 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
933 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
934 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
935 //internal
936 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, d_zero, b1);
937 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
938 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
939 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
940
941 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
942 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
943 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
944 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
945 }
946 break;
947 }
948 case kind::REGEXP_LOOP: {
949 Assert(r.getNumChildren() == 3);
950 if(r[1] == r[2]) {
951 if(r[1] == d_zero) {
952 conc = s.eqNode(d_emptyString).negate();
953 } else if(r[1] == d_one) {
954 conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate();
955 } else {
956 //unroll for now
957 unsigned l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
958 std::vector<Node> vec;
959 for(unsigned i=0; i<l; i++) {
960 vec.push_back(r[0]);
961 }
962 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
963 conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate();
964 }
965 } else {
966 Assert(r[1] == d_zero);
967 //unroll for now
968 unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
969 std::vector<Node> vec;
970 vec.push_back(d_emptySingleton);
971 std::vector<Node> vec2;
972 for(unsigned i=1; i<=u; i++) {
973 vec2.push_back(r[0]);
974 Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec);
975 vec.push_back(r2);
976 }
977 Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec);
978 conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate();
979 }
980 break;
981 }
982 default: {
983 Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
984 Assert( false, "Unsupported Term" );
985 }
986 }
987 conc = Rewriter::rewrite( conc );
988 new_nodes.push_back( conc );
989 d_simpl_neg_cache[p] = conc;
990 }
991 }
992 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
993 std::pair < Node, Node > p(s, r);
994 NodeManager *nm = NodeManager::currentNM();
995 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
996 if(itr != d_simpl_cache.end()) {
997 new_nodes.push_back( itr->second );
998 } else {
999 int k = r.getKind();
1000 Node conc;
1001 switch( k ) {
1002 case kind::REGEXP_EMPTY: {
1003 conc = d_false;
1004 break;
1005 }
1006 case kind::REGEXP_SIGMA: {
1007 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1008 break;
1009 }
1010 case kind::REGEXP_RANGE: {
1011 conc = s.eqNode( r[0] );
1012 if(r[0] != r[1]) {
1013 unsigned a = r[0].getConst<String>().front();
1014 unsigned b = r[1].getConst<String>().front();
1015 a += 1;
1016 std::vector<unsigned> anvec;
1017 anvec.push_back(a);
1018 Node an = nm->mkConst(String(anvec));
1019 Node tmp = a != b
1020 ? nm->mkNode(kind::STRING_IN_REGEXP,
1021 s,
1022 nm->mkNode(kind::REGEXP_RANGE, an, r[1]))
1023 : s.eqNode(r[1]);
1024 conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp);
1025 }
1026 break;
1027 }
1028 case kind::STRING_TO_REGEXP: {
1029 conc = s.eqNode(r[0]);
1030 break;
1031 }
1032 case kind::REGEXP_CONCAT: {
1033 std::vector< Node > nvec;
1034 std::vector< Node > cc;
1035 bool emptyflag = false;
1036 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1037 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1038 cc.push_back( r[i][0] );
1039 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1040 emptyflag = true;
1041 break;
1042 } else {
1043 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1044 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1045 nvec.push_back(lem);
1046 cc.push_back(sk);
1047 }
1048 }
1049 if(emptyflag) {
1050 conc = d_false;
1051 } else {
1052 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1053 nvec.push_back(lem);
1054 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1055 }
1056 break;
1057 }
1058 case kind::REGEXP_UNION: {
1059 std::vector< Node > c_or;
1060 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1061 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1062 c_or.push_back( r[i][0].eqNode(s) );
1063 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1064 continue;
1065 } else {
1066 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1067 }
1068 }
1069 conc = c_or.size() == 0 ? d_false :
1070 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1071 break;
1072 }
1073 case kind::REGEXP_INTER: {
1074 std::vector< Node > c_and;
1075 bool emptyflag = false;
1076 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1077 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1078 c_and.push_back( r[i][0].eqNode(s) );
1079 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1080 emptyflag = true;
1081 break;
1082 } else {
1083 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1084 }
1085 }
1086 if(emptyflag) {
1087 conc = d_false;
1088 } else {
1089 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1090 }
1091 break;
1092 }
1093 case kind::REGEXP_STAR: {
1094 if(s == d_emptyString) {
1095 conc = d_true;
1096 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1097 conc = s.eqNode(d_emptyString);
1098 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1099 conc = d_true;
1100 } else {
1101 Node se = s.eqNode(d_emptyString);
1102 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1103 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1104 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1105 Node s1nz = sk1.eqNode(d_emptyString).negate();
1106 Node s2nz = sk2.eqNode(d_emptyString).negate();
1107 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1108 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1109 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1110
1111 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1112 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1113 }
1114 break;
1115 }
1116 case kind::REGEXP_LOOP: {
1117 Assert(r.getNumChildren() == 3);
1118 if(r[1] == d_zero) {
1119 if(r[2] == d_zero) {
1120 conc = s.eqNode( d_emptyString );
1121 } else {
1122 //R{0,n}
1123 if(s != d_emptyString) {
1124 Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
1125 Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
1126 Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1127 Node sk1ne = sk1.eqNode(d_emptyString).negate();
1128 Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1129 unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
1130 Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
1131 Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
1132 NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1));
1133 conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
1134 conc = NodeManager::currentNM()->mkNode(kind::OR,
1135 s.eqNode(d_emptyString), conc);
1136 } else {
1137 conc = d_true;
1138 }
1139 }
1140 } else {
1141 //R^n
1142 Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
1143 Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" );
1144 Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1145 Node sk1ne = sk1.eqNode(d_emptyString).negate();
1146 Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1147 unsigned u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
1148 Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1));
1149 Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2,
1150 NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1));
1151 conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru);
1152 }
1153 break;
1154 }
1155 default: {
1156 Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1157 Assert( false, "Unsupported Term" );
1158 }
1159 }
1160 conc = Rewriter::rewrite( conc );
1161 new_nodes.push_back( conc );
1162 d_simpl_cache[p] = conc;
1163 }
1164 }
1165
1166 bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1167 for(std::set< PairNodes >::const_iterator itr = s.begin();
1168 itr != s.end(); ++itr) {
1169 if((itr->first == n1 && itr->second == n2) ||
1170 (itr->first == n2 && itr->second == n1)) {
1171 return true;
1172 }
1173 }
1174 return false;
1175 }
1176
1177 bool RegExpOpr::containC2(unsigned cnt, Node n) {
1178 if(n.getKind() == kind::REGEXP_RV) {
1179 Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()),
1180 "Exceeded UINT32_MAX in RegExpOpr::containC2");
1181 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1182 return cnt == y;
1183 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1184 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1185 if(containC2(cnt, n[i])) {
1186 return true;
1187 }
1188 }
1189 } else if(n.getKind() == kind::REGEXP_STAR) {
1190 return containC2(cnt, n[0]);
1191 } else if(n.getKind() == kind::REGEXP_LOOP) {
1192 return containC2(cnt, n[0]);
1193 } else if(n.getKind() == kind::REGEXP_UNION) {
1194 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1195 if(containC2(cnt, n[i])) {
1196 return true;
1197 }
1198 }
1199 }
1200 return false;
1201 }
1202 Node RegExpOpr::convert1(unsigned cnt, Node n) {
1203 Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1204 Node r1, r2;
1205 convert2(cnt, n, r1, r2);
1206 Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1207 Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1208 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1209 ret = Rewriter::rewrite( ret );
1210 Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1211 return ret;
1212 }
1213 void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1214 if(n == d_emptyRegexp) {
1215 r1 = d_emptyRegexp;
1216 r2 = d_emptyRegexp;
1217 } else if(n == d_emptySingleton) {
1218 r1 = d_emptySingleton;
1219 r2 = d_emptySingleton;
1220 } else if(n.getKind() == kind::REGEXP_RV) {
1221 Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()),
1222 "Exceeded UINT32_MAX in RegExpOpr::convert2");
1223 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1224 r1 = d_emptySingleton;
1225 if(cnt == y) {
1226 r2 = d_emptyRegexp;
1227 } else {
1228 r2 = n;
1229 }
1230 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1231 bool flag = true;
1232 std::vector<Node> vr1, vr2;
1233 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1234 if(containC2(cnt, n[i])) {
1235 Node t1, t2;
1236 convert2(cnt, n[i], t1, t2);
1237 vr1.push_back(t1);
1238 r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1239 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1240 vr2.push_back(t2);
1241 for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1242 vr2.push_back(n[j]);
1243 }
1244 r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1245 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1246 flag = false;
1247 break;
1248 } else {
1249 vr1.push_back(n[i]);
1250 }
1251 }
1252 if(flag) {
1253 r1 = d_emptySingleton;
1254 r2 = n;
1255 }
1256 } else if(n.getKind() == kind::REGEXP_UNION) {
1257 std::vector<Node> vr1, vr2;
1258 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1259 Node t1, t2;
1260 convert2(cnt, n[i], t1, t2);
1261 vr1.push_back(t1);
1262 vr2.push_back(t2);
1263 }
1264 r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1265 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1266 } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) {
1267 r1 = d_emptySingleton;
1268 r2 = n;
1269 } else if(n.getKind() == kind::REGEXP_LOOP) {
1270 //TODO:LOOP
1271 r1 = d_emptySingleton;
1272 r2 = n;
1273 //Unreachable();
1274 } else {
1275 //is it possible?
1276 Unreachable();
1277 }
1278 }
1279
1280 bool RegExpOpr::testNoRV(Node r) {
1281 std::map< Node, bool >::const_iterator itr = d_norv_cache.find(r);
1282 if(itr != d_norv_cache.end()) {
1283 return itr->second;
1284 } else {
1285 if(r.getKind() == kind::REGEXP_RV) {
1286 return false;
1287 } else if(r.getNumChildren() > 1) {
1288 for(unsigned int i=0; i<r.getNumChildren(); i++) {
1289 if(!testNoRV(r[i])) {
1290 return false;
1291 }
1292 }
1293 }
1294 return true;
1295 }
1296 }
1297
1298 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1299 //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1300 if(r1 > r2) {
1301 TNode tmpNode = r1;
1302 r1 = r2;
1303 r2 = tmpNode;
1304 }
1305 Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
1306 //if(Trace.isOn("regexp-debug")) {
1307 // Trace("regexp-debug") << "... with cache:\n";
1308 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1309 // itr!=cache.end();itr++) {
1310 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1311 // }
1312 //}
1313 std::pair < Node, Node > p(r1, r2);
1314 std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1315 Node rNode;
1316 if(itr != d_inter_cache.end()) {
1317 rNode = itr->second;
1318 } else {
1319 Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1320 if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1321 Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1322 rNode = d_emptyRegexp;
1323 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1324 Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1325 Node exp;
1326 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1327 if(r == 0) {
1328 //TODO: variable
1329 Unreachable();
1330 } else if(r == 1) {
1331 rNode = d_emptySingleton;
1332 } else {
1333 rNode = d_emptyRegexp;
1334 }
1335 } else if(r1 == r2) {
1336 Trace("regexp-int-debug") << " ... equal" << std::endl;
1337 rNode = r1; //convert1(cnt, r1);
1338 } else {
1339 Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1340 std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1341 if(itrcache != cache.end()) {
1342 rNode = itrcache->second;
1343 } else {
1344 Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1345 std::vector<unsigned> cset;
1346 std::set<unsigned> cset1, cset2;
1347 std::set< Node > vset1, vset2;
1348 firstChars(r1, cset1, vset1);
1349 firstChars(r2, cset2, vset2);
1350 Trace("regexp-int-debug") << " ... got fset" << std::endl;
1351 std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1352 std::inserter(cset, cset.begin()));
1353 std::vector< Node > vec_nodes;
1354 Node delta_exp;
1355 Trace("regexp-int-debug") << " ... try delta" << std::endl;
1356 int flag = delta(r1, delta_exp);
1357 int flag2 = delta(r2, delta_exp);
1358 Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1359 if(flag != 2 && flag2 != 2) {
1360 if(flag == 1 && flag2 == 1) {
1361 vec_nodes.push_back(d_emptySingleton);
1362 } else {
1363 //TODO: variable
1364 Unreachable();
1365 }
1366 }
1367 if(Trace.isOn("regexp-int-debug")) {
1368 Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1369 for (std::vector<unsigned>::const_iterator itr = cset.begin();
1370 itr != cset.end();
1371 itr++)
1372 {
1373 //CVC4::String c( *itr );
1374 if(itr != cset.begin()) {
1375 Trace("regexp-int-debug") << ", ";
1376 }
1377 Trace("regexp-int-debug") << ( *itr );
1378 }
1379 Trace("regexp-int-debug") << std::endl;
1380 }
1381 std::map< PairNodes, Node > cacheX;
1382 for (std::vector<unsigned>::const_iterator itr = cset.begin();
1383 itr != cset.end();
1384 itr++)
1385 {
1386 std::vector<unsigned> cvec;
1387 cvec.push_back(String::convertCodeToUnsignedInt(*itr));
1388 String c(cvec);
1389 Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1390 Node r1l = derivativeSingle(r1, c);
1391 Node r2l = derivativeSingle(r2, c);
1392 Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1393 Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1394 Node rt;
1395
1396 if(r1l > r2l) {
1397 Node tnode = r1l;
1398 r1l = r2l; r2l = tnode;
1399 }
1400 PairNodes pp(r1l, r2l);
1401 std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1402 if(itr2 != cacheX.end()) {
1403 rt = itr2->second;
1404 } else {
1405 std::map< PairNodes, Node > cache2(cache);
1406 cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt)));
1407 rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1408 cacheX[ pp ] = rt;
1409 }
1410
1411 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1412 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1413
1414 Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1415 vec_nodes.push_back(rt);
1416 }
1417 rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1418 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1419 rNode = convert1(cnt, rNode);
1420 rNode = Rewriter::rewrite( rNode );
1421 }
1422 }
1423 Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode) << std::endl;
1424 if(testNoRV(rNode)) {
1425 d_inter_cache[p] = rNode;
1426 }
1427 }
1428 Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1429 return rNode;
1430 }
1431
1432 Node RegExpOpr::removeIntersection(Node r) {
1433 Assert( checkConstRegExp(r) );
1434 std::map < Node, Node >::const_iterator itr = d_rm_inter_cache.find(r);
1435 Node retNode;
1436 if(itr != d_rm_inter_cache.end()) {
1437 retNode = itr->second;
1438 } else {
1439 switch(r.getKind()) {
1440 case kind::REGEXP_EMPTY: {
1441 retNode = r;
1442 break;
1443 }
1444 case kind::REGEXP_SIGMA: {
1445 retNode = r;
1446 break;
1447 }
1448 case kind::REGEXP_RANGE: {
1449 retNode = r;
1450 break;
1451 }
1452 case kind::STRING_TO_REGEXP: {
1453 retNode = r;
1454 break;
1455 }
1456 case kind::REGEXP_CONCAT: {
1457 std::vector< Node > vec_nodes;
1458 for(unsigned i=0; i<r.getNumChildren(); i++) {
1459 Node tmpNode = removeIntersection( r[i] );
1460 vec_nodes.push_back( tmpNode );
1461 }
1462 retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes) );
1463 break;
1464 }
1465 case kind::REGEXP_UNION: {
1466 std::vector< Node > vec_nodes;
1467 for(unsigned i=0; i<r.getNumChildren(); i++) {
1468 Node tmpNode = removeIntersection( r[i] );
1469 vec_nodes.push_back( tmpNode );
1470 }
1471 retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1472 break;
1473 }
1474 case kind::REGEXP_INTER: {
1475 retNode = removeIntersection( r[0] );
1476 for(unsigned i=1; i<r.getNumChildren(); i++) {
1477 bool spflag = false;
1478 Node tmpNode = removeIntersection( r[i] );
1479 retNode = intersect( retNode, tmpNode, spflag );
1480 }
1481 break;
1482 }
1483 case kind::REGEXP_STAR: {
1484 retNode = removeIntersection( r[0] );
1485 retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, retNode) );
1486 break;
1487 }
1488 case kind::REGEXP_LOOP: {
1489 retNode = removeIntersection( r[0] );
1490 retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, retNode, r[1], r[2]) );
1491 break;
1492 }
1493 default: {
1494 Unreachable();
1495 }
1496 }
1497 d_rm_inter_cache[r] = retNode;
1498 }
1499 Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) << " ) = " << mkString(retNode) << std::endl;
1500 return retNode;
1501 }
1502
1503 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1504 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1505 Node rr1 = removeIntersection(r1);
1506 Node rr2 = removeIntersection(r2);
1507 std::map< PairNodes, Node > cache;
1508 Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ")" << std::endl;
1509 Node retNode = intersectInternal(rr1, rr2, cache, 1);
1510 Trace("regexp-intersect") << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t"<< mkString(r2) << ") =\n\t" << mkString(retNode) << std::endl;
1511 return retNode;
1512 } else {
1513 spflag = true;
1514 return Node::null();
1515 }
1516 }
1517
1518 //printing
1519 std::string RegExpOpr::niceChar(Node r) {
1520 if(r.isConst()) {
1521 std::string s = r.getConst<CVC4::String>().toString() ;
1522 return s == "." ? "\\." : s;
1523 } else {
1524 std::string ss = "$" + r.toString();
1525 return ss;
1526 }
1527 }
1528 std::string RegExpOpr::mkString( Node r ) {
1529 std::string retStr;
1530 if(r.isNull()) {
1531 retStr = "\\E";
1532 } else {
1533 int k = r.getKind();
1534 switch( k ) {
1535 case kind::REGEXP_EMPTY: {
1536 retStr += "\\E";
1537 break;
1538 }
1539 case kind::REGEXP_SIGMA: {
1540 retStr += ".";
1541 break;
1542 }
1543 case kind::STRING_TO_REGEXP: {
1544 std::string tmp( niceChar( r[0] ) );
1545 retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1546 break;
1547 }
1548 case kind::REGEXP_CONCAT: {
1549 retStr += "(";
1550 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1551 //if(i != 0) retStr += ".";
1552 retStr += mkString( r[i] );
1553 }
1554 retStr += ")";
1555 break;
1556 }
1557 case kind::REGEXP_UNION: {
1558 retStr += "(";
1559 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1560 if(i != 0) retStr += "|";
1561 retStr += mkString( r[i] );
1562 }
1563 retStr += ")";
1564 break;
1565 }
1566 case kind::REGEXP_INTER: {
1567 retStr += "(";
1568 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1569 if(i != 0) retStr += "&";
1570 retStr += mkString( r[i] );
1571 }
1572 retStr += ")";
1573 break;
1574 }
1575 case kind::REGEXP_STAR: {
1576 retStr += mkString( r[0] );
1577 retStr += "*";
1578 break;
1579 }
1580 case kind::REGEXP_PLUS: {
1581 retStr += mkString( r[0] );
1582 retStr += "+";
1583 break;
1584 }
1585 case kind::REGEXP_OPT: {
1586 retStr += mkString( r[0] );
1587 retStr += "?";
1588 break;
1589 }
1590 case kind::REGEXP_RANGE: {
1591 retStr += "[";
1592 retStr += niceChar( r[0] );
1593 retStr += "-";
1594 retStr += niceChar( r[1] );
1595 retStr += "]";
1596 break;
1597 }
1598 case kind::REGEXP_LOOP: {
1599 retStr += "(";
1600 retStr += mkString(r[0]);
1601 retStr += ")";
1602 retStr += "{";
1603 retStr += r[1].getConst<Rational>().toString();
1604 retStr += ",";
1605 if(r.getNumChildren() == 3) {
1606 retStr += r[2].getConst<Rational>().toString();
1607 }
1608 retStr += "}";
1609 break;
1610 }
1611 case kind::REGEXP_RV: {
1612 retStr += "<";
1613 retStr += r[0].getConst<Rational>().getNumerator().toString();
1614 retStr += ">";
1615 break;
1616 }
1617 default:
1618 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1619 //Assert( false );
1620 //return Node::null();
1621 }
1622 }
1623
1624 return retStr;
1625 }
1626
1627 }/* CVC4::theory::strings namespace */
1628 }/* CVC4::theory namespace */
1629 }/* CVC4 namespace */