adds new feature: re.loop
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Symbolic Regular Expresion Operations
24 **
25 ** Symbolic Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
40 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
41 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
42 std::vector< Node > nvec;
43 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
44 // All Charactors = all printable ones 32-126
45 //d_char_start = 'a';//' ';
46 //d_char_end = 'c';//'~';
47 //d_sigma = mkAllExceptOne( '\0' );
48 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
49 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
50 d_card = 256;
51 }
52
53 int RegExpOpr::gcd ( int a, int b ) {
54 int c;
55 while ( a != 0 ) {
56 c = a; a = b%a; b = c;
57 }
58 return b;
59 }
60
61 bool RegExpOpr::checkConstRegExp( Node r ) {
62 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
63 bool ret = true;
64 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
65 ret = d_cstre_cache[r];
66 } else {
67 if(r.getKind() == kind::STRING_TO_REGEXP) {
68 Node tmp = Rewriter::rewrite( r[0] );
69 ret = tmp.isConst();
70 } else {
71 for(unsigned i=0; i<r.getNumChildren(); ++i) {
72 if(!checkConstRegExp(r[i])) {
73 ret = false; break;
74 }
75 }
76 }
77 d_cstre_cache[r] = ret;
78 }
79 return ret;
80 }
81
82 // 0-unknown, 1-yes, 2-no
83 int RegExpOpr::delta( Node r, Node &exp ) {
84 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
85 int ret = 0;
86 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
87 ret = d_delta_cache[r].first;
88 exp = d_delta_cache[r].second;
89 } else {
90 int k = r.getKind();
91 switch( k ) {
92 case kind::REGEXP_EMPTY: {
93 ret = 2;
94 break;
95 }
96 case kind::REGEXP_SIGMA: {
97 ret = 2;
98 break;
99 }
100 case kind::STRING_TO_REGEXP: {
101 Node tmp = Rewriter::rewrite(r[0]);
102 if(tmp.isConst()) {
103 if(tmp == d_emptyString) {
104 ret = 1;
105 } else {
106 ret = 2;
107 }
108 } else {
109 ret = 0;
110 if(tmp.getKind() == kind::STRING_CONCAT) {
111 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
112 if(tmp[i].isConst()) {
113 ret = 2; break;
114 }
115 }
116
117 }
118 if(ret == 0) {
119 exp = r[0].eqNode(d_emptyString);
120 }
121 }
122 break;
123 }
124 case kind::REGEXP_CONCAT: {
125 bool flag = false;
126 std::vector< Node > vec_nodes;
127 for(unsigned i=0; i<r.getNumChildren(); ++i) {
128 Node exp2;
129 int tmp = delta( r[i], exp2 );
130 if(tmp == 2) {
131 ret = 2;
132 break;
133 } else if(tmp == 0) {
134 vec_nodes.push_back( exp2 );
135 flag = true;
136 }
137 }
138 if(ret != 2) {
139 if(!flag) {
140 ret = 1;
141 } else {
142 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
143 }
144 }
145 break;
146 }
147 case kind::REGEXP_UNION: {
148 bool flag = false;
149 std::vector< Node > vec_nodes;
150 for(unsigned i=0; i<r.getNumChildren(); ++i) {
151 Node exp2;
152 int tmp = delta( r[i], exp2 );
153 if(tmp == 1) {
154 ret = 1;
155 break;
156 } else if(tmp == 0) {
157 vec_nodes.push_back( exp2 );
158 flag = true;
159 }
160 }
161 if(ret != 1) {
162 if(!flag) {
163 ret = 2;
164 } else {
165 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
166 }
167 }
168 break;
169 }
170 case kind::REGEXP_INTER: {
171 bool flag = false;
172 std::vector< Node > vec_nodes;
173 for(unsigned i=0; i<r.getNumChildren(); ++i) {
174 Node exp2;
175 int tmp = delta( r[i], exp2 );
176 if(tmp == 2) {
177 ret = 2;
178 break;
179 } else if(tmp == 0) {
180 vec_nodes.push_back( exp2 );
181 flag = true;
182 }
183 }
184 if(ret != 2) {
185 if(!flag) {
186 ret = 1;
187 } else {
188 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
189 }
190 }
191 break;
192 }
193 case kind::REGEXP_STAR: {
194 ret = 1;
195 break;
196 }
197 case kind::REGEXP_PLUS: {
198 ret = delta( r[0], exp );
199 break;
200 }
201 case kind::REGEXP_OPT: {
202 ret = 1;
203 break;
204 }
205 case kind::REGEXP_RANGE: {
206 ret = 2;
207 break;
208 }
209 default: {
210 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
211 Assert( false );
212 //return Node::null();
213 }
214 }
215 if(!exp.isNull()) {
216 exp = Rewriter::rewrite(exp);
217 }
218 std::pair< int, Node > p(ret, exp);
219 d_delta_cache[r] = p;
220 }
221 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
222 return ret;
223 }
224
225 // 0-unknown, 1-yes, 2-no
226 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
227 Assert( c.size() < 2 );
228 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
229
230 int ret = 1;
231 retNode = d_emptyRegexp;
232
233 PairNodeStr dv = std::make_pair( r, c );
234 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
235 retNode = d_deriv_cache[dv].first;
236 ret = d_deriv_cache[dv].second;
237 } else if( c.isEmptyString() ) {
238 Node expNode;
239 ret = delta( r, expNode );
240 if(ret == 0) {
241 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
242 } else if(ret == 1) {
243 retNode = r;
244 }
245 std::pair< Node, int > p(retNode, ret);
246 d_deriv_cache[dv] = p;
247 } else {
248 switch( r.getKind() ) {
249 case kind::REGEXP_EMPTY: {
250 ret = 2;
251 break;
252 }
253 case kind::REGEXP_SIGMA: {
254 retNode = d_emptySingleton;
255 break;
256 }
257 case kind::STRING_TO_REGEXP: {
258 Node tmp = Rewriter::rewrite(r[0]);
259 if(tmp.isConst()) {
260 if(tmp == d_emptyString) {
261 ret = 2;
262 } else {
263 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
264 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
265 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
266 } else {
267 ret = 2;
268 }
269 }
270 } else {
271 ret = 0;
272 Node rest;
273 if(tmp.getKind() == kind::STRING_CONCAT) {
274 Node t2 = tmp[0];
275 if(t2.isConst()) {
276 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
277 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
278 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
279 std::vector< Node > vec_nodes;
280 vec_nodes.push_back(n);
281 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
282 vec_nodes.push_back(tmp[i]);
283 }
284 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
285 ret = 1;
286 } else {
287 ret = 2;
288 }
289 } else {
290 tmp = tmp[0];
291 std::vector< Node > vec_nodes;
292 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
293 vec_nodes.push_back(tmp[i]);
294 }
295 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
296 }
297 }
298 if(ret == 0) {
299 Node sk = NodeManager::currentNM()->mkSkolem( "rsp_$$", NodeManager::currentNM()->stringType(), "Split RegExp" );
300 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
301 if(!rest.isNull()) {
302 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
303 }
304 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
305 NodeManager::currentNM()->mkConst(c), sk));
306 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
307 }
308 }
309 break;
310 }
311 case kind::REGEXP_CONCAT: {
312 std::vector< Node > vec_nodes;
313 std::vector< Node > delta_nodes;
314 Node dnode = d_true;
315 for(unsigned i=0; i<r.getNumChildren(); ++i) {
316 Node dc;
317 Node exp2;
318 int rt = derivativeS(r[i], c, dc);
319 if(rt != 2) {
320 if(rt == 0) {
321 ret = 0;
322 }
323 std::vector< Node > vec_nodes2;
324 if(dc != d_emptySingleton) {
325 vec_nodes2.push_back( dc );
326 }
327 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
328 if(r[j] != d_emptySingleton) {
329 vec_nodes2.push_back( r[j] );
330 }
331 }
332 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
333 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
334 if(dnode != d_true) {
335 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
336 ret = 0;
337 }
338 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
339 vec_nodes.push_back( tmp );
340 }
341 }
342 Node exp3;
343 int rt2 = delta( r[i], exp3 );
344 if( rt2 == 0 ) {
345 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
346 } else if( rt2 == 2 ) {
347 break;
348 }
349 }
350 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
351 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
352 if(retNode == d_emptyRegexp) {
353 ret = 2;
354 }
355 break;
356 }
357 case kind::REGEXP_UNION: {
358 std::vector< Node > vec_nodes;
359 for(unsigned i=0; i<r.getNumChildren(); ++i) {
360 Node dc;
361 int rt = derivativeS(r[i], c, dc);
362 if(rt == 0) {
363 ret = 0;
364 }
365 if(rt != 2) {
366 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
367 vec_nodes.push_back( dc );
368 }
369 }
370 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
371 }
372 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
373 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
374 if(retNode == d_emptyRegexp) {
375 ret = 2;
376 }
377 break;
378 }
379 case kind::REGEXP_INTER: {
380 bool flag = true;
381 bool flag_sg = false;
382 std::vector< Node > vec_nodes;
383 for(unsigned i=0; i<r.getNumChildren(); ++i) {
384 Node dc;
385 int rt = derivativeS(r[i], c, dc);
386 if(rt == 0) {
387 ret = 0;
388 } else if(rt == 2) {
389 flag = false;
390 break;
391 }
392 if(dc == d_sigma_star) {
393 flag_sg = true;
394 } else {
395 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
396 vec_nodes.push_back( dc );
397 }
398 }
399 }
400 if(flag) {
401 if(vec_nodes.size() == 0 && flag_sg) {
402 retNode = d_sigma_star;
403 } else {
404 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
405 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
406 if(retNode == d_emptyRegexp) {
407 ret = 2;
408 }
409 }
410 } else {
411 retNode = d_emptyRegexp;
412 ret = 2;
413 }
414 break;
415 }
416 case kind::REGEXP_STAR: {
417 Node dc;
418 ret = derivativeS(r[0], c, dc);
419 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
420 break;
421 }
422 default: {
423 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
424 Assert( false, "Unsupported Term" );
425 }
426 }
427 if(retNode != d_emptyRegexp) {
428 retNode = Rewriter::rewrite( retNode );
429 }
430 std::pair< Node, int > p(retNode, ret);
431 d_deriv_cache[dv] = p;
432 }
433
434 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
435 return ret;
436 }
437
438 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
439 Assert( c.size() < 2 );
440 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
441 Node retNode = d_emptyRegexp;
442 PairNodeStr dv = std::make_pair( r, c );
443 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
444 retNode = d_dv_cache[dv];
445 } else if( c.isEmptyString() ){
446 Node exp;
447 int tmp = delta( r, exp );
448 if(tmp == 0) {
449 // TODO variable
450 retNode = d_emptyRegexp;
451 } else if(tmp == 1) {
452 retNode = r;
453 } else {
454 retNode = d_emptyRegexp;
455 }
456 } else {
457 int k = r.getKind();
458 switch( k ) {
459 case kind::REGEXP_EMPTY: {
460 retNode = d_emptyRegexp;
461 break;
462 }
463 case kind::REGEXP_SIGMA: {
464 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
465 break;
466 }
467 case kind::STRING_TO_REGEXP: {
468 if(r[0].isConst()) {
469 if(r[0] == d_emptyString) {
470 retNode = d_emptyRegexp;
471 } else {
472 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
473 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
474 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
475 } else {
476 retNode = d_emptyRegexp;
477 }
478 }
479 } else {
480 // TODO variable
481 retNode = d_emptyRegexp;
482 }
483 break;
484 }
485 case kind::REGEXP_CONCAT: {
486 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
487 std::vector< Node > vec_nodes;
488 for(unsigned i=0; i<r.getNumChildren(); ++i) {
489 Node dc = derivativeSingle(r[i], c);
490 if(dc != d_emptyRegexp) {
491 std::vector< Node > vec_nodes2;
492 if(dc != rees) {
493 vec_nodes2.push_back( dc );
494 }
495 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
496 if(r[j] != rees) {
497 vec_nodes2.push_back( r[j] );
498 }
499 }
500 Node tmp = vec_nodes2.size()==0 ? rees :
501 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
502 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
503 vec_nodes.push_back( tmp );
504 }
505 }
506 Node exp;
507 if( delta( r[i], exp ) != 1 ) {
508 break;
509 }
510 }
511 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
512 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
513 break;
514 }
515 case kind::REGEXP_UNION: {
516 std::vector< Node > vec_nodes;
517 for(unsigned i=0; i<r.getNumChildren(); ++i) {
518 Node dc = derivativeSingle(r[i], c);
519 if(dc != d_emptyRegexp) {
520 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
521 vec_nodes.push_back( dc );
522 }
523 }
524 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
525 }
526 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
527 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
528 break;
529 }
530 case kind::REGEXP_INTER: {
531 bool flag = true;
532 bool flag_sg = false;
533 std::vector< Node > vec_nodes;
534 for(unsigned i=0; i<r.getNumChildren(); ++i) {
535 Node dc = derivativeSingle(r[i], c);
536 if(dc != d_emptyRegexp) {
537 if(dc == d_sigma_star) {
538 flag_sg = true;
539 } else {
540 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
541 vec_nodes.push_back( dc );
542 }
543 }
544 } else {
545 flag = false;
546 break;
547 }
548 }
549 if(flag) {
550 if(vec_nodes.size() == 0 && flag_sg) {
551 retNode = d_sigma_star;
552 } else {
553 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
554 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
555 }
556 } else {
557 retNode = d_emptyRegexp;
558 }
559 break;
560 }
561 case kind::REGEXP_STAR: {
562 Node dc = derivativeSingle(r[0], c);
563 if(dc != d_emptyRegexp) {
564 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
565 } else {
566 retNode = d_emptyRegexp;
567 }
568 break;
569 }
570 default: {
571 //TODO: special sym: sigma, none, all
572 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
573 Assert( false, "Unsupported Term" );
574 //return Node::null();
575 }
576 }
577 if(retNode != d_emptyRegexp) {
578 retNode = Rewriter::rewrite( retNode );
579 }
580 d_dv_cache[dv] = retNode;
581 }
582 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
583 return retNode;
584 }
585
586 //TODO:
587 bool RegExpOpr::guessLength( Node r, int &co ) {
588 int k = r.getKind();
589 switch( k ) {
590 case kind::STRING_TO_REGEXP:
591 {
592 if(r[0].isConst()) {
593 co += r[0].getConst< CVC4::String >().size();
594 return true;
595 } else {
596 return false;
597 }
598 }
599 break;
600 case kind::REGEXP_CONCAT:
601 {
602 for(unsigned i=0; i<r.getNumChildren(); ++i) {
603 if(!guessLength( r[i], co)) {
604 return false;
605 }
606 }
607 return true;
608 }
609 break;
610 case kind::REGEXP_UNION:
611 {
612 int g_co;
613 for(unsigned i=0; i<r.getNumChildren(); ++i) {
614 int cop = 0;
615 if(!guessLength( r[i], cop)) {
616 return false;
617 }
618 if(i == 0) {
619 g_co = cop;
620 } else {
621 g_co = gcd(g_co, cop);
622 }
623 }
624 return true;
625 }
626 break;
627 case kind::REGEXP_INTER:
628 {
629 int g_co;
630 for(unsigned i=0; i<r.getNumChildren(); ++i) {
631 int cop = 0;
632 if(!guessLength( r[i], cop)) {
633 return false;
634 }
635 if(i == 0) {
636 g_co = cop;
637 } else {
638 g_co = gcd(g_co, cop);
639 }
640 }
641 return true;
642 }
643 break;
644 case kind::REGEXP_STAR:
645 {
646 co = 0;
647 return true;
648 }
649 break;
650 default:
651 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
652 return false;
653 }
654 }
655
656 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
657 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
658 if(itr != d_fset_cache.end()) {
659 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
660 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
661 } else {
662 std::set<unsigned> cset;
663 SetNodes vset;
664 int k = r.getKind();
665 switch( k ) {
666 case kind::REGEXP_EMPTY: {
667 break;
668 }
669 case kind::REGEXP_SIGMA: {
670 for(unsigned i=0; i<d_card; i++) {
671 cset.insert(i);
672 }
673 break;
674 }
675 case kind::STRING_TO_REGEXP: {
676 Node st = Rewriter::rewrite(r[0]);
677 if(st.isConst()) {
678 CVC4::String s = st.getConst< CVC4::String >();
679 if(s.size() != 0) {
680 cset.insert(s[0]);
681 }
682 } else if(st.getKind() == kind::VARIABLE) {
683 vset.insert( st );
684 } else {
685 if(st[0].isConst()) {
686 CVC4::String s = st[0].getConst< CVC4::String >();
687 cset.insert(s[0]);
688 } else {
689 vset.insert( st[0] );
690 }
691 }
692 break;
693 }
694 case kind::REGEXP_CONCAT: {
695 for(unsigned i=0; i<r.getNumChildren(); i++) {
696 firstChars(r[i], cset, vset);
697 Node n = r[i];
698
699 Node exp;
700 int r = delta( n, exp );
701
702 if(r != 1) {
703 break;
704 }
705 }
706 break;
707 }
708 case kind::REGEXP_UNION: {
709 for(unsigned i=0; i<r.getNumChildren(); i++) {
710 firstChars(r[i], cset, vset);
711 }
712 break;
713 }
714 case kind::REGEXP_INTER: {
715 //TODO: Overapproximation for now
716 for(unsigned i=0; i<r.getNumChildren(); i++) {
717 firstChars(r[i], cset, vset);
718 }
719 break;
720 }
721 case kind::REGEXP_STAR: {
722 firstChars(r[0], cset, vset);
723 break;
724 }
725 default: {
726 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
727 Assert( false, "Unsupported Term" );
728 }
729 }
730 pcset.insert(cset.begin(), cset.end());
731 pvset.insert(vset.begin(), vset.end());
732 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
733 d_fset_cache[r] = p;
734
735 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
736 for(std::set<unsigned>::const_iterator itr = cset.begin();
737 itr != cset.end(); itr++) {
738 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
739 }
740 Trace("regexp-fset") << " }" << std::endl;
741 }
742 }
743
744 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
745 int k = r.getKind();
746 switch( k ) {
747 case kind::STRING_TO_REGEXP:
748 {
749 if(r[0].isConst()) {
750 if(r[0] != d_emptyString) {
751 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
752 if(c.isEmptyString()) {
753 vec_chars.push_back( t1 );
754 return true;
755 } else {
756 char t2 = c.getFirstChar();
757 if(t1 != t2) {
758 return false;
759 } else {
760 if(c.size() >= 2) {
761 vec_chars.push_back( c.substr(1,1).getFirstChar() );
762 } else {
763 vec_chars.push_back( '\0' );
764 }
765 return true;
766 }
767 }
768 } else {
769 return false;
770 }
771 } else {
772 return false;
773 }
774 }
775 break;
776 case kind::REGEXP_CONCAT:
777 {
778 for(unsigned i=0; i<r.getNumChildren(); ++i) {
779 if( follow(r[i], c, vec_chars) ) {
780 if(vec_chars[vec_chars.size() - 1] == '\0') {
781 vec_chars.pop_back();
782 c = d_emptyString.getConst< CVC4::String >();
783 }
784 } else {
785 return false;
786 }
787 }
788 vec_chars.push_back( '\0' );
789 return true;
790 }
791 break;
792 case kind::REGEXP_UNION:
793 {
794 bool flag = false;
795 for(unsigned i=0; i<r.getNumChildren(); ++i) {
796 if( follow(r[i], c, vec_chars) ) {
797 flag=true;
798 }
799 }
800 return flag;
801 }
802 break;
803 case kind::REGEXP_INTER:
804 {
805 std::vector< char > vt2;
806 for(unsigned i=0; i<r.getNumChildren(); ++i) {
807 std::vector< char > v_tmp;
808 if( !follow(r[i], c, v_tmp) ) {
809 return false;
810 }
811 std::vector< char > vt3(vt2);
812 vt2.clear();
813 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
814 if(vt2.size() == 0) {
815 return false;
816 }
817 }
818 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
819 return true;
820 }
821 break;
822 case kind::REGEXP_STAR:
823 {
824 if(follow(r[0], c, vec_chars)) {
825 if(vec_chars[vec_chars.size() - 1] == '\0') {
826 if(c.isEmptyString()) {
827 return true;
828 } else {
829 vec_chars.pop_back();
830 c = d_emptyString.getConst< CVC4::String >();
831 return follow(r[0], c, vec_chars);
832 }
833 } else {
834 return true;
835 }
836 } else {
837 vec_chars.push_back( '\0' );
838 return true;
839 }
840 }
841 break;
842 default: {
843 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
844 //AlwaysAssert( false );
845 //return Node::null();
846 return false;
847 }
848 }
849 }
850
851 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
852 std::vector< Node > vec_nodes;
853 for(char c=d_char_start; c<=d_char_end; ++c) {
854 if(c != exp_c ) {
855 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
856 vec_nodes.push_back( n );
857 }
858 }
859 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
860 }
861
862 //simplify
863 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
864 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
865 Assert(t.getKind() == kind::STRING_IN_REGEXP);
866 Node str = Rewriter::rewrite(t[0]);
867 Node re = Rewriter::rewrite(t[1]);
868 if(polarity) {
869 simplifyPRegExp( str, re, new_nodes );
870 } else {
871 simplifyNRegExp( str, re, new_nodes );
872 }
873 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
874 for(unsigned i=0; i<new_nodes.size(); i++) {
875 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
876 }
877 }
878 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
879 std::pair < Node, Node > p(s, r);
880 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
881 if(itr != d_simpl_neg_cache.end()) {
882 new_nodes.push_back( itr->second );
883 } else {
884 int k = r.getKind();
885 Node conc;
886 switch( k ) {
887 case kind::REGEXP_EMPTY: {
888 conc = d_true;
889 break;
890 }
891 case kind::REGEXP_SIGMA: {
892 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
893 break;
894 }
895 case kind::STRING_TO_REGEXP: {
896 conc = s.eqNode(r[0]).negate();
897 break;
898 }
899 case kind::REGEXP_CONCAT: {
900 //TODO: rewrite empty
901 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
902 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
903 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
904 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
905 Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
906 Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
907 Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
908 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
909 Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
910 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
911 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
912 s1r1 = s1.eqNode(r[0][0]).negate();
913 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
914 s1r1 = d_true;
915 }
916 Node r2 = r[1];
917 if(r.getNumChildren() > 2) {
918 std::vector< Node > nvec;
919 for(unsigned i=1; i<r.getNumChildren(); i++) {
920 nvec.push_back( r[i] );
921 }
922 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
923 }
924 r2 = Rewriter::rewrite(r2);
925 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
926 if(r2.getKind() == kind::STRING_TO_REGEXP) {
927 s2r2 = s2.eqNode(r2[0]).negate();
928 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
929 s2r2 = d_true;
930 }
931
932 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
933 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
934 conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
935 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
936 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
937 break;
938 }
939 case kind::REGEXP_UNION: {
940 std::vector< Node > c_and;
941 for(unsigned i=0; i<r.getNumChildren(); ++i) {
942 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
943 c_and.push_back( r[i][0].eqNode(s).negate() );
944 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
945 continue;
946 } else {
947 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
948 }
949 }
950 conc = c_and.size() == 0 ? d_true :
951 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
952 break;
953 }
954 case kind::REGEXP_INTER: {
955 bool emptyflag = false;
956 std::vector< Node > c_or;
957 for(unsigned i=0; i<r.getNumChildren(); ++i) {
958 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
959 c_or.push_back( r[i][0].eqNode(s).negate() );
960 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
961 emptyflag = true;
962 break;
963 } else {
964 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
965 }
966 }
967 if(emptyflag) {
968 conc = d_true;
969 } else {
970 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
971 }
972 break;
973 }
974 case kind::REGEXP_STAR: {
975 if(s == d_emptyString) {
976 conc = d_false;
977 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
978 conc = s.eqNode(d_emptyString).negate();
979 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
980 conc = d_false;
981 } else {
982 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
983 Node sne = s.eqNode(d_emptyString).negate();
984 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
985 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
986 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
987 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
988 //internal
989 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
990 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
991 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
992 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
993
994 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
995 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
996 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
997 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
998 }
999 break;
1000 }
1001 default: {
1002 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
1003 Assert( false, "Unsupported Term" );
1004 }
1005 }
1006 conc = Rewriter::rewrite( conc );
1007 new_nodes.push_back( conc );
1008 d_simpl_neg_cache[p] = conc;
1009 }
1010 }
1011 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
1012 std::pair < Node, Node > p(s, r);
1013 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
1014 if(itr != d_simpl_cache.end()) {
1015 new_nodes.push_back( itr->second );
1016 } else {
1017 int k = r.getKind();
1018 Node conc;
1019 switch( k ) {
1020 case kind::REGEXP_EMPTY: {
1021 conc = d_false;
1022 break;
1023 }
1024 case kind::REGEXP_SIGMA: {
1025 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1026 break;
1027 }
1028 case kind::STRING_TO_REGEXP: {
1029 conc = s.eqNode(r[0]);
1030 break;
1031 }
1032 case kind::REGEXP_CONCAT: {
1033 std::vector< Node > nvec;
1034 std::vector< Node > cc;
1035 bool emptyflag = false;
1036 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1037 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1038 cc.push_back( r[i][0] );
1039 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1040 emptyflag = true;
1041 break;
1042 } else {
1043 Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" );
1044 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1045 nvec.push_back(lem);
1046 cc.push_back(sk);
1047 }
1048 }
1049 if(emptyflag) {
1050 conc = d_false;
1051 } else {
1052 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1053 nvec.push_back(lem);
1054 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1055 }
1056 break;
1057 }
1058 case kind::REGEXP_UNION: {
1059 std::vector< Node > c_or;
1060 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1061 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1062 c_or.push_back( r[i][0].eqNode(s) );
1063 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1064 continue;
1065 } else {
1066 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1067 }
1068 }
1069 conc = c_or.size() == 0 ? d_false :
1070 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1071 break;
1072 }
1073 case kind::REGEXP_INTER: {
1074 std::vector< Node > c_and;
1075 bool emptyflag = false;
1076 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1077 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1078 c_and.push_back( r[i][0].eqNode(s) );
1079 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1080 emptyflag = true;
1081 break;
1082 } else {
1083 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1084 }
1085 }
1086 if(emptyflag) {
1087 conc = d_false;
1088 } else {
1089 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1090 }
1091 break;
1092 }
1093 case kind::REGEXP_STAR: {
1094 if(s == d_emptyString) {
1095 conc = d_true;
1096 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1097 conc = s.eqNode(d_emptyString);
1098 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1099 conc = d_true;
1100 } else {
1101 Node se = s.eqNode(d_emptyString);
1102 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1103 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
1104 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
1105 Node s1nz = sk1.eqNode(d_emptyString).negate();
1106 Node s2nz = sk2.eqNode(d_emptyString).negate();
1107 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1108 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1109 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1110
1111 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1112 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1113 }
1114 break;
1115 }
1116 default: {
1117 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1118 Assert( false, "Unsupported Term" );
1119 }
1120 }
1121 conc = Rewriter::rewrite( conc );
1122 new_nodes.push_back( conc );
1123 d_simpl_cache[p] = conc;
1124 }
1125 }
1126
1127 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1128 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1129 if(itr != d_cset_cache.end()) {
1130 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1131 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1132 } else {
1133 std::set<unsigned> cset;
1134 SetNodes vset;
1135 int k = r.getKind();
1136 switch( k ) {
1137 case kind::REGEXP_EMPTY: {
1138 break;
1139 }
1140 case kind::REGEXP_SIGMA: {
1141 for(unsigned i=0; i<d_card; i++) {
1142 cset.insert(i);
1143 }
1144 break;
1145 }
1146 case kind::STRING_TO_REGEXP: {
1147 Node st = Rewriter::rewrite(r[0]);
1148 if(st.isConst()) {
1149 CVC4::String s = st.getConst< CVC4::String >();
1150 s.getCharSet( cset );
1151 } else if(st.getKind() == kind::VARIABLE) {
1152 vset.insert( st );
1153 } else {
1154 for(unsigned i=0; i<st.getNumChildren(); i++) {
1155 if(st[i].isConst()) {
1156 CVC4::String s = st[i].getConst< CVC4::String >();
1157 s.getCharSet( cset );
1158 } else {
1159 vset.insert( st[i] );
1160 }
1161 }
1162 }
1163 break;
1164 }
1165 case kind::REGEXP_CONCAT: {
1166 for(unsigned i=0; i<r.getNumChildren(); i++) {
1167 getCharSet(r[i], cset, vset);
1168 }
1169 break;
1170 }
1171 case kind::REGEXP_UNION: {
1172 for(unsigned i=0; i<r.getNumChildren(); i++) {
1173 getCharSet(r[i], cset, vset);
1174 }
1175 break;
1176 }
1177 case kind::REGEXP_INTER: {
1178 //TODO: Overapproximation for now
1179 for(unsigned i=0; i<r.getNumChildren(); i++) {
1180 getCharSet(r[i], cset, vset);
1181 }
1182 break;
1183 }
1184 case kind::REGEXP_STAR: {
1185 getCharSet(r[0], cset, vset);
1186 break;
1187 }
1188 default: {
1189 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1190 Assert( false, "Unsupported Term" );
1191 }
1192 }
1193 pcset.insert(cset.begin(), cset.end());
1194 pvset.insert(vset.begin(), vset.end());
1195 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1196 d_cset_cache[r] = p;
1197
1198 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1199 for(std::set<unsigned>::const_iterator itr = cset.begin();
1200 itr != cset.end(); itr++) {
1201 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1202 }
1203 Trace("regexp-cset") << " }" << std::endl;
1204 }
1205 }
1206
1207
1208 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1209 if(spflag) {
1210 //TODO: var
1211 return Node::null();
1212 }
1213
1214 std::pair < Node, Node > p(r1, r2);
1215 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1216 Node rNode;
1217 if(itr != d_inter_cache.end()) {
1218 rNode = itr->second;
1219 } else {
1220 if(r1 == r2) {
1221 rNode = r1;
1222 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1223 Trace("regexp-intersect") << "INTERSECT Case 1: one empty RE" << std::endl;
1224 rNode = d_emptyRegexp;
1225 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1226 Trace("regexp-intersect") << "INTERSECT Case 2: one empty Singleton" << std::endl;
1227 Node exp;
1228 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1229 if(r == 0) {
1230 //TODO: variable
1231 spflag = true;
1232 //Assert( false, "Unsupported Yet, 892 REO" );
1233 } else if(r == 1) {
1234 rNode = d_emptySingleton;
1235 } else {
1236 rNode = d_emptyRegexp;
1237 }
1238 } else {
1239 Trace("regexp-intersect") << "INTERSECT Case 3: must compare" << std::endl;
1240 std::set< unsigned > cset, cset2;
1241 std::vector< unsigned > cdiff;
1242 std::set< Node > vset;
1243 getCharSet(r1, cset, vset);
1244 getCharSet(r2, cset2, vset);
1245 if(vset.empty()) {
1246 std::set_symmetric_difference(cset.begin(), cset.end(), cset2.begin(), cset2.end(), std::back_inserter(cdiff));
1247 if(cdiff.empty()) {
1248 Trace("regexp-intersect") << "INTERSECT Case 3.1: compare" << std::endl;
1249 cset.clear();
1250 firstChars(r1, cset, vset);
1251 std::vector< Node > vec_nodes;
1252 for(std::set<unsigned>::const_iterator itr = cset.begin();
1253 itr != cset.end(); itr++) {
1254 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1255 std::pair< Node, Node > p(r1, r2);
1256 if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
1257 Node r1l = derivativeSingle(r1, c);
1258 Node r2l = derivativeSingle(r2, c);
1259 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1260 PairNodes p(r1l, r2l);
1261 cache2[ *itr ].insert( p );
1262 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1263 if(spflag) {
1264 //TODO:
1265 return Node::null();
1266 }
1267 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1268 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1269 vec_nodes.push_back(rt);
1270 }
1271 }
1272 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1273 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1274 rNode = Rewriter::rewrite( rNode );
1275 } else {
1276 Trace("regexp-intersect") << "INTERSECT Case 3.2: diff cset" << std::endl;
1277 rNode = d_emptyRegexp;
1278 }
1279 } else {
1280 //TODO: non-empty var set
1281 spflag = true;
1282 //Assert( false, "Unsupported Yet, 927 REO" );
1283
1284 }
1285 }
1286 d_inter_cache[p] = rNode;
1287 }
1288 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1289 return rNode;
1290 }
1291 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1292 std::map< unsigned, std::set< PairNodes > > cache;
1293 return intersectInternal(r1, r2, cache, spflag);
1294
1295 }
1296
1297 Node RegExpOpr::complement(Node r, int &ret) {
1298 Node rNode;
1299 ret = 1;
1300 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1301 rNode = d_compl_cache[r].first;
1302 ret = d_compl_cache[r].second;
1303 } else {
1304 if(r == d_emptyRegexp) {
1305 rNode = d_sigma_star;
1306 } else if(r == d_emptySingleton) {
1307 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1308 } else if(!checkConstRegExp(r)) {
1309 //TODO: var to be extended
1310 ret = 0;
1311 } else {
1312 std::set<unsigned> cset;
1313 SetNodes vset;
1314 firstChars(r, cset, vset);
1315 Assert(!vset.empty(), "Regexp 1298 Error");
1316 std::vector< Node > vec_nodes;
1317 for(unsigned i=0; i<d_card; i++) {
1318 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1319 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1320 Node r2;
1321 if(cset.find(i) == cset.end()) {
1322 r2 = d_sigma_star;
1323 } else {
1324 int rt;
1325 derivativeS(r, c, r2);
1326 if(r2 == r) {
1327 r2 = d_emptyRegexp;
1328 } else {
1329 r2 = complement(r2, rt);
1330 }
1331 }
1332 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1333 vec_nodes.push_back(n);
1334 }
1335 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1336 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1337 }
1338 std::pair< Node, int > p(rNode, ret);
1339 d_compl_cache[r] = p;
1340 }
1341 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1342 return rNode;
1343 }
1344 //printing
1345 std::string RegExpOpr::niceChar( Node r ) {
1346 if(r.isConst()) {
1347 std::string s = r.getConst<CVC4::String>().toString() ;
1348 return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
1349 } else {
1350 return r.toString();
1351 }
1352 }
1353 std::string RegExpOpr::mkString( Node r ) {
1354 std::string retStr;
1355 if(r.isNull()) {
1356 retStr = "Empty";
1357 } else {
1358 int k = r.getKind();
1359 switch( k ) {
1360 case kind::REGEXP_EMPTY: {
1361 retStr += "Empty";
1362 break;
1363 }
1364 case kind::REGEXP_SIGMA: {
1365 retStr += "{W}";
1366 break;
1367 }
1368 case kind::STRING_TO_REGEXP: {
1369 retStr += niceChar( r[0] );
1370 break;
1371 }
1372 case kind::REGEXP_CONCAT: {
1373 //retStr += "(";
1374 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1375 //if(i != 0) retStr += ".";
1376 retStr += mkString( r[i] );
1377 }
1378 //retStr += ")";
1379 break;
1380 }
1381 case kind::REGEXP_UNION: {
1382 if(r == d_sigma) {
1383 retStr += "{A}";
1384 } else {
1385 retStr += "(";
1386 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1387 if(i != 0) retStr += "|";
1388 retStr += mkString( r[i] );
1389 }
1390 retStr += ")";
1391 }
1392 break;
1393 }
1394 case kind::REGEXP_INTER: {
1395 retStr += "(";
1396 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1397 if(i != 0) retStr += "&";
1398 retStr += mkString( r[i] );
1399 }
1400 retStr += ")";
1401 break;
1402 }
1403 case kind::REGEXP_STAR: {
1404 retStr += mkString( r[0] );
1405 retStr += "*";
1406 break;
1407 }
1408 case kind::REGEXP_PLUS: {
1409 retStr += mkString( r[0] );
1410 retStr += "+";
1411 break;
1412 }
1413 case kind::REGEXP_OPT: {
1414 retStr += mkString( r[0] );
1415 retStr += "?";
1416 break;
1417 }
1418 case kind::REGEXP_RANGE: {
1419 retStr += "[";
1420 retStr += niceChar( r[0] );
1421 retStr += "-";
1422 retStr += niceChar( r[1] );
1423 retStr += "]";
1424 break;
1425 }
1426 default:
1427 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1428 //Assert( false );
1429 //return Node::null();
1430 }
1431 }
1432
1433 return retStr;
1434 }
1435
1436 }/* CVC4::theory::strings namespace */
1437 }/* CVC4::theory namespace */
1438 }/* CVC4 namespace */