simplify mkSkolem naming system: don't use $$
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Symbolic Regular Expresion Operations
24 **
25 ** Symbolic Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
40 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
41 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
42 std::vector< Node > nvec;
43 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
44 // All Charactors = all printable ones 32-126
45 //d_char_start = 'a';//' ';
46 //d_char_end = 'c';//'~';
47 //d_sigma = mkAllExceptOne( '\0' );
48 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
49 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
50 d_card = 256;
51 }
52
53 int RegExpOpr::gcd ( int a, int b ) {
54 int c;
55 while ( a != 0 ) {
56 c = a; a = b%a; b = c;
57 }
58 return b;
59 }
60
61 bool RegExpOpr::checkConstRegExp( Node r ) {
62 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
63 bool ret = true;
64 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
65 ret = d_cstre_cache[r];
66 } else {
67 if(r.getKind() == kind::STRING_TO_REGEXP) {
68 Node tmp = Rewriter::rewrite( r[0] );
69 ret = tmp.isConst();
70 } else {
71 for(unsigned i=0; i<r.getNumChildren(); ++i) {
72 if(!checkConstRegExp(r[i])) {
73 ret = false; break;
74 }
75 }
76 }
77 d_cstre_cache[r] = ret;
78 }
79 return ret;
80 }
81
82 // 0-unknown, 1-yes, 2-no
83 int RegExpOpr::delta( Node r, Node &exp ) {
84 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
85 int ret = 0;
86 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
87 ret = d_delta_cache[r].first;
88 exp = d_delta_cache[r].second;
89 } else {
90 int k = r.getKind();
91 switch( k ) {
92 case kind::REGEXP_EMPTY: {
93 ret = 2;
94 break;
95 }
96 case kind::REGEXP_SIGMA: {
97 ret = 2;
98 break;
99 }
100 case kind::STRING_TO_REGEXP: {
101 Node tmp = Rewriter::rewrite(r[0]);
102 if(tmp.isConst()) {
103 if(tmp == d_emptyString) {
104 ret = 1;
105 } else {
106 ret = 2;
107 }
108 } else {
109 ret = 0;
110 if(tmp.getKind() == kind::STRING_CONCAT) {
111 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
112 if(tmp[i].isConst()) {
113 ret = 2; break;
114 }
115 }
116
117 }
118 if(ret == 0) {
119 exp = r[0].eqNode(d_emptyString);
120 }
121 }
122 break;
123 }
124 case kind::REGEXP_CONCAT: {
125 bool flag = false;
126 std::vector< Node > vec_nodes;
127 for(unsigned i=0; i<r.getNumChildren(); ++i) {
128 Node exp2;
129 int tmp = delta( r[i], exp2 );
130 if(tmp == 2) {
131 ret = 2;
132 break;
133 } else if(tmp == 0) {
134 vec_nodes.push_back( exp2 );
135 flag = true;
136 }
137 }
138 if(ret != 2) {
139 if(!flag) {
140 ret = 1;
141 } else {
142 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
143 }
144 }
145 break;
146 }
147 case kind::REGEXP_UNION: {
148 bool flag = false;
149 std::vector< Node > vec_nodes;
150 for(unsigned i=0; i<r.getNumChildren(); ++i) {
151 Node exp2;
152 int tmp = delta( r[i], exp2 );
153 if(tmp == 1) {
154 ret = 1;
155 break;
156 } else if(tmp == 0) {
157 vec_nodes.push_back( exp2 );
158 flag = true;
159 }
160 }
161 if(ret != 1) {
162 if(!flag) {
163 ret = 2;
164 } else {
165 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
166 }
167 }
168 break;
169 }
170 case kind::REGEXP_INTER: {
171 bool flag = false;
172 std::vector< Node > vec_nodes;
173 for(unsigned i=0; i<r.getNumChildren(); ++i) {
174 Node exp2;
175 int tmp = delta( r[i], exp2 );
176 if(tmp == 2) {
177 ret = 2;
178 break;
179 } else if(tmp == 0) {
180 vec_nodes.push_back( exp2 );
181 flag = true;
182 }
183 }
184 if(ret != 2) {
185 if(!flag) {
186 ret = 1;
187 } else {
188 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
189 }
190 }
191 break;
192 }
193 case kind::REGEXP_STAR: {
194 ret = 1;
195 break;
196 }
197 case kind::REGEXP_PLUS: {
198 ret = delta( r[0], exp );
199 break;
200 }
201 case kind::REGEXP_OPT: {
202 ret = 1;
203 break;
204 }
205 case kind::REGEXP_RANGE: {
206 ret = 2;
207 break;
208 }
209 default: {
210 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
211 Assert( false );
212 //return Node::null();
213 }
214 }
215 if(!exp.isNull()) {
216 exp = Rewriter::rewrite(exp);
217 }
218 std::pair< int, Node > p(ret, exp);
219 d_delta_cache[r] = p;
220 }
221 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
222 return ret;
223 }
224
225 // 0-unknown, 1-yes, 2-no
226 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
227 Assert( c.size() < 2 );
228 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
229
230 int ret = 1;
231 retNode = d_emptyRegexp;
232
233 PairNodeStr dv = std::make_pair( r, c );
234 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
235 retNode = d_deriv_cache[dv].first;
236 ret = d_deriv_cache[dv].second;
237 } else if( c.isEmptyString() ) {
238 Node expNode;
239 ret = delta( r, expNode );
240 if(ret == 0) {
241 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
242 } else if(ret == 1) {
243 retNode = r;
244 }
245 std::pair< Node, int > p(retNode, ret);
246 d_deriv_cache[dv] = p;
247 } else {
248 switch( r.getKind() ) {
249 case kind::REGEXP_EMPTY: {
250 ret = 2;
251 break;
252 }
253 case kind::REGEXP_SIGMA: {
254 retNode = d_emptySingleton;
255 break;
256 }
257 case kind::STRING_TO_REGEXP: {
258 Node tmp = Rewriter::rewrite(r[0]);
259 if(tmp.isConst()) {
260 if(tmp == d_emptyString) {
261 ret = 2;
262 } else {
263 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
264 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
265 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
266 } else {
267 ret = 2;
268 }
269 }
270 } else {
271 ret = 0;
272 Node rest;
273 if(tmp.getKind() == kind::STRING_CONCAT) {
274 Node t2 = tmp[0];
275 if(t2.isConst()) {
276 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
277 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
278 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
279 std::vector< Node > vec_nodes;
280 vec_nodes.push_back(n);
281 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
282 vec_nodes.push_back(tmp[i]);
283 }
284 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
285 ret = 1;
286 } else {
287 ret = 2;
288 }
289 } else {
290 tmp = tmp[0];
291 std::vector< Node > vec_nodes;
292 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
293 vec_nodes.push_back(tmp[i]);
294 }
295 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
296 }
297 }
298 if(ret == 0) {
299 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
300 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
301 if(!rest.isNull()) {
302 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
303 }
304 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
305 NodeManager::currentNM()->mkConst(c), sk));
306 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
307 }
308 }
309 break;
310 }
311 case kind::REGEXP_CONCAT: {
312 std::vector< Node > vec_nodes;
313 std::vector< Node > delta_nodes;
314 Node dnode = d_true;
315 for(unsigned i=0; i<r.getNumChildren(); ++i) {
316 Node dc;
317 Node exp2;
318 int rt = derivativeS(r[i], c, dc);
319 if(rt != 2) {
320 if(rt == 0) {
321 ret = 0;
322 }
323 std::vector< Node > vec_nodes2;
324 if(dc != d_emptySingleton) {
325 vec_nodes2.push_back( dc );
326 }
327 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
328 if(r[j] != d_emptySingleton) {
329 vec_nodes2.push_back( r[j] );
330 }
331 }
332 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
333 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
334 if(dnode != d_true) {
335 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
336 ret = 0;
337 }
338 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
339 vec_nodes.push_back( tmp );
340 }
341 }
342 Node exp3;
343 int rt2 = delta( r[i], exp3 );
344 if( rt2 == 0 ) {
345 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
346 } else if( rt2 == 2 ) {
347 break;
348 }
349 }
350 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
351 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
352 if(retNode == d_emptyRegexp) {
353 ret = 2;
354 }
355 break;
356 }
357 case kind::REGEXP_UNION: {
358 std::vector< Node > vec_nodes;
359 for(unsigned i=0; i<r.getNumChildren(); ++i) {
360 Node dc;
361 int rt = derivativeS(r[i], c, dc);
362 if(rt == 0) {
363 ret = 0;
364 }
365 if(rt != 2) {
366 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
367 vec_nodes.push_back( dc );
368 }
369 }
370 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
371 }
372 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
373 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
374 if(retNode == d_emptyRegexp) {
375 ret = 2;
376 }
377 break;
378 }
379 case kind::REGEXP_INTER: {
380 bool flag = true;
381 bool flag_sg = false;
382 std::vector< Node > vec_nodes;
383 for(unsigned i=0; i<r.getNumChildren(); ++i) {
384 Node dc;
385 int rt = derivativeS(r[i], c, dc);
386 if(rt == 0) {
387 ret = 0;
388 } else if(rt == 2) {
389 flag = false;
390 break;
391 }
392 if(dc == d_sigma_star) {
393 flag_sg = true;
394 } else {
395 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
396 vec_nodes.push_back( dc );
397 }
398 }
399 }
400 if(flag) {
401 if(vec_nodes.size() == 0 && flag_sg) {
402 retNode = d_sigma_star;
403 } else {
404 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
405 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
406 if(retNode == d_emptyRegexp) {
407 ret = 2;
408 }
409 }
410 } else {
411 retNode = d_emptyRegexp;
412 ret = 2;
413 }
414 break;
415 }
416 case kind::REGEXP_STAR: {
417 Node dc;
418 ret = derivativeS(r[0], c, dc);
419 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
420 break;
421 }
422 default: {
423 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
424 Assert( false, "Unsupported Term" );
425 }
426 }
427 if(retNode != d_emptyRegexp) {
428 retNode = Rewriter::rewrite( retNode );
429 }
430 std::pair< Node, int > p(retNode, ret);
431 d_deriv_cache[dv] = p;
432 }
433
434 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
435 return ret;
436 }
437
438 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
439 Assert( c.size() < 2 );
440 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
441 Node retNode = d_emptyRegexp;
442 PairNodeStr dv = std::make_pair( r, c );
443 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
444 retNode = d_dv_cache[dv];
445 } else if( c.isEmptyString() ){
446 Node exp;
447 int tmp = delta( r, exp );
448 if(tmp == 0) {
449 // TODO variable
450 retNode = d_emptyRegexp;
451 } else if(tmp == 1) {
452 retNode = r;
453 } else {
454 retNode = d_emptyRegexp;
455 }
456 } else {
457 int k = r.getKind();
458 switch( k ) {
459 case kind::REGEXP_EMPTY: {
460 retNode = d_emptyRegexp;
461 break;
462 }
463 case kind::REGEXP_SIGMA: {
464 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
465 break;
466 }
467 case kind::STRING_TO_REGEXP: {
468 if(r[0].isConst()) {
469 if(r[0] == d_emptyString) {
470 retNode = d_emptyRegexp;
471 } else {
472 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
473 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
474 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
475 } else {
476 retNode = d_emptyRegexp;
477 }
478 }
479 } else {
480 // TODO variable
481 retNode = d_emptyRegexp;
482 }
483 break;
484 }
485 case kind::REGEXP_CONCAT: {
486 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
487 std::vector< Node > vec_nodes;
488 for(unsigned i=0; i<r.getNumChildren(); ++i) {
489 Node dc = derivativeSingle(r[i], c);
490 if(dc != d_emptyRegexp) {
491 std::vector< Node > vec_nodes2;
492 if(dc != rees) {
493 vec_nodes2.push_back( dc );
494 }
495 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
496 if(r[j] != rees) {
497 vec_nodes2.push_back( r[j] );
498 }
499 }
500 Node tmp = vec_nodes2.size()==0 ? rees :
501 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
502 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
503 vec_nodes.push_back( tmp );
504 }
505 }
506 Node exp;
507 if( delta( r[i], exp ) != 1 ) {
508 break;
509 }
510 }
511 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
512 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
513 break;
514 }
515 case kind::REGEXP_UNION: {
516 std::vector< Node > vec_nodes;
517 for(unsigned i=0; i<r.getNumChildren(); ++i) {
518 Node dc = derivativeSingle(r[i], c);
519 if(dc != d_emptyRegexp) {
520 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
521 vec_nodes.push_back( dc );
522 }
523 }
524 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
525 }
526 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
527 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
528 break;
529 }
530 case kind::REGEXP_INTER: {
531 bool flag = true;
532 bool flag_sg = false;
533 std::vector< Node > vec_nodes;
534 for(unsigned i=0; i<r.getNumChildren(); ++i) {
535 Node dc = derivativeSingle(r[i], c);
536 if(dc != d_emptyRegexp) {
537 if(dc == d_sigma_star) {
538 flag_sg = true;
539 } else {
540 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
541 vec_nodes.push_back( dc );
542 }
543 }
544 } else {
545 flag = false;
546 break;
547 }
548 }
549 if(flag) {
550 if(vec_nodes.size() == 0 && flag_sg) {
551 retNode = d_sigma_star;
552 } else {
553 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
554 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
555 }
556 } else {
557 retNode = d_emptyRegexp;
558 }
559 break;
560 }
561 case kind::REGEXP_STAR: {
562 Node dc = derivativeSingle(r[0], c);
563 if(dc != d_emptyRegexp) {
564 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
565 } else {
566 retNode = d_emptyRegexp;
567 }
568 break;
569 }
570 default: {
571 //TODO: special sym: sigma, none, all
572 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
573 Assert( false, "Unsupported Term" );
574 //return Node::null();
575 }
576 }
577 if(retNode != d_emptyRegexp) {
578 retNode = Rewriter::rewrite( retNode );
579 }
580 d_dv_cache[dv] = retNode;
581 }
582 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
583 return retNode;
584 }
585
586 //TODO:
587 bool RegExpOpr::guessLength( Node r, int &co ) {
588 int k = r.getKind();
589 switch( k ) {
590 case kind::STRING_TO_REGEXP:
591 {
592 if(r[0].isConst()) {
593 co += r[0].getConst< CVC4::String >().size();
594 return true;
595 } else {
596 return false;
597 }
598 }
599 break;
600 case kind::REGEXP_CONCAT:
601 {
602 for(unsigned i=0; i<r.getNumChildren(); ++i) {
603 if(!guessLength( r[i], co)) {
604 return false;
605 }
606 }
607 return true;
608 }
609 break;
610 case kind::REGEXP_UNION:
611 {
612 int g_co;
613 for(unsigned i=0; i<r.getNumChildren(); ++i) {
614 int cop = 0;
615 if(!guessLength( r[i], cop)) {
616 return false;
617 }
618 if(i == 0) {
619 g_co = cop;
620 } else {
621 g_co = gcd(g_co, cop);
622 }
623 }
624 return true;
625 }
626 break;
627 case kind::REGEXP_INTER:
628 {
629 int g_co;
630 for(unsigned i=0; i<r.getNumChildren(); ++i) {
631 int cop = 0;
632 if(!guessLength( r[i], cop)) {
633 return false;
634 }
635 if(i == 0) {
636 g_co = cop;
637 } else {
638 g_co = gcd(g_co, cop);
639 }
640 }
641 return true;
642 }
643 break;
644 case kind::REGEXP_STAR:
645 {
646 co = 0;
647 return true;
648 }
649 break;
650 default:
651 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
652 return false;
653 }
654 }
655
656 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
657 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
658 if(itr != d_fset_cache.end()) {
659 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
660 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
661 } else {
662 std::set<unsigned> cset;
663 SetNodes vset;
664 int k = r.getKind();
665 switch( k ) {
666 case kind::REGEXP_EMPTY: {
667 break;
668 }
669 case kind::REGEXP_SIGMA: {
670 for(unsigned i=0; i<d_card; i++) {
671 cset.insert(i);
672 }
673 break;
674 }
675 case kind::STRING_TO_REGEXP: {
676 Node st = Rewriter::rewrite(r[0]);
677 if(st.isConst()) {
678 CVC4::String s = st.getConst< CVC4::String >();
679 if(s.size() != 0) {
680 cset.insert(s[0]);
681 }
682 } else if(st.getKind() == kind::VARIABLE) {
683 vset.insert( st );
684 } else {
685 if(st[0].isConst()) {
686 CVC4::String s = st[0].getConst< CVC4::String >();
687 cset.insert(s[0]);
688 } else {
689 vset.insert( st[0] );
690 }
691 }
692 break;
693 }
694 case kind::REGEXP_CONCAT: {
695 for(unsigned i=0; i<r.getNumChildren(); i++) {
696 firstChars(r[i], cset, vset);
697 Node n = r[i];
698 Node exp;
699 int r = delta( n, exp );
700 if(r != 1) {
701 break;
702 }
703 }
704 break;
705 }
706 case kind::REGEXP_UNION: {
707 for(unsigned i=0; i<r.getNumChildren(); i++) {
708 firstChars(r[i], cset, vset);
709 }
710 break;
711 }
712 case kind::REGEXP_INTER: {
713 //TODO: Overapproximation for now
714 for(unsigned i=0; i<r.getNumChildren(); i++) {
715 firstChars(r[i], cset, vset);
716 }
717 break;
718 }
719 case kind::REGEXP_STAR: {
720 firstChars(r[0], cset, vset);
721 break;
722 }
723 default: {
724 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
725 Assert( false, "Unsupported Term" );
726 }
727 }
728 pcset.insert(cset.begin(), cset.end());
729 pvset.insert(vset.begin(), vset.end());
730 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
731 d_fset_cache[r] = p;
732
733 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
734 for(std::set<unsigned>::const_iterator itr = cset.begin();
735 itr != cset.end(); itr++) {
736 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
737 }
738 Trace("regexp-fset") << " }" << std::endl;
739 }
740 }
741
742 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
743 int k = r.getKind();
744 switch( k ) {
745 case kind::STRING_TO_REGEXP:
746 {
747 if(r[0].isConst()) {
748 if(r[0] != d_emptyString) {
749 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
750 if(c.isEmptyString()) {
751 vec_chars.push_back( t1 );
752 return true;
753 } else {
754 char t2 = c.getFirstChar();
755 if(t1 != t2) {
756 return false;
757 } else {
758 if(c.size() >= 2) {
759 vec_chars.push_back( c.substr(1,1).getFirstChar() );
760 } else {
761 vec_chars.push_back( '\0' );
762 }
763 return true;
764 }
765 }
766 } else {
767 return false;
768 }
769 } else {
770 return false;
771 }
772 }
773 break;
774 case kind::REGEXP_CONCAT:
775 {
776 for(unsigned i=0; i<r.getNumChildren(); ++i) {
777 if( follow(r[i], c, vec_chars) ) {
778 if(vec_chars[vec_chars.size() - 1] == '\0') {
779 vec_chars.pop_back();
780 c = d_emptyString.getConst< CVC4::String >();
781 }
782 } else {
783 return false;
784 }
785 }
786 vec_chars.push_back( '\0' );
787 return true;
788 }
789 break;
790 case kind::REGEXP_UNION:
791 {
792 bool flag = false;
793 for(unsigned i=0; i<r.getNumChildren(); ++i) {
794 if( follow(r[i], c, vec_chars) ) {
795 flag=true;
796 }
797 }
798 return flag;
799 }
800 break;
801 case kind::REGEXP_INTER:
802 {
803 std::vector< char > vt2;
804 for(unsigned i=0; i<r.getNumChildren(); ++i) {
805 std::vector< char > v_tmp;
806 if( !follow(r[i], c, v_tmp) ) {
807 return false;
808 }
809 std::vector< char > vt3(vt2);
810 vt2.clear();
811 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
812 if(vt2.size() == 0) {
813 return false;
814 }
815 }
816 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
817 return true;
818 }
819 break;
820 case kind::REGEXP_STAR:
821 {
822 if(follow(r[0], c, vec_chars)) {
823 if(vec_chars[vec_chars.size() - 1] == '\0') {
824 if(c.isEmptyString()) {
825 return true;
826 } else {
827 vec_chars.pop_back();
828 c = d_emptyString.getConst< CVC4::String >();
829 return follow(r[0], c, vec_chars);
830 }
831 } else {
832 return true;
833 }
834 } else {
835 vec_chars.push_back( '\0' );
836 return true;
837 }
838 }
839 break;
840 default: {
841 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
842 //AlwaysAssert( false );
843 //return Node::null();
844 return false;
845 }
846 }
847 }
848
849 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
850 std::vector< Node > vec_nodes;
851 for(char c=d_char_start; c<=d_char_end; ++c) {
852 if(c != exp_c ) {
853 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
854 vec_nodes.push_back( n );
855 }
856 }
857 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
858 }
859
860 //simplify
861 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
862 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
863 Assert(t.getKind() == kind::STRING_IN_REGEXP);
864 Node str = Rewriter::rewrite(t[0]);
865 Node re = Rewriter::rewrite(t[1]);
866 if(polarity) {
867 simplifyPRegExp( str, re, new_nodes );
868 } else {
869 simplifyNRegExp( str, re, new_nodes );
870 }
871 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
872 for(unsigned i=0; i<new_nodes.size(); i++) {
873 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
874 }
875 }
876 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
877 std::pair < Node, Node > p(s, r);
878 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
879 if(itr != d_simpl_neg_cache.end()) {
880 new_nodes.push_back( itr->second );
881 } else {
882 int k = r.getKind();
883 Node conc;
884 switch( k ) {
885 case kind::REGEXP_EMPTY: {
886 conc = d_true;
887 break;
888 }
889 case kind::REGEXP_SIGMA: {
890 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
891 break;
892 }
893 case kind::STRING_TO_REGEXP: {
894 conc = s.eqNode(r[0]).negate();
895 break;
896 }
897 case kind::REGEXP_CONCAT: {
898 //TODO: rewrite empty
899 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
900 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
901 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
902 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
903 Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
904 Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
905 Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
906 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
907 Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
908 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
909 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
910 s1r1 = s1.eqNode(r[0][0]).negate();
911 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
912 s1r1 = d_true;
913 }
914 Node r2 = r[1];
915 if(r.getNumChildren() > 2) {
916 std::vector< Node > nvec;
917 for(unsigned i=1; i<r.getNumChildren(); i++) {
918 nvec.push_back( r[i] );
919 }
920 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
921 }
922 r2 = Rewriter::rewrite(r2);
923 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
924 if(r2.getKind() == kind::STRING_TO_REGEXP) {
925 s2r2 = s2.eqNode(r2[0]).negate();
926 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
927 s2r2 = d_true;
928 }
929
930 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
931 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
932 conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
933 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
934 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
935 break;
936 }
937 case kind::REGEXP_UNION: {
938 std::vector< Node > c_and;
939 for(unsigned i=0; i<r.getNumChildren(); ++i) {
940 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
941 c_and.push_back( r[i][0].eqNode(s).negate() );
942 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
943 continue;
944 } else {
945 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
946 }
947 }
948 conc = c_and.size() == 0 ? d_true :
949 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
950 break;
951 }
952 case kind::REGEXP_INTER: {
953 bool emptyflag = false;
954 std::vector< Node > c_or;
955 for(unsigned i=0; i<r.getNumChildren(); ++i) {
956 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
957 c_or.push_back( r[i][0].eqNode(s).negate() );
958 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
959 emptyflag = true;
960 break;
961 } else {
962 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
963 }
964 }
965 if(emptyflag) {
966 conc = d_true;
967 } else {
968 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
969 }
970 break;
971 }
972 case kind::REGEXP_STAR: {
973 if(s == d_emptyString) {
974 conc = d_false;
975 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
976 conc = s.eqNode(d_emptyString).negate();
977 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
978 conc = d_false;
979 } else {
980 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
981 Node sne = s.eqNode(d_emptyString).negate();
982 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
983 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
984 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
985 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
986 //internal
987 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
988 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
989 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
990 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
991
992 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
993 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
994 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
995 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
996 }
997 break;
998 }
999 default: {
1000 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
1001 Assert( false, "Unsupported Term" );
1002 }
1003 }
1004 conc = Rewriter::rewrite( conc );
1005 new_nodes.push_back( conc );
1006 d_simpl_neg_cache[p] = conc;
1007 }
1008 }
1009 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
1010 std::pair < Node, Node > p(s, r);
1011 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
1012 if(itr != d_simpl_cache.end()) {
1013 new_nodes.push_back( itr->second );
1014 } else {
1015 int k = r.getKind();
1016 Node conc;
1017 switch( k ) {
1018 case kind::REGEXP_EMPTY: {
1019 conc = d_false;
1020 break;
1021 }
1022 case kind::REGEXP_SIGMA: {
1023 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1024 break;
1025 }
1026 case kind::STRING_TO_REGEXP: {
1027 conc = s.eqNode(r[0]);
1028 break;
1029 }
1030 case kind::REGEXP_CONCAT: {
1031 std::vector< Node > nvec;
1032 std::vector< Node > cc;
1033 bool emptyflag = false;
1034 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1035 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1036 cc.push_back( r[i][0] );
1037 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1038 emptyflag = true;
1039 break;
1040 } else {
1041 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1042 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1043 nvec.push_back(lem);
1044 cc.push_back(sk);
1045 }
1046 }
1047 if(emptyflag) {
1048 conc = d_false;
1049 } else {
1050 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1051 nvec.push_back(lem);
1052 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1053 }
1054 break;
1055 }
1056 case kind::REGEXP_UNION: {
1057 std::vector< Node > c_or;
1058 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1059 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1060 c_or.push_back( r[i][0].eqNode(s) );
1061 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1062 continue;
1063 } else {
1064 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1065 }
1066 }
1067 conc = c_or.size() == 0 ? d_false :
1068 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1069 break;
1070 }
1071 case kind::REGEXP_INTER: {
1072 std::vector< Node > c_and;
1073 bool emptyflag = false;
1074 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1075 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1076 c_and.push_back( r[i][0].eqNode(s) );
1077 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1078 emptyflag = true;
1079 break;
1080 } else {
1081 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1082 }
1083 }
1084 if(emptyflag) {
1085 conc = d_false;
1086 } else {
1087 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1088 }
1089 break;
1090 }
1091 case kind::REGEXP_STAR: {
1092 if(s == d_emptyString) {
1093 conc = d_true;
1094 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1095 conc = s.eqNode(d_emptyString);
1096 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1097 conc = d_true;
1098 } else {
1099 Node se = s.eqNode(d_emptyString);
1100 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1101 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1102 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1103 Node s1nz = sk1.eqNode(d_emptyString).negate();
1104 Node s2nz = sk2.eqNode(d_emptyString).negate();
1105 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1106 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1107 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1108
1109 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1110 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1111 }
1112 break;
1113 }
1114 default: {
1115 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1116 Assert( false, "Unsupported Term" );
1117 }
1118 }
1119 conc = Rewriter::rewrite( conc );
1120 new_nodes.push_back( conc );
1121 d_simpl_cache[p] = conc;
1122 }
1123 }
1124
1125 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1126 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1127 if(itr != d_cset_cache.end()) {
1128 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1129 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1130 } else {
1131 std::set<unsigned> cset;
1132 SetNodes vset;
1133 int k = r.getKind();
1134 switch( k ) {
1135 case kind::REGEXP_EMPTY: {
1136 break;
1137 }
1138 case kind::REGEXP_SIGMA: {
1139 for(unsigned i=0; i<d_card; i++) {
1140 cset.insert(i);
1141 }
1142 break;
1143 }
1144 case kind::STRING_TO_REGEXP: {
1145 Node st = Rewriter::rewrite(r[0]);
1146 if(st.isConst()) {
1147 CVC4::String s = st.getConst< CVC4::String >();
1148 s.getCharSet( cset );
1149 } else if(st.getKind() == kind::VARIABLE) {
1150 vset.insert( st );
1151 } else {
1152 for(unsigned i=0; i<st.getNumChildren(); i++) {
1153 if(st[i].isConst()) {
1154 CVC4::String s = st[i].getConst< CVC4::String >();
1155 s.getCharSet( cset );
1156 } else {
1157 vset.insert( st[i] );
1158 }
1159 }
1160 }
1161 break;
1162 }
1163 case kind::REGEXP_CONCAT: {
1164 for(unsigned i=0; i<r.getNumChildren(); i++) {
1165 getCharSet(r[i], cset, vset);
1166 }
1167 break;
1168 }
1169 case kind::REGEXP_UNION: {
1170 for(unsigned i=0; i<r.getNumChildren(); i++) {
1171 getCharSet(r[i], cset, vset);
1172 }
1173 break;
1174 }
1175 case kind::REGEXP_INTER: {
1176 //TODO: Overapproximation for now
1177 for(unsigned i=0; i<r.getNumChildren(); i++) {
1178 getCharSet(r[i], cset, vset);
1179 }
1180 break;
1181 }
1182 case kind::REGEXP_STAR: {
1183 getCharSet(r[0], cset, vset);
1184 break;
1185 }
1186 default: {
1187 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1188 Assert( false, "Unsupported Term" );
1189 }
1190 }
1191 pcset.insert(cset.begin(), cset.end());
1192 pvset.insert(vset.begin(), vset.end());
1193 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1194 d_cset_cache[r] = p;
1195
1196 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1197 for(std::set<unsigned>::const_iterator itr = cset.begin();
1198 itr != cset.end(); itr++) {
1199 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1200 }
1201 Trace("regexp-cset") << " }" << std::endl;
1202 }
1203 }
1204
1205
1206 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1207 if(spflag) {
1208 //TODO: var
1209 return Node::null();
1210 }
1211 std::pair < Node, Node > p(r1, r2);
1212 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1213 Node rNode;
1214 if(itr != d_inter_cache.end()) {
1215 rNode = itr->second;
1216 } else {
1217 if(r1 == r2) {
1218 rNode = r1;
1219 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1220 rNode = d_emptyRegexp;
1221 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1222 Node exp;
1223 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1224 if(r == 0) {
1225 //TODO: variable
1226 spflag = true;
1227 } else if(r == 1) {
1228 rNode = d_emptySingleton;
1229 } else {
1230 rNode = d_emptyRegexp;
1231 }
1232 } else {
1233 std::set< unsigned > cset, cset2;
1234 std::set< Node > vset, vset2;
1235 getCharSet(r1, cset, vset);
1236 getCharSet(r2, cset2, vset2);
1237 if(vset.empty() && vset2.empty()) {
1238 cset.clear();
1239 firstChars(r1, cset, vset);
1240 std::vector< Node > vec_nodes;
1241 for(std::set<unsigned>::const_iterator itr = cset.begin();
1242 itr != cset.end(); itr++) {
1243 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1244 std::pair< Node, Node > p(r1, r2);
1245 if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
1246 Node r1l = derivativeSingle(r1, c);
1247 Node r2l = derivativeSingle(r2, c);
1248 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1249 PairNodes p(r1l, r2l);
1250 cache2[ *itr ].insert( p );
1251 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1252 if(spflag) {
1253 //TODO:
1254 return Node::null();
1255 }
1256 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1257 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1258 vec_nodes.push_back(rt);
1259 }
1260 }
1261 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1262 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1263 rNode = Rewriter::rewrite( rNode );
1264 } else {
1265 //TODO: non-empty var set
1266 spflag = true;
1267 }
1268 }
1269 d_inter_cache[p] = rNode;
1270 }
1271 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1272 return rNode;
1273 }
1274 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1275 std::map< unsigned, std::set< PairNodes > > cache;
1276 return intersectInternal(r1, r2, cache, spflag);
1277 }
1278
1279 Node RegExpOpr::complement(Node r, int &ret) {
1280 Node rNode;
1281 ret = 1;
1282 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1283 rNode = d_compl_cache[r].first;
1284 ret = d_compl_cache[r].second;
1285 } else {
1286 if(r == d_emptyRegexp) {
1287 rNode = d_sigma_star;
1288 } else if(r == d_emptySingleton) {
1289 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1290 } else if(!checkConstRegExp(r)) {
1291 //TODO: var to be extended
1292 ret = 0;
1293 } else {
1294 std::set<unsigned> cset;
1295 SetNodes vset;
1296 firstChars(r, cset, vset);
1297 std::vector< Node > vec_nodes;
1298 for(unsigned i=0; i<d_card; i++) {
1299 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1300 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1301 Node r2;
1302 if(cset.find(i) == cset.end()) {
1303 r2 = d_sigma_star;
1304 } else {
1305 int rt;
1306 derivativeS(r, c, r2);
1307 if(r2 == r) {
1308 r2 = d_emptyRegexp;
1309 } else {
1310 r2 = complement(r2, rt);
1311 }
1312 }
1313 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1314 vec_nodes.push_back(n);
1315 }
1316 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1317 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1318 }
1319 rNode = Rewriter::rewrite(rNode);
1320 std::pair< Node, int > p(rNode, ret);
1321 d_compl_cache[r] = p;
1322 }
1323 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1324 return rNode;
1325 }
1326 //printing
1327 std::string RegExpOpr::niceChar( Node r ) {
1328 if(r.isConst()) {
1329 std::string s = r.getConst<CVC4::String>().toString() ;
1330 return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s );
1331 } else {
1332 std::string ss = "$" + r.toString();
1333 return ss;
1334 }
1335 }
1336 std::string RegExpOpr::mkString( Node r ) {
1337 std::string retStr;
1338 if(r.isNull()) {
1339 retStr = "Empty";
1340 } else {
1341 int k = r.getKind();
1342 switch( k ) {
1343 case kind::REGEXP_EMPTY: {
1344 retStr += "Empty";
1345 break;
1346 }
1347 case kind::REGEXP_SIGMA: {
1348 retStr += "{W}";
1349 break;
1350 }
1351 case kind::STRING_TO_REGEXP: {
1352 retStr += niceChar( r[0] );
1353 break;
1354 }
1355 case kind::REGEXP_CONCAT: {
1356 retStr += "(";
1357 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1358 //if(i != 0) retStr += ".";
1359 retStr += mkString( r[i] );
1360 }
1361 retStr += ")";
1362 break;
1363 }
1364 case kind::REGEXP_UNION: {
1365 if(r == d_sigma) {
1366 retStr += "{A}";
1367 } else {
1368 retStr += "(";
1369 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1370 if(i != 0) retStr += "|";
1371 retStr += mkString( r[i] );
1372 }
1373 retStr += ")";
1374 }
1375 break;
1376 }
1377 case kind::REGEXP_INTER: {
1378 retStr += "(";
1379 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1380 if(i != 0) retStr += "&";
1381 retStr += mkString( r[i] );
1382 }
1383 retStr += ")";
1384 break;
1385 }
1386 case kind::REGEXP_STAR: {
1387 retStr += mkString( r[0] );
1388 retStr += "*";
1389 break;
1390 }
1391 case kind::REGEXP_PLUS: {
1392 retStr += mkString( r[0] );
1393 retStr += "+";
1394 break;
1395 }
1396 case kind::REGEXP_OPT: {
1397 retStr += mkString( r[0] );
1398 retStr += "?";
1399 break;
1400 }
1401 case kind::REGEXP_RANGE: {
1402 retStr += "[";
1403 retStr += niceChar( r[0] );
1404 retStr += "-";
1405 retStr += niceChar( r[1] );
1406 retStr += "]";
1407 break;
1408 }
1409 default:
1410 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1411 //Assert( false );
1412 //return Node::null();
1413 }
1414 }
1415
1416 return retStr;
1417 }
1418
1419 }/* CVC4::theory::strings namespace */
1420 }/* CVC4::theory namespace */
1421 }/* CVC4 namespace */