minor change with kshitij's change
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Symbolic Regular Expresion Operations
24 **
25 ** Symbolic Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
40 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
41 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
42 std::vector< Node > nvec;
43 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
44 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
45 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
46 d_card = 256;
47 }
48
49 int RegExpOpr::gcd ( int a, int b ) {
50 int c;
51 while ( a != 0 ) {
52 c = a; a = b%a; b = c;
53 }
54 return b;
55 }
56
57 bool RegExpOpr::checkConstRegExp( Node r ) {
58 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
59 bool ret = true;
60 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
61 ret = d_cstre_cache[r];
62 } else {
63 if(r.getKind() == kind::STRING_TO_REGEXP) {
64 Node tmp = Rewriter::rewrite( r[0] );
65 ret = tmp.isConst();
66 } else {
67 for(unsigned i=0; i<r.getNumChildren(); ++i) {
68 if(!checkConstRegExp(r[i])) {
69 ret = false; break;
70 }
71 }
72 }
73 d_cstre_cache[r] = ret;
74 }
75 return ret;
76 }
77
78 // 0-unknown, 1-yes, 2-no
79 int RegExpOpr::delta( Node r, Node &exp ) {
80 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
81 int ret = 0;
82 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
83 ret = d_delta_cache[r].first;
84 exp = d_delta_cache[r].second;
85 } else {
86 int k = r.getKind();
87 switch( k ) {
88 case kind::REGEXP_EMPTY: {
89 ret = 2;
90 break;
91 }
92 case kind::REGEXP_SIGMA: {
93 ret = 2;
94 break;
95 }
96 case kind::STRING_TO_REGEXP: {
97 Node tmp = Rewriter::rewrite(r[0]);
98 if(tmp.isConst()) {
99 if(tmp == d_emptyString) {
100 ret = 1;
101 } else {
102 ret = 2;
103 }
104 } else {
105 ret = 0;
106 if(tmp.getKind() == kind::STRING_CONCAT) {
107 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
108 if(tmp[i].isConst()) {
109 ret = 2; break;
110 }
111 }
112
113 }
114 if(ret == 0) {
115 exp = r[0].eqNode(d_emptyString);
116 }
117 }
118 break;
119 }
120 case kind::REGEXP_CONCAT: {
121 bool flag = false;
122 std::vector< Node > vec_nodes;
123 for(unsigned i=0; i<r.getNumChildren(); ++i) {
124 Node exp2;
125 int tmp = delta( r[i], exp2 );
126 if(tmp == 2) {
127 ret = 2;
128 break;
129 } else if(tmp == 0) {
130 vec_nodes.push_back( exp2 );
131 flag = true;
132 }
133 }
134 if(ret != 2) {
135 if(!flag) {
136 ret = 1;
137 } else {
138 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
139 }
140 }
141 break;
142 }
143 case kind::REGEXP_UNION: {
144 bool flag = false;
145 std::vector< Node > vec_nodes;
146 for(unsigned i=0; i<r.getNumChildren(); ++i) {
147 Node exp2;
148 int tmp = delta( r[i], exp2 );
149 if(tmp == 1) {
150 ret = 1;
151 break;
152 } else if(tmp == 0) {
153 vec_nodes.push_back( exp2 );
154 flag = true;
155 }
156 }
157 if(ret != 1) {
158 if(!flag) {
159 ret = 2;
160 } else {
161 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
162 }
163 }
164 break;
165 }
166 case kind::REGEXP_INTER: {
167 bool flag = false;
168 std::vector< Node > vec_nodes;
169 for(unsigned i=0; i<r.getNumChildren(); ++i) {
170 Node exp2;
171 int tmp = delta( r[i], exp2 );
172 if(tmp == 2) {
173 ret = 2;
174 break;
175 } else if(tmp == 0) {
176 vec_nodes.push_back( exp2 );
177 flag = true;
178 }
179 }
180 if(ret != 2) {
181 if(!flag) {
182 ret = 1;
183 } else {
184 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
185 }
186 }
187 break;
188 }
189 case kind::REGEXP_STAR: {
190 ret = 1;
191 break;
192 }
193 case kind::REGEXP_PLUS: {
194 ret = delta( r[0], exp );
195 break;
196 }
197 case kind::REGEXP_OPT: {
198 ret = 1;
199 break;
200 }
201 case kind::REGEXP_RANGE: {
202 ret = 2;
203 break;
204 }
205 default: {
206 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
207 Assert( false );
208 //return Node::null();
209 }
210 }
211 if(!exp.isNull()) {
212 exp = Rewriter::rewrite(exp);
213 }
214 std::pair< int, Node > p(ret, exp);
215 d_delta_cache[r] = p;
216 }
217 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
218 return ret;
219 }
220
221 // 0-unknown, 1-yes, 2-no
222 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
223 Assert( c.size() < 2 );
224 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
225
226 int ret = 1;
227 retNode = d_emptyRegexp;
228
229 PairNodeStr dv = std::make_pair( r, c );
230 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
231 retNode = d_deriv_cache[dv].first;
232 ret = d_deriv_cache[dv].second;
233 } else if( c.isEmptyString() ) {
234 Node expNode;
235 ret = delta( r, expNode );
236 if(ret == 0) {
237 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
238 } else if(ret == 1) {
239 retNode = r;
240 }
241 std::pair< Node, int > p(retNode, ret);
242 d_deriv_cache[dv] = p;
243 } else {
244 switch( r.getKind() ) {
245 case kind::REGEXP_EMPTY: {
246 ret = 2;
247 break;
248 }
249 case kind::REGEXP_SIGMA: {
250 retNode = d_emptySingleton;
251 break;
252 }
253 case kind::STRING_TO_REGEXP: {
254 Node tmp = Rewriter::rewrite(r[0]);
255 if(tmp.isConst()) {
256 if(tmp == d_emptyString) {
257 ret = 2;
258 } else {
259 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
260 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
261 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
262 } else {
263 ret = 2;
264 }
265 }
266 } else {
267 ret = 0;
268 Node rest;
269 if(tmp.getKind() == kind::STRING_CONCAT) {
270 Node t2 = tmp[0];
271 if(t2.isConst()) {
272 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
273 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
274 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
275 std::vector< Node > vec_nodes;
276 vec_nodes.push_back(n);
277 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
278 vec_nodes.push_back(tmp[i]);
279 }
280 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
281 ret = 1;
282 } else {
283 ret = 2;
284 }
285 } else {
286 tmp = tmp[0];
287 std::vector< Node > vec_nodes;
288 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
289 vec_nodes.push_back(tmp[i]);
290 }
291 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
292 }
293 }
294 if(ret == 0) {
295 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
296 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
297 if(!rest.isNull()) {
298 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
299 }
300 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
301 NodeManager::currentNM()->mkConst(c), sk));
302 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
303 }
304 }
305 break;
306 }
307 case kind::REGEXP_CONCAT: {
308 std::vector< Node > vec_nodes;
309 std::vector< Node > delta_nodes;
310 Node dnode = d_true;
311 for(unsigned i=0; i<r.getNumChildren(); ++i) {
312 Node dc;
313 Node exp2;
314 int rt = derivativeS(r[i], c, dc);
315 if(rt != 2) {
316 if(rt == 0) {
317 ret = 0;
318 }
319 std::vector< Node > vec_nodes2;
320 if(dc != d_emptySingleton) {
321 vec_nodes2.push_back( dc );
322 }
323 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
324 if(r[j] != d_emptySingleton) {
325 vec_nodes2.push_back( r[j] );
326 }
327 }
328 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
329 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
330 if(dnode != d_true) {
331 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
332 ret = 0;
333 }
334 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
335 vec_nodes.push_back( tmp );
336 }
337 }
338 Node exp3;
339 int rt2 = delta( r[i], exp3 );
340 if( rt2 == 0 ) {
341 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
342 } else if( rt2 == 2 ) {
343 break;
344 }
345 }
346 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
347 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
348 if(retNode == d_emptyRegexp) {
349 ret = 2;
350 }
351 break;
352 }
353 case kind::REGEXP_UNION: {
354 std::vector< Node > vec_nodes;
355 for(unsigned i=0; i<r.getNumChildren(); ++i) {
356 Node dc;
357 int rt = derivativeS(r[i], c, dc);
358 if(rt == 0) {
359 ret = 0;
360 }
361 if(rt != 2) {
362 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
363 vec_nodes.push_back( dc );
364 }
365 }
366 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
367 }
368 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
369 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
370 if(retNode == d_emptyRegexp) {
371 ret = 2;
372 }
373 break;
374 }
375 case kind::REGEXP_INTER: {
376 bool flag = true;
377 bool flag_sg = false;
378 std::vector< Node > vec_nodes;
379 for(unsigned i=0; i<r.getNumChildren(); ++i) {
380 Node dc;
381 int rt = derivativeS(r[i], c, dc);
382 if(rt == 0) {
383 ret = 0;
384 } else if(rt == 2) {
385 flag = false;
386 break;
387 }
388 if(dc == d_sigma_star) {
389 flag_sg = true;
390 } else {
391 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
392 vec_nodes.push_back( dc );
393 }
394 }
395 }
396 if(flag) {
397 if(vec_nodes.size() == 0 && flag_sg) {
398 retNode = d_sigma_star;
399 } else {
400 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
401 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
402 if(retNode == d_emptyRegexp) {
403 ret = 2;
404 }
405 }
406 } else {
407 retNode = d_emptyRegexp;
408 ret = 2;
409 }
410 break;
411 }
412 case kind::REGEXP_STAR: {
413 Node dc;
414 ret = derivativeS(r[0], c, dc);
415 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
416 break;
417 }
418 default: {
419 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
420 Assert( false, "Unsupported Term" );
421 }
422 }
423 if(retNode != d_emptyRegexp) {
424 retNode = Rewriter::rewrite( retNode );
425 }
426 std::pair< Node, int > p(retNode, ret);
427 d_deriv_cache[dv] = p;
428 }
429
430 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
431 return ret;
432 }
433
434 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
435 Assert( c.size() < 2 );
436 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
437 Node retNode = d_emptyRegexp;
438 PairNodeStr dv = std::make_pair( r, c );
439 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
440 retNode = d_dv_cache[dv];
441 } else if( c.isEmptyString() ){
442 Node exp;
443 int tmp = delta( r, exp );
444 if(tmp == 0) {
445 // TODO variable
446 retNode = d_emptyRegexp;
447 } else if(tmp == 1) {
448 retNode = r;
449 } else {
450 retNode = d_emptyRegexp;
451 }
452 } else {
453 int k = r.getKind();
454 switch( k ) {
455 case kind::REGEXP_EMPTY: {
456 retNode = d_emptyRegexp;
457 break;
458 }
459 case kind::REGEXP_SIGMA: {
460 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
461 break;
462 }
463 case kind::STRING_TO_REGEXP: {
464 if(r[0].isConst()) {
465 if(r[0] == d_emptyString) {
466 retNode = d_emptyRegexp;
467 } else {
468 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
469 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
470 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
471 } else {
472 retNode = d_emptyRegexp;
473 }
474 }
475 } else {
476 // TODO variable
477 retNode = d_emptyRegexp;
478 }
479 break;
480 }
481 case kind::REGEXP_CONCAT: {
482 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
483 std::vector< Node > vec_nodes;
484 for(unsigned i=0; i<r.getNumChildren(); ++i) {
485 Node dc = derivativeSingle(r[i], c);
486 if(dc != d_emptyRegexp) {
487 std::vector< Node > vec_nodes2;
488 if(dc != rees) {
489 vec_nodes2.push_back( dc );
490 }
491 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
492 if(r[j] != rees) {
493 vec_nodes2.push_back( r[j] );
494 }
495 }
496 Node tmp = vec_nodes2.size()==0 ? rees :
497 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
498 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
499 vec_nodes.push_back( tmp );
500 }
501 }
502 Node exp;
503 if( delta( r[i], exp ) != 1 ) {
504 break;
505 }
506 }
507 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
508 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
509 break;
510 }
511 case kind::REGEXP_UNION: {
512 std::vector< Node > vec_nodes;
513 for(unsigned i=0; i<r.getNumChildren(); ++i) {
514 Node dc = derivativeSingle(r[i], c);
515 if(dc != d_emptyRegexp) {
516 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
517 vec_nodes.push_back( dc );
518 }
519 }
520 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
521 }
522 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
523 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
524 break;
525 }
526 case kind::REGEXP_INTER: {
527 bool flag = true;
528 bool flag_sg = false;
529 std::vector< Node > vec_nodes;
530 for(unsigned i=0; i<r.getNumChildren(); ++i) {
531 Node dc = derivativeSingle(r[i], c);
532 if(dc != d_emptyRegexp) {
533 if(dc == d_sigma_star) {
534 flag_sg = true;
535 } else {
536 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
537 vec_nodes.push_back( dc );
538 }
539 }
540 } else {
541 flag = false;
542 break;
543 }
544 }
545 if(flag) {
546 if(vec_nodes.size() == 0 && flag_sg) {
547 retNode = d_sigma_star;
548 } else {
549 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
550 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
551 }
552 } else {
553 retNode = d_emptyRegexp;
554 }
555 break;
556 }
557 case kind::REGEXP_STAR: {
558 Node dc = derivativeSingle(r[0], c);
559 if(dc != d_emptyRegexp) {
560 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
561 } else {
562 retNode = d_emptyRegexp;
563 }
564 break;
565 }
566 default: {
567 //TODO: special sym: sigma, none, all
568 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
569 Assert( false, "Unsupported Term" );
570 //return Node::null();
571 }
572 }
573 if(retNode != d_emptyRegexp) {
574 retNode = Rewriter::rewrite( retNode );
575 }
576 d_dv_cache[dv] = retNode;
577 }
578 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
579 return retNode;
580 }
581
582 //TODO:
583 bool RegExpOpr::guessLength( Node r, int &co ) {
584 int k = r.getKind();
585 switch( k ) {
586 case kind::STRING_TO_REGEXP:
587 {
588 if(r[0].isConst()) {
589 co += r[0].getConst< CVC4::String >().size();
590 return true;
591 } else {
592 return false;
593 }
594 }
595 break;
596 case kind::REGEXP_CONCAT:
597 {
598 for(unsigned i=0; i<r.getNumChildren(); ++i) {
599 if(!guessLength( r[i], co)) {
600 return false;
601 }
602 }
603 return true;
604 }
605 break;
606 case kind::REGEXP_UNION:
607 {
608 int g_co;
609 for(unsigned i=0; i<r.getNumChildren(); ++i) {
610 int cop = 0;
611 if(!guessLength( r[i], cop)) {
612 return false;
613 }
614 if(i == 0) {
615 g_co = cop;
616 } else {
617 g_co = gcd(g_co, cop);
618 }
619 }
620 return true;
621 }
622 break;
623 case kind::REGEXP_INTER:
624 {
625 int g_co;
626 for(unsigned i=0; i<r.getNumChildren(); ++i) {
627 int cop = 0;
628 if(!guessLength( r[i], cop)) {
629 return false;
630 }
631 if(i == 0) {
632 g_co = cop;
633 } else {
634 g_co = gcd(g_co, cop);
635 }
636 }
637 return true;
638 }
639 break;
640 case kind::REGEXP_STAR:
641 {
642 co = 0;
643 return true;
644 }
645 break;
646 default:
647 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
648 return false;
649 }
650 }
651
652 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
653 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
654 if(itr != d_fset_cache.end()) {
655 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
656 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
657 } else {
658 std::set<unsigned> cset;
659 SetNodes vset;
660 int k = r.getKind();
661 switch( k ) {
662 case kind::REGEXP_EMPTY: {
663 break;
664 }
665 case kind::REGEXP_SIGMA: {
666 for(unsigned i=0; i<d_card; i++) {
667 cset.insert(i);
668 }
669 break;
670 }
671 case kind::STRING_TO_REGEXP: {
672 Node st = Rewriter::rewrite(r[0]);
673 if(st.isConst()) {
674 CVC4::String s = st.getConst< CVC4::String >();
675 if(s.size() != 0) {
676 cset.insert(s[0]);
677 }
678 } else if(st.getKind() == kind::VARIABLE) {
679 vset.insert( st );
680 } else {
681 if(st[0].isConst()) {
682 CVC4::String s = st[0].getConst< CVC4::String >();
683 cset.insert(s[0]);
684 } else {
685 vset.insert( st[0] );
686 }
687 }
688 break;
689 }
690 case kind::REGEXP_CONCAT: {
691 for(unsigned i=0; i<r.getNumChildren(); i++) {
692 firstChars(r[i], cset, vset);
693 Node n = r[i];
694 Node exp;
695 int r = delta( n, exp );
696 if(r != 1) {
697 break;
698 }
699 }
700 break;
701 }
702 case kind::REGEXP_UNION: {
703 for(unsigned i=0; i<r.getNumChildren(); i++) {
704 firstChars(r[i], cset, vset);
705 }
706 break;
707 }
708 case kind::REGEXP_INTER: {
709 //TODO: Overapproximation for now
710 for(unsigned i=0; i<r.getNumChildren(); i++) {
711 firstChars(r[i], cset, vset);
712 }
713 break;
714 }
715 case kind::REGEXP_STAR: {
716 firstChars(r[0], cset, vset);
717 break;
718 }
719 default: {
720 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
721 Assert( false, "Unsupported Term" );
722 }
723 }
724 pcset.insert(cset.begin(), cset.end());
725 pvset.insert(vset.begin(), vset.end());
726 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
727 d_fset_cache[r] = p;
728
729 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
730 for(std::set<unsigned>::const_iterator itr = cset.begin();
731 itr != cset.end(); itr++) {
732 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
733 }
734 Trace("regexp-fset") << " }" << std::endl;
735 }
736 }
737
738 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
739 int k = r.getKind();
740 switch( k ) {
741 case kind::STRING_TO_REGEXP:
742 {
743 if(r[0].isConst()) {
744 if(r[0] != d_emptyString) {
745 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
746 if(c.isEmptyString()) {
747 vec_chars.push_back( t1 );
748 return true;
749 } else {
750 char t2 = c.getFirstChar();
751 if(t1 != t2) {
752 return false;
753 } else {
754 if(c.size() >= 2) {
755 vec_chars.push_back( c.substr(1,1).getFirstChar() );
756 } else {
757 vec_chars.push_back( '\0' );
758 }
759 return true;
760 }
761 }
762 } else {
763 return false;
764 }
765 } else {
766 return false;
767 }
768 }
769 break;
770 case kind::REGEXP_CONCAT:
771 {
772 for(unsigned i=0; i<r.getNumChildren(); ++i) {
773 if( follow(r[i], c, vec_chars) ) {
774 if(vec_chars[vec_chars.size() - 1] == '\0') {
775 vec_chars.pop_back();
776 c = d_emptyString.getConst< CVC4::String >();
777 }
778 } else {
779 return false;
780 }
781 }
782 vec_chars.push_back( '\0' );
783 return true;
784 }
785 break;
786 case kind::REGEXP_UNION:
787 {
788 bool flag = false;
789 for(unsigned i=0; i<r.getNumChildren(); ++i) {
790 if( follow(r[i], c, vec_chars) ) {
791 flag=true;
792 }
793 }
794 return flag;
795 }
796 break;
797 case kind::REGEXP_INTER:
798 {
799 std::vector< char > vt2;
800 for(unsigned i=0; i<r.getNumChildren(); ++i) {
801 std::vector< char > v_tmp;
802 if( !follow(r[i], c, v_tmp) ) {
803 return false;
804 }
805 std::vector< char > vt3(vt2);
806 vt2.clear();
807 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
808 if(vt2.size() == 0) {
809 return false;
810 }
811 }
812 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
813 return true;
814 }
815 break;
816 case kind::REGEXP_STAR:
817 {
818 if(follow(r[0], c, vec_chars)) {
819 if(vec_chars[vec_chars.size() - 1] == '\0') {
820 if(c.isEmptyString()) {
821 return true;
822 } else {
823 vec_chars.pop_back();
824 c = d_emptyString.getConst< CVC4::String >();
825 return follow(r[0], c, vec_chars);
826 }
827 } else {
828 return true;
829 }
830 } else {
831 vec_chars.push_back( '\0' );
832 return true;
833 }
834 }
835 break;
836 default: {
837 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
838 //AlwaysAssert( false );
839 //return Node::null();
840 return false;
841 }
842 }
843 }
844
845 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
846 std::vector< Node > vec_nodes;
847 for(char c=d_char_start; c<=d_char_end; ++c) {
848 if(c != exp_c ) {
849 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
850 vec_nodes.push_back( n );
851 }
852 }
853 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
854 }
855
856 //simplify
857 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
858 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
859 Assert(t.getKind() == kind::STRING_IN_REGEXP);
860 Node str = Rewriter::rewrite(t[0]);
861 Node re = Rewriter::rewrite(t[1]);
862 if(polarity) {
863 simplifyPRegExp( str, re, new_nodes );
864 } else {
865 simplifyNRegExp( str, re, new_nodes );
866 }
867 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
868 for(unsigned i=0; i<new_nodes.size(); i++) {
869 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
870 }
871 }
872 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
873 std::pair < Node, Node > p(s, r);
874 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
875 if(itr != d_simpl_neg_cache.end()) {
876 new_nodes.push_back( itr->second );
877 } else {
878 int k = r.getKind();
879 Node conc;
880 switch( k ) {
881 case kind::REGEXP_EMPTY: {
882 conc = d_true;
883 break;
884 }
885 case kind::REGEXP_SIGMA: {
886 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
887 break;
888 }
889 case kind::STRING_TO_REGEXP: {
890 conc = s.eqNode(r[0]).negate();
891 break;
892 }
893 case kind::REGEXP_CONCAT: {
894 //TODO: rewrite empty
895 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
896 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
897 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
898 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
899 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
900 Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1));
901 Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)));
902 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
903 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
904 s1r1 = s1.eqNode(r[0][0]).negate();
905 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
906 s1r1 = d_true;
907 }
908 Node r2 = r[1];
909 if(r.getNumChildren() > 2) {
910 std::vector< Node > nvec;
911 for(unsigned i=1; i<r.getNumChildren(); i++) {
912 nvec.push_back( r[i] );
913 }
914 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
915 }
916 r2 = Rewriter::rewrite(r2);
917 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
918 if(r2.getKind() == kind::STRING_TO_REGEXP) {
919 s2r2 = s2.eqNode(r2[0]).negate();
920 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
921 s2r2 = d_true;
922 }
923
924 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
925 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
926 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
927 break;
928 }
929 case kind::REGEXP_UNION: {
930 std::vector< Node > c_and;
931 for(unsigned i=0; i<r.getNumChildren(); ++i) {
932 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
933 c_and.push_back( r[i][0].eqNode(s).negate() );
934 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
935 continue;
936 } else {
937 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
938 }
939 }
940 conc = c_and.size() == 0 ? d_true :
941 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
942 break;
943 }
944 case kind::REGEXP_INTER: {
945 bool emptyflag = false;
946 std::vector< Node > c_or;
947 for(unsigned i=0; i<r.getNumChildren(); ++i) {
948 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
949 c_or.push_back( r[i][0].eqNode(s).negate() );
950 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
951 emptyflag = true;
952 break;
953 } else {
954 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
955 }
956 }
957 if(emptyflag) {
958 conc = d_true;
959 } else {
960 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
961 }
962 break;
963 }
964 case kind::REGEXP_STAR: {
965 if(s == d_emptyString) {
966 conc = d_false;
967 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
968 conc = s.eqNode(d_emptyString).negate();
969 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
970 conc = d_false;
971 } else {
972 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
973 Node sne = s.eqNode(d_emptyString).negate();
974 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
975 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
976 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
977 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
978 //internal
979 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
980 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
981 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
982 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
983
984 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
985 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
986 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
987 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
988 }
989 break;
990 }
991 default: {
992 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
993 Assert( false, "Unsupported Term" );
994 }
995 }
996 conc = Rewriter::rewrite( conc );
997 new_nodes.push_back( conc );
998 d_simpl_neg_cache[p] = conc;
999 }
1000 }
1001 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
1002 std::pair < Node, Node > p(s, r);
1003 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
1004 if(itr != d_simpl_cache.end()) {
1005 new_nodes.push_back( itr->second );
1006 } else {
1007 int k = r.getKind();
1008 Node conc;
1009 switch( k ) {
1010 case kind::REGEXP_EMPTY: {
1011 conc = d_false;
1012 break;
1013 }
1014 case kind::REGEXP_SIGMA: {
1015 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1016 break;
1017 }
1018 case kind::STRING_TO_REGEXP: {
1019 conc = s.eqNode(r[0]);
1020 break;
1021 }
1022 case kind::REGEXP_CONCAT: {
1023 std::vector< Node > nvec;
1024 std::vector< Node > cc;
1025 bool emptyflag = false;
1026 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1027 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1028 cc.push_back( r[i][0] );
1029 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1030 emptyflag = true;
1031 break;
1032 } else {
1033 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1034 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1035 nvec.push_back(lem);
1036 cc.push_back(sk);
1037 }
1038 }
1039 if(emptyflag) {
1040 conc = d_false;
1041 } else {
1042 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1043 nvec.push_back(lem);
1044 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1045 }
1046 break;
1047 }
1048 case kind::REGEXP_UNION: {
1049 std::vector< Node > c_or;
1050 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1051 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1052 c_or.push_back( r[i][0].eqNode(s) );
1053 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1054 continue;
1055 } else {
1056 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1057 }
1058 }
1059 conc = c_or.size() == 0 ? d_false :
1060 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1061 break;
1062 }
1063 case kind::REGEXP_INTER: {
1064 std::vector< Node > c_and;
1065 bool emptyflag = false;
1066 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1067 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1068 c_and.push_back( r[i][0].eqNode(s) );
1069 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1070 emptyflag = true;
1071 break;
1072 } else {
1073 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1074 }
1075 }
1076 if(emptyflag) {
1077 conc = d_false;
1078 } else {
1079 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1080 }
1081 break;
1082 }
1083 case kind::REGEXP_STAR: {
1084 if(s == d_emptyString) {
1085 conc = d_true;
1086 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1087 conc = s.eqNode(d_emptyString);
1088 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1089 conc = d_true;
1090 } else {
1091 Node se = s.eqNode(d_emptyString);
1092 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1093 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1094 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1095 Node s1nz = sk1.eqNode(d_emptyString).negate();
1096 Node s2nz = sk2.eqNode(d_emptyString).negate();
1097 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1098 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1099 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1100
1101 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1102 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1103 }
1104 break;
1105 }
1106 default: {
1107 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1108 Assert( false, "Unsupported Term" );
1109 }
1110 }
1111 conc = Rewriter::rewrite( conc );
1112 new_nodes.push_back( conc );
1113 d_simpl_cache[p] = conc;
1114 }
1115 }
1116
1117 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1118 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1119 if(itr != d_cset_cache.end()) {
1120 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1121 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1122 } else {
1123 std::set<unsigned> cset;
1124 SetNodes vset;
1125 int k = r.getKind();
1126 switch( k ) {
1127 case kind::REGEXP_EMPTY: {
1128 break;
1129 }
1130 case kind::REGEXP_SIGMA: {
1131 for(unsigned i=0; i<d_card; i++) {
1132 cset.insert(i);
1133 }
1134 break;
1135 }
1136 case kind::STRING_TO_REGEXP: {
1137 Node st = Rewriter::rewrite(r[0]);
1138 if(st.isConst()) {
1139 CVC4::String s = st.getConst< CVC4::String >();
1140 s.getCharSet( cset );
1141 } else if(st.getKind() == kind::VARIABLE) {
1142 vset.insert( st );
1143 } else {
1144 for(unsigned i=0; i<st.getNumChildren(); i++) {
1145 if(st[i].isConst()) {
1146 CVC4::String s = st[i].getConst< CVC4::String >();
1147 s.getCharSet( cset );
1148 } else {
1149 vset.insert( st[i] );
1150 }
1151 }
1152 }
1153 break;
1154 }
1155 case kind::REGEXP_CONCAT: {
1156 for(unsigned i=0; i<r.getNumChildren(); i++) {
1157 getCharSet(r[i], cset, vset);
1158 }
1159 break;
1160 }
1161 case kind::REGEXP_UNION: {
1162 for(unsigned i=0; i<r.getNumChildren(); i++) {
1163 getCharSet(r[i], cset, vset);
1164 }
1165 break;
1166 }
1167 case kind::REGEXP_INTER: {
1168 //TODO: Overapproximation for now
1169 for(unsigned i=0; i<r.getNumChildren(); i++) {
1170 getCharSet(r[i], cset, vset);
1171 }
1172 break;
1173 }
1174 case kind::REGEXP_STAR: {
1175 getCharSet(r[0], cset, vset);
1176 break;
1177 }
1178 default: {
1179 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1180 Assert( false, "Unsupported Term" );
1181 }
1182 }
1183 pcset.insert(cset.begin(), cset.end());
1184 pvset.insert(vset.begin(), vset.end());
1185 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1186 d_cset_cache[r] = p;
1187
1188 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1189 for(std::set<unsigned>::const_iterator itr = cset.begin();
1190 itr != cset.end(); itr++) {
1191 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1192 }
1193 Trace("regexp-cset") << " }" << std::endl;
1194 }
1195 }
1196
1197
1198 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1199 if(spflag) {
1200 //TODO: var
1201 return Node::null();
1202 }
1203 std::pair < Node, Node > p(r1, r2);
1204 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1205 Node rNode;
1206 if(itr != d_inter_cache.end()) {
1207 rNode = itr->second;
1208 } else {
1209 if(r1 == r2) {
1210 rNode = r1;
1211 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1212 rNode = d_emptyRegexp;
1213 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1214 Node exp;
1215 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1216 if(r == 0) {
1217 //TODO: variable
1218 spflag = true;
1219 } else if(r == 1) {
1220 rNode = d_emptySingleton;
1221 } else {
1222 rNode = d_emptyRegexp;
1223 }
1224 } else {
1225 std::set< unsigned > cset, cset2;
1226 std::set< Node > vset, vset2;
1227 getCharSet(r1, cset, vset);
1228 getCharSet(r2, cset2, vset2);
1229 if(vset.empty() && vset2.empty()) {
1230 cset.clear();
1231 firstChars(r1, cset, vset);
1232 std::vector< Node > vec_nodes;
1233 for(std::set<unsigned>::const_iterator itr = cset.begin();
1234 itr != cset.end(); itr++) {
1235 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1236 std::pair< Node, Node > p(r1, r2);
1237 if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
1238 Node r1l = derivativeSingle(r1, c);
1239 Node r2l = derivativeSingle(r2, c);
1240 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1241 PairNodes p(r1l, r2l);
1242 cache2[ *itr ].insert( p );
1243 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1244 if(spflag) {
1245 //TODO:
1246 return Node::null();
1247 }
1248 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1249 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1250 vec_nodes.push_back(rt);
1251 }
1252 }
1253 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1254 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1255 rNode = Rewriter::rewrite( rNode );
1256 } else {
1257 //TODO: non-empty var set
1258 spflag = true;
1259 }
1260 }
1261 d_inter_cache[p] = rNode;
1262 }
1263 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1264 return rNode;
1265 }
1266 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1267 std::map< unsigned, std::set< PairNodes > > cache;
1268 return intersectInternal(r1, r2, cache, spflag);
1269 }
1270
1271 Node RegExpOpr::complement(Node r, int &ret) {
1272 Node rNode;
1273 ret = 1;
1274 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1275 rNode = d_compl_cache[r].first;
1276 ret = d_compl_cache[r].second;
1277 } else {
1278 if(r == d_emptyRegexp) {
1279 rNode = d_sigma_star;
1280 } else if(r == d_emptySingleton) {
1281 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1282 } else if(!checkConstRegExp(r)) {
1283 //TODO: var to be extended
1284 ret = 0;
1285 } else {
1286 std::set<unsigned> cset;
1287 SetNodes vset;
1288 firstChars(r, cset, vset);
1289 std::vector< Node > vec_nodes;
1290 for(unsigned i=0; i<d_card; i++) {
1291 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1292 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1293 Node r2;
1294 if(cset.find(i) == cset.end()) {
1295 r2 = d_sigma_star;
1296 } else {
1297 int rt;
1298 derivativeS(r, c, r2);
1299 if(r2 == r) {
1300 r2 = d_emptyRegexp;
1301 } else {
1302 r2 = complement(r2, rt);
1303 }
1304 }
1305 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1306 vec_nodes.push_back(n);
1307 }
1308 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1309 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1310 }
1311 rNode = Rewriter::rewrite(rNode);
1312 std::pair< Node, int > p(rNode, ret);
1313 d_compl_cache[r] = p;
1314 }
1315 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1316 return rNode;
1317 }
1318
1319 void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) {
1320 Assert(checkConstRegExp(r));
1321 if(d_split_cache.find(r) != d_split_cache.end()) {
1322 pset = d_split_cache[r];
1323 } else {
1324 switch( r.getKind() ) {
1325 case kind::REGEXP_EMPTY: {
1326 break;
1327 }
1328 case kind::REGEXP_OPT: {
1329 PairNodes tmp(d_emptySingleton, d_emptySingleton);
1330 pset.push_back(tmp);
1331 }
1332 case kind::REGEXP_RANGE:
1333 case kind::REGEXP_SIGMA: {
1334 PairNodes tmp1(d_emptySingleton, r);
1335 PairNodes tmp2(r, d_emptySingleton);
1336 pset.push_back(tmp1);
1337 pset.push_back(tmp2);
1338 break;
1339 }
1340 case kind::STRING_TO_REGEXP: {
1341 Assert(r[0].isConst());
1342 CVC4::String s = r[0].getConst< CVC4::String >();
1343 PairNodes tmp1(d_emptySingleton, r);
1344 pset.push_back(tmp1);
1345 for(unsigned i=1; i<s.size(); i++) {
1346 CVC4::String s1 = s.substr(0, i);
1347 CVC4::String s2 = s.substr(i);
1348 Node n1 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s1));
1349 Node n2 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s2));
1350 PairNodes tmp3(n1, n2);
1351 pset.push_back(tmp3);
1352 }
1353 PairNodes tmp2(r, d_emptySingleton);
1354 pset.push_back(tmp2);
1355 break;
1356 }
1357 case kind::REGEXP_CONCAT: {
1358 for(unsigned i=0; i<r.getNumChildren(); i++) {
1359 std::vector< PairNodes > tset;
1360 splitRegExp(r[i], tset);
1361 std::vector< Node > hvec;
1362 std::vector< Node > tvec;
1363 for(unsigned j=0; j<=i; j++) {
1364 hvec.push_back(r[j]);
1365 }
1366 for(unsigned j=i; j<r.getNumChildren(); j++) {
1367 tvec.push_back(r[j]);
1368 }
1369 for(unsigned j=0; j<tset.size(); j++) {
1370 hvec[i] = tset[j].first;
1371 tvec[0] = tset[j].second;
1372 Node r1 = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, hvec) );
1373 Node r2 = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tvec) );
1374 PairNodes tmp2(r1, r2);
1375 pset.push_back(tmp2);
1376 }
1377 }
1378 break;
1379 }
1380 case kind::REGEXP_UNION: {
1381 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1382 std::vector< PairNodes > tset;
1383 splitRegExp(r[i], tset);
1384 pset.insert(pset.end(), tset.begin(), tset.end());
1385 }
1386 break;
1387 }
1388 case kind::REGEXP_INTER: {
1389 bool spflag = false;
1390 Node tmp = r[0];
1391 for(unsigned i=1; i<r.getNumChildren(); i++) {
1392 tmp = intersect(tmp, r[i], spflag);
1393 }
1394 splitRegExp(tmp, pset);
1395 break;
1396 }
1397 case kind::REGEXP_STAR: {
1398 std::vector< PairNodes > tset;
1399 splitRegExp(r[0], tset);
1400 PairNodes tmp1(d_emptySingleton, d_emptySingleton);
1401 pset.push_back(tmp1);
1402 for(unsigned i=0; i<tset.size(); i++) {
1403 Node r1 = tset[i].first==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1404 Node r2 = tset[i].second==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1405 PairNodes tmp2(r1, r2);
1406 pset.push_back(tmp2);
1407 }
1408 break;
1409 }
1410 case kind::REGEXP_PLUS: {
1411 std::vector< PairNodes > tset;
1412 splitRegExp(r[0], tset);
1413 for(unsigned i=0; i<tset.size(); i++) {
1414 Node r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1415 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1416 PairNodes tmp2(r1, r2);
1417 pset.push_back(tmp2);
1418 }
1419 break;
1420 }
1421 default: {
1422 Trace("strings-error") << "Unsupported term: " << r << " in splitRegExp." << std::endl;
1423 Assert( false );
1424 //return Node::null();
1425 }
1426 }
1427 d_split_cache[r] = pset;
1428 }
1429 }
1430
1431 //printing
1432 std::string RegExpOpr::niceChar( Node r ) {
1433 if(r.isConst()) {
1434 std::string s = r.getConst<CVC4::String>().toString() ;
1435 return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s );
1436 } else {
1437 std::string ss = "$" + r.toString();
1438 return ss;
1439 }
1440 }
1441 std::string RegExpOpr::mkString( Node r ) {
1442 std::string retStr;
1443 if(r.isNull()) {
1444 retStr = "Empty";
1445 } else {
1446 int k = r.getKind();
1447 switch( k ) {
1448 case kind::REGEXP_EMPTY: {
1449 retStr += "Empty";
1450 break;
1451 }
1452 case kind::REGEXP_SIGMA: {
1453 retStr += "{W}";
1454 break;
1455 }
1456 case kind::STRING_TO_REGEXP: {
1457 retStr += niceChar( r[0] );
1458 break;
1459 }
1460 case kind::REGEXP_CONCAT: {
1461 retStr += "(";
1462 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1463 //if(i != 0) retStr += ".";
1464 retStr += mkString( r[i] );
1465 }
1466 retStr += ")";
1467 break;
1468 }
1469 case kind::REGEXP_UNION: {
1470 if(r == d_sigma) {
1471 retStr += "{A}";
1472 } else {
1473 retStr += "(";
1474 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1475 if(i != 0) retStr += "|";
1476 retStr += mkString( r[i] );
1477 }
1478 retStr += ")";
1479 }
1480 break;
1481 }
1482 case kind::REGEXP_INTER: {
1483 retStr += "(";
1484 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1485 if(i != 0) retStr += "&";
1486 retStr += mkString( r[i] );
1487 }
1488 retStr += ")";
1489 break;
1490 }
1491 case kind::REGEXP_STAR: {
1492 retStr += mkString( r[0] );
1493 retStr += "*";
1494 break;
1495 }
1496 case kind::REGEXP_PLUS: {
1497 retStr += mkString( r[0] );
1498 retStr += "+";
1499 break;
1500 }
1501 case kind::REGEXP_OPT: {
1502 retStr += mkString( r[0] );
1503 retStr += "?";
1504 break;
1505 }
1506 case kind::REGEXP_RANGE: {
1507 retStr += "[";
1508 retStr += niceChar( r[0] );
1509 retStr += "-";
1510 retStr += niceChar( r[1] );
1511 retStr += "]";
1512 break;
1513 }
1514 default:
1515 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1516 //Assert( false );
1517 //return Node::null();
1518 }
1519 }
1520
1521 return retStr;
1522 }
1523
1524 }/* CVC4::theory::strings namespace */
1525 }/* CVC4::theory namespace */
1526 }/* CVC4 namespace */