Merge pull request #34 from mdeters/datatypes-kinds
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4 ** \verbatim
5 ** Original author: Tianyi Liang
6 ** Major contributors: none
7 ** Minor contributors (to current version): none
8 ** This file is part of the CVC4 project.
9 ** Copyright (c) 2009-2013 New York University and The University of Iowa
10 ** See the file COPYING in the top-level source directory for licensing
11 ** information.\endverbatim
12 **
13 ** \brief Symbolic Regular Expresion Operations
14 **
15 ** Symbolic Regular Expresion Operations
16 **/
17
18 #include "theory/strings/regexp_operation.h"
19 #include "expr/kind.h"
20
21 namespace CVC4 {
22 namespace theory {
23 namespace strings {
24
25 RegExpOpr::RegExpOpr() {
26 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
27 d_true = NodeManager::currentNM()->mkConst( true );
28 d_false = NodeManager::currentNM()->mkConst( false );
29 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
30 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
31 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
32 std::vector< Node > nvec;
33 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
34 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
35 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
36 d_card = 256;
37 }
38
39 int RegExpOpr::gcd ( int a, int b ) {
40 int c;
41 while ( a != 0 ) {
42 c = a; a = b%a; b = c;
43 }
44 return b;
45 }
46
47 bool RegExpOpr::checkConstRegExp( Node r ) {
48 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
49 bool ret = true;
50 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
51 ret = d_cstre_cache[r];
52 } else {
53 if(r.getKind() == kind::STRING_TO_REGEXP) {
54 Node tmp = Rewriter::rewrite( r[0] );
55 ret = tmp.isConst();
56 } else {
57 for(unsigned i=0; i<r.getNumChildren(); ++i) {
58 if(!checkConstRegExp(r[i])) {
59 ret = false; break;
60 }
61 }
62 }
63 d_cstre_cache[r] = ret;
64 }
65 return ret;
66 }
67
68 // 0-unknown, 1-yes, 2-no
69 int RegExpOpr::delta( Node r, Node &exp ) {
70 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
71 int ret = 0;
72 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
73 ret = d_delta_cache[r].first;
74 exp = d_delta_cache[r].second;
75 } else {
76 int k = r.getKind();
77 switch( k ) {
78 case kind::REGEXP_EMPTY: {
79 ret = 2;
80 break;
81 }
82 case kind::REGEXP_SIGMA: {
83 ret = 2;
84 break;
85 }
86 case kind::STRING_TO_REGEXP: {
87 Node tmp = Rewriter::rewrite(r[0]);
88 if(tmp.isConst()) {
89 if(tmp == d_emptyString) {
90 ret = 1;
91 } else {
92 ret = 2;
93 }
94 } else {
95 ret = 0;
96 if(tmp.getKind() == kind::STRING_CONCAT) {
97 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
98 if(tmp[i].isConst()) {
99 ret = 2; break;
100 }
101 }
102
103 }
104 if(ret == 0) {
105 exp = r[0].eqNode(d_emptyString);
106 }
107 }
108 break;
109 }
110 case kind::REGEXP_CONCAT: {
111 bool flag = false;
112 std::vector< Node > vec_nodes;
113 for(unsigned i=0; i<r.getNumChildren(); ++i) {
114 Node exp2;
115 int tmp = delta( r[i], exp2 );
116 if(tmp == 2) {
117 ret = 2;
118 break;
119 } else if(tmp == 0) {
120 vec_nodes.push_back( exp2 );
121 flag = true;
122 }
123 }
124 if(ret != 2) {
125 if(!flag) {
126 ret = 1;
127 } else {
128 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
129 }
130 }
131 break;
132 }
133 case kind::REGEXP_UNION: {
134 bool flag = false;
135 std::vector< Node > vec_nodes;
136 for(unsigned i=0; i<r.getNumChildren(); ++i) {
137 Node exp2;
138 int tmp = delta( r[i], exp2 );
139 if(tmp == 1) {
140 ret = 1;
141 break;
142 } else if(tmp == 0) {
143 vec_nodes.push_back( exp2 );
144 flag = true;
145 }
146 }
147 if(ret != 1) {
148 if(!flag) {
149 ret = 2;
150 } else {
151 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
152 }
153 }
154 break;
155 }
156 case kind::REGEXP_INTER: {
157 bool flag = false;
158 std::vector< Node > vec_nodes;
159 for(unsigned i=0; i<r.getNumChildren(); ++i) {
160 Node exp2;
161 int tmp = delta( r[i], exp2 );
162 if(tmp == 2) {
163 ret = 2;
164 break;
165 } else if(tmp == 0) {
166 vec_nodes.push_back( exp2 );
167 flag = true;
168 }
169 }
170 if(ret != 2) {
171 if(!flag) {
172 ret = 1;
173 } else {
174 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
175 }
176 }
177 break;
178 }
179 case kind::REGEXP_STAR: {
180 ret = 1;
181 break;
182 }
183 case kind::REGEXP_PLUS: {
184 ret = delta( r[0], exp );
185 break;
186 }
187 case kind::REGEXP_OPT: {
188 ret = 1;
189 break;
190 }
191 case kind::REGEXP_RANGE: {
192 ret = 2;
193 break;
194 }
195 default: {
196 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
197 Assert( false );
198 //return Node::null();
199 }
200 }
201 if(!exp.isNull()) {
202 exp = Rewriter::rewrite(exp);
203 }
204 std::pair< int, Node > p(ret, exp);
205 d_delta_cache[r] = p;
206 }
207 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
208 return ret;
209 }
210
211 // 0-unknown, 1-yes, 2-no
212 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
213 Assert( c.size() < 2 );
214 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
215
216 int ret = 1;
217 retNode = d_emptyRegexp;
218
219 PairNodeStr dv = std::make_pair( r, c );
220 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
221 retNode = d_deriv_cache[dv].first;
222 ret = d_deriv_cache[dv].second;
223 } else if( c.isEmptyString() ) {
224 Node expNode;
225 ret = delta( r, expNode );
226 if(ret == 0) {
227 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
228 } else if(ret == 1) {
229 retNode = r;
230 }
231 std::pair< Node, int > p(retNode, ret);
232 d_deriv_cache[dv] = p;
233 } else {
234 switch( r.getKind() ) {
235 case kind::REGEXP_EMPTY: {
236 ret = 2;
237 break;
238 }
239 case kind::REGEXP_SIGMA: {
240 retNode = d_emptySingleton;
241 break;
242 }
243 case kind::STRING_TO_REGEXP: {
244 Node tmp = Rewriter::rewrite(r[0]);
245 if(tmp.isConst()) {
246 if(tmp == d_emptyString) {
247 ret = 2;
248 } else {
249 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
250 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
251 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
252 } else {
253 ret = 2;
254 }
255 }
256 } else {
257 ret = 0;
258 Node rest;
259 if(tmp.getKind() == kind::STRING_CONCAT) {
260 Node t2 = tmp[0];
261 if(t2.isConst()) {
262 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
263 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
264 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
265 std::vector< Node > vec_nodes;
266 vec_nodes.push_back(n);
267 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
268 vec_nodes.push_back(tmp[i]);
269 }
270 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
271 ret = 1;
272 } else {
273 ret = 2;
274 }
275 } else {
276 tmp = tmp[0];
277 std::vector< Node > vec_nodes;
278 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
279 vec_nodes.push_back(tmp[i]);
280 }
281 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
282 }
283 }
284 if(ret == 0) {
285 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
286 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
287 if(!rest.isNull()) {
288 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
289 }
290 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
291 NodeManager::currentNM()->mkConst(c), sk));
292 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
293 }
294 }
295 break;
296 }
297 case kind::REGEXP_CONCAT: {
298 std::vector< Node > vec_nodes;
299 std::vector< Node > delta_nodes;
300 Node dnode = d_true;
301 for(unsigned i=0; i<r.getNumChildren(); ++i) {
302 Node dc;
303 Node exp2;
304 int rt = derivativeS(r[i], c, dc);
305 if(rt != 2) {
306 if(rt == 0) {
307 ret = 0;
308 }
309 std::vector< Node > vec_nodes2;
310 if(dc != d_emptySingleton) {
311 vec_nodes2.push_back( dc );
312 }
313 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
314 if(r[j] != d_emptySingleton) {
315 vec_nodes2.push_back( r[j] );
316 }
317 }
318 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
319 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
320 if(dnode != d_true) {
321 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
322 ret = 0;
323 }
324 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
325 vec_nodes.push_back( tmp );
326 }
327 }
328 Node exp3;
329 int rt2 = delta( r[i], exp3 );
330 if( rt2 == 0 ) {
331 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
332 } else if( rt2 == 2 ) {
333 break;
334 }
335 }
336 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
337 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
338 if(retNode == d_emptyRegexp) {
339 ret = 2;
340 }
341 break;
342 }
343 case kind::REGEXP_UNION: {
344 std::vector< Node > vec_nodes;
345 for(unsigned i=0; i<r.getNumChildren(); ++i) {
346 Node dc;
347 int rt = derivativeS(r[i], c, dc);
348 if(rt == 0) {
349 ret = 0;
350 }
351 if(rt != 2) {
352 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
353 vec_nodes.push_back( dc );
354 }
355 }
356 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
357 }
358 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
359 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
360 if(retNode == d_emptyRegexp) {
361 ret = 2;
362 }
363 break;
364 }
365 case kind::REGEXP_INTER: {
366 bool flag = true;
367 bool flag_sg = false;
368 std::vector< Node > vec_nodes;
369 for(unsigned i=0; i<r.getNumChildren(); ++i) {
370 Node dc;
371 int rt = derivativeS(r[i], c, dc);
372 if(rt == 0) {
373 ret = 0;
374 } else if(rt == 2) {
375 flag = false;
376 break;
377 }
378 if(dc == d_sigma_star) {
379 flag_sg = true;
380 } else {
381 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
382 vec_nodes.push_back( dc );
383 }
384 }
385 }
386 if(flag) {
387 if(vec_nodes.size() == 0 && flag_sg) {
388 retNode = d_sigma_star;
389 } else {
390 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
391 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
392 if(retNode == d_emptyRegexp) {
393 ret = 2;
394 }
395 }
396 } else {
397 retNode = d_emptyRegexp;
398 ret = 2;
399 }
400 break;
401 }
402 case kind::REGEXP_STAR: {
403 Node dc;
404 ret = derivativeS(r[0], c, dc);
405 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
406 break;
407 }
408 default: {
409 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
410 Assert( false, "Unsupported Term" );
411 }
412 }
413 if(retNode != d_emptyRegexp) {
414 retNode = Rewriter::rewrite( retNode );
415 }
416 std::pair< Node, int > p(retNode, ret);
417 d_deriv_cache[dv] = p;
418 }
419
420 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
421 return ret;
422 }
423
424 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
425 Assert( c.size() < 2 );
426 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
427 Node retNode = d_emptyRegexp;
428 PairNodeStr dv = std::make_pair( r, c );
429 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
430 retNode = d_dv_cache[dv];
431 } else if( c.isEmptyString() ){
432 Node exp;
433 int tmp = delta( r, exp );
434 if(tmp == 0) {
435 // TODO variable
436 retNode = d_emptyRegexp;
437 } else if(tmp == 1) {
438 retNode = r;
439 } else {
440 retNode = d_emptyRegexp;
441 }
442 } else {
443 int k = r.getKind();
444 switch( k ) {
445 case kind::REGEXP_EMPTY: {
446 retNode = d_emptyRegexp;
447 break;
448 }
449 case kind::REGEXP_SIGMA: {
450 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
451 break;
452 }
453 case kind::STRING_TO_REGEXP: {
454 if(r[0].isConst()) {
455 if(r[0] == d_emptyString) {
456 retNode = d_emptyRegexp;
457 } else {
458 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
459 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
460 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
461 } else {
462 retNode = d_emptyRegexp;
463 }
464 }
465 } else {
466 // TODO variable
467 retNode = d_emptyRegexp;
468 }
469 break;
470 }
471 case kind::REGEXP_CONCAT: {
472 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
473 std::vector< Node > vec_nodes;
474 for(unsigned i=0; i<r.getNumChildren(); ++i) {
475 Node dc = derivativeSingle(r[i], c);
476 if(dc != d_emptyRegexp) {
477 std::vector< Node > vec_nodes2;
478 if(dc != rees) {
479 vec_nodes2.push_back( dc );
480 }
481 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
482 if(r[j] != rees) {
483 vec_nodes2.push_back( r[j] );
484 }
485 }
486 Node tmp = vec_nodes2.size()==0 ? rees :
487 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
488 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
489 vec_nodes.push_back( tmp );
490 }
491 }
492 Node exp;
493 if( delta( r[i], exp ) != 1 ) {
494 break;
495 }
496 }
497 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
498 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
499 break;
500 }
501 case kind::REGEXP_UNION: {
502 std::vector< Node > vec_nodes;
503 for(unsigned i=0; i<r.getNumChildren(); ++i) {
504 Node dc = derivativeSingle(r[i], c);
505 if(dc != d_emptyRegexp) {
506 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
507 vec_nodes.push_back( dc );
508 }
509 }
510 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
511 }
512 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
513 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
514 break;
515 }
516 case kind::REGEXP_INTER: {
517 bool flag = true;
518 bool flag_sg = false;
519 std::vector< Node > vec_nodes;
520 for(unsigned i=0; i<r.getNumChildren(); ++i) {
521 Node dc = derivativeSingle(r[i], c);
522 if(dc != d_emptyRegexp) {
523 if(dc == d_sigma_star) {
524 flag_sg = true;
525 } else {
526 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
527 vec_nodes.push_back( dc );
528 }
529 }
530 } else {
531 flag = false;
532 break;
533 }
534 }
535 if(flag) {
536 if(vec_nodes.size() == 0 && flag_sg) {
537 retNode = d_sigma_star;
538 } else {
539 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
540 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
541 }
542 } else {
543 retNode = d_emptyRegexp;
544 }
545 break;
546 }
547 case kind::REGEXP_STAR: {
548 Node dc = derivativeSingle(r[0], c);
549 if(dc != d_emptyRegexp) {
550 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
551 } else {
552 retNode = d_emptyRegexp;
553 }
554 break;
555 }
556 default: {
557 //TODO: special sym: sigma, none, all
558 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
559 Assert( false, "Unsupported Term" );
560 //return Node::null();
561 }
562 }
563 if(retNode != d_emptyRegexp) {
564 retNode = Rewriter::rewrite( retNode );
565 }
566 d_dv_cache[dv] = retNode;
567 }
568 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
569 return retNode;
570 }
571
572 //TODO:
573 bool RegExpOpr::guessLength( Node r, int &co ) {
574 int k = r.getKind();
575 switch( k ) {
576 case kind::STRING_TO_REGEXP:
577 {
578 if(r[0].isConst()) {
579 co += r[0].getConst< CVC4::String >().size();
580 return true;
581 } else {
582 return false;
583 }
584 }
585 break;
586 case kind::REGEXP_CONCAT:
587 {
588 for(unsigned i=0; i<r.getNumChildren(); ++i) {
589 if(!guessLength( r[i], co)) {
590 return false;
591 }
592 }
593 return true;
594 }
595 break;
596 case kind::REGEXP_UNION:
597 {
598 int g_co;
599 for(unsigned i=0; i<r.getNumChildren(); ++i) {
600 int cop = 0;
601 if(!guessLength( r[i], cop)) {
602 return false;
603 }
604 if(i == 0) {
605 g_co = cop;
606 } else {
607 g_co = gcd(g_co, cop);
608 }
609 }
610 return true;
611 }
612 break;
613 case kind::REGEXP_INTER:
614 {
615 int g_co;
616 for(unsigned i=0; i<r.getNumChildren(); ++i) {
617 int cop = 0;
618 if(!guessLength( r[i], cop)) {
619 return false;
620 }
621 if(i == 0) {
622 g_co = cop;
623 } else {
624 g_co = gcd(g_co, cop);
625 }
626 }
627 return true;
628 }
629 break;
630 case kind::REGEXP_STAR:
631 {
632 co = 0;
633 return true;
634 }
635 break;
636 default:
637 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
638 return false;
639 }
640 }
641
642 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
643 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
644 if(itr != d_fset_cache.end()) {
645 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
646 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
647 } else {
648 std::set<unsigned> cset;
649 SetNodes vset;
650 int k = r.getKind();
651 switch( k ) {
652 case kind::REGEXP_EMPTY: {
653 break;
654 }
655 case kind::REGEXP_SIGMA: {
656 for(unsigned i=0; i<d_card; i++) {
657 cset.insert(i);
658 }
659 break;
660 }
661 case kind::STRING_TO_REGEXP: {
662 Node st = Rewriter::rewrite(r[0]);
663 if(st.isConst()) {
664 CVC4::String s = st.getConst< CVC4::String >();
665 if(s.size() != 0) {
666 cset.insert(s[0]);
667 }
668 } else if(st.getKind() == kind::VARIABLE) {
669 vset.insert( st );
670 } else {
671 if(st[0].isConst()) {
672 CVC4::String s = st[0].getConst< CVC4::String >();
673 cset.insert(s[0]);
674 } else {
675 vset.insert( st[0] );
676 }
677 }
678 break;
679 }
680 case kind::REGEXP_CONCAT: {
681 for(unsigned i=0; i<r.getNumChildren(); i++) {
682 firstChars(r[i], cset, vset);
683 Node n = r[i];
684 Node exp;
685 int r = delta( n, exp );
686 if(r != 1) {
687 break;
688 }
689 }
690 break;
691 }
692 case kind::REGEXP_UNION: {
693 for(unsigned i=0; i<r.getNumChildren(); i++) {
694 firstChars(r[i], cset, vset);
695 }
696 break;
697 }
698 case kind::REGEXP_INTER: {
699 //TODO: Overapproximation for now
700 for(unsigned i=0; i<r.getNumChildren(); i++) {
701 firstChars(r[i], cset, vset);
702 }
703 break;
704 }
705 case kind::REGEXP_STAR: {
706 firstChars(r[0], cset, vset);
707 break;
708 }
709 default: {
710 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
711 Assert( false, "Unsupported Term" );
712 }
713 }
714 pcset.insert(cset.begin(), cset.end());
715 pvset.insert(vset.begin(), vset.end());
716 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
717 d_fset_cache[r] = p;
718
719 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
720 for(std::set<unsigned>::const_iterator itr = cset.begin();
721 itr != cset.end(); itr++) {
722 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
723 }
724 Trace("regexp-fset") << " }" << std::endl;
725 }
726 }
727
728 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
729 int k = r.getKind();
730 switch( k ) {
731 case kind::STRING_TO_REGEXP:
732 {
733 if(r[0].isConst()) {
734 if(r[0] != d_emptyString) {
735 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
736 if(c.isEmptyString()) {
737 vec_chars.push_back( t1 );
738 return true;
739 } else {
740 char t2 = c.getFirstChar();
741 if(t1 != t2) {
742 return false;
743 } else {
744 if(c.size() >= 2) {
745 vec_chars.push_back( c.substr(1,1).getFirstChar() );
746 } else {
747 vec_chars.push_back( '\0' );
748 }
749 return true;
750 }
751 }
752 } else {
753 return false;
754 }
755 } else {
756 return false;
757 }
758 }
759 break;
760 case kind::REGEXP_CONCAT:
761 {
762 for(unsigned i=0; i<r.getNumChildren(); ++i) {
763 if( follow(r[i], c, vec_chars) ) {
764 if(vec_chars[vec_chars.size() - 1] == '\0') {
765 vec_chars.pop_back();
766 c = d_emptyString.getConst< CVC4::String >();
767 }
768 } else {
769 return false;
770 }
771 }
772 vec_chars.push_back( '\0' );
773 return true;
774 }
775 break;
776 case kind::REGEXP_UNION:
777 {
778 bool flag = false;
779 for(unsigned i=0; i<r.getNumChildren(); ++i) {
780 if( follow(r[i], c, vec_chars) ) {
781 flag=true;
782 }
783 }
784 return flag;
785 }
786 break;
787 case kind::REGEXP_INTER:
788 {
789 std::vector< char > vt2;
790 for(unsigned i=0; i<r.getNumChildren(); ++i) {
791 std::vector< char > v_tmp;
792 if( !follow(r[i], c, v_tmp) ) {
793 return false;
794 }
795 std::vector< char > vt3(vt2);
796 vt2.clear();
797 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
798 if(vt2.size() == 0) {
799 return false;
800 }
801 }
802 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
803 return true;
804 }
805 break;
806 case kind::REGEXP_STAR:
807 {
808 if(follow(r[0], c, vec_chars)) {
809 if(vec_chars[vec_chars.size() - 1] == '\0') {
810 if(c.isEmptyString()) {
811 return true;
812 } else {
813 vec_chars.pop_back();
814 c = d_emptyString.getConst< CVC4::String >();
815 return follow(r[0], c, vec_chars);
816 }
817 } else {
818 return true;
819 }
820 } else {
821 vec_chars.push_back( '\0' );
822 return true;
823 }
824 }
825 break;
826 default: {
827 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
828 //AlwaysAssert( false );
829 //return Node::null();
830 return false;
831 }
832 }
833 }
834
835 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
836 std::vector< Node > vec_nodes;
837 for(char c=d_char_start; c<=d_char_end; ++c) {
838 if(c != exp_c ) {
839 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
840 vec_nodes.push_back( n );
841 }
842 }
843 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
844 }
845
846 //simplify
847 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
848 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
849 Assert(t.getKind() == kind::STRING_IN_REGEXP);
850 Node str = Rewriter::rewrite(t[0]);
851 Node re = Rewriter::rewrite(t[1]);
852 if(polarity) {
853 simplifyPRegExp( str, re, new_nodes );
854 } else {
855 simplifyNRegExp( str, re, new_nodes );
856 }
857 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
858 for(unsigned i=0; i<new_nodes.size(); i++) {
859 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
860 }
861 }
862 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
863 std::pair < Node, Node > p(s, r);
864 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
865 if(itr != d_simpl_neg_cache.end()) {
866 new_nodes.push_back( itr->second );
867 } else {
868 int k = r.getKind();
869 Node conc;
870 switch( k ) {
871 case kind::REGEXP_EMPTY: {
872 conc = d_true;
873 break;
874 }
875 case kind::REGEXP_SIGMA: {
876 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
877 break;
878 }
879 case kind::STRING_TO_REGEXP: {
880 conc = s.eqNode(r[0]).negate();
881 break;
882 }
883 case kind::REGEXP_CONCAT: {
884 //TODO: rewrite empty
885 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
886 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
887 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
888 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
889 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
890 Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1));
891 Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)));
892 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
893 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
894 s1r1 = s1.eqNode(r[0][0]).negate();
895 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
896 s1r1 = d_true;
897 }
898 Node r2 = r[1];
899 if(r.getNumChildren() > 2) {
900 std::vector< Node > nvec;
901 for(unsigned i=1; i<r.getNumChildren(); i++) {
902 nvec.push_back( r[i] );
903 }
904 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
905 }
906 r2 = Rewriter::rewrite(r2);
907 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
908 if(r2.getKind() == kind::STRING_TO_REGEXP) {
909 s2r2 = s2.eqNode(r2[0]).negate();
910 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
911 s2r2 = d_true;
912 }
913
914 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
915 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
916 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
917 break;
918 }
919 case kind::REGEXP_UNION: {
920 std::vector< Node > c_and;
921 for(unsigned i=0; i<r.getNumChildren(); ++i) {
922 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
923 c_and.push_back( r[i][0].eqNode(s).negate() );
924 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
925 continue;
926 } else {
927 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
928 }
929 }
930 conc = c_and.size() == 0 ? d_true :
931 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
932 break;
933 }
934 case kind::REGEXP_INTER: {
935 bool emptyflag = false;
936 std::vector< Node > c_or;
937 for(unsigned i=0; i<r.getNumChildren(); ++i) {
938 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
939 c_or.push_back( r[i][0].eqNode(s).negate() );
940 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
941 emptyflag = true;
942 break;
943 } else {
944 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
945 }
946 }
947 if(emptyflag) {
948 conc = d_true;
949 } else {
950 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
951 }
952 break;
953 }
954 case kind::REGEXP_STAR: {
955 if(s == d_emptyString) {
956 conc = d_false;
957 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
958 conc = s.eqNode(d_emptyString).negate();
959 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
960 conc = d_false;
961 } else {
962 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
963 Node sne = s.eqNode(d_emptyString).negate();
964 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
965 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
966 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
967 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
968 //internal
969 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
970 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
971 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
972 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
973
974 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
975 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
976 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
977 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
978 }
979 break;
980 }
981 default: {
982 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
983 Assert( false, "Unsupported Term" );
984 }
985 }
986 conc = Rewriter::rewrite( conc );
987 new_nodes.push_back( conc );
988 d_simpl_neg_cache[p] = conc;
989 }
990 }
991 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
992 std::pair < Node, Node > p(s, r);
993 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
994 if(itr != d_simpl_cache.end()) {
995 new_nodes.push_back( itr->second );
996 } else {
997 int k = r.getKind();
998 Node conc;
999 switch( k ) {
1000 case kind::REGEXP_EMPTY: {
1001 conc = d_false;
1002 break;
1003 }
1004 case kind::REGEXP_SIGMA: {
1005 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1006 break;
1007 }
1008 case kind::STRING_TO_REGEXP: {
1009 conc = s.eqNode(r[0]);
1010 break;
1011 }
1012 case kind::REGEXP_CONCAT: {
1013 std::vector< Node > nvec;
1014 std::vector< Node > cc;
1015 bool emptyflag = false;
1016 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1017 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1018 cc.push_back( r[i][0] );
1019 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1020 emptyflag = true;
1021 break;
1022 } else {
1023 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1024 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1025 nvec.push_back(lem);
1026 cc.push_back(sk);
1027 }
1028 }
1029 if(emptyflag) {
1030 conc = d_false;
1031 } else {
1032 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1033 nvec.push_back(lem);
1034 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1035 }
1036 break;
1037 }
1038 case kind::REGEXP_UNION: {
1039 std::vector< Node > c_or;
1040 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1041 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1042 c_or.push_back( r[i][0].eqNode(s) );
1043 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1044 continue;
1045 } else {
1046 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1047 }
1048 }
1049 conc = c_or.size() == 0 ? d_false :
1050 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1051 break;
1052 }
1053 case kind::REGEXP_INTER: {
1054 std::vector< Node > c_and;
1055 bool emptyflag = false;
1056 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1057 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1058 c_and.push_back( r[i][0].eqNode(s) );
1059 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1060 emptyflag = true;
1061 break;
1062 } else {
1063 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1064 }
1065 }
1066 if(emptyflag) {
1067 conc = d_false;
1068 } else {
1069 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1070 }
1071 break;
1072 }
1073 case kind::REGEXP_STAR: {
1074 if(s == d_emptyString) {
1075 conc = d_true;
1076 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1077 conc = s.eqNode(d_emptyString);
1078 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1079 conc = d_true;
1080 } else {
1081 Node se = s.eqNode(d_emptyString);
1082 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1083 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1084 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1085 Node s1nz = sk1.eqNode(d_emptyString).negate();
1086 Node s2nz = sk2.eqNode(d_emptyString).negate();
1087 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1088 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1089 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1090
1091 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1092 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1093 }
1094 break;
1095 }
1096 default: {
1097 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1098 Assert( false, "Unsupported Term" );
1099 }
1100 }
1101 conc = Rewriter::rewrite( conc );
1102 new_nodes.push_back( conc );
1103 d_simpl_cache[p] = conc;
1104 }
1105 }
1106
1107 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1108 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1109 if(itr != d_cset_cache.end()) {
1110 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1111 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1112 } else {
1113 std::set<unsigned> cset;
1114 SetNodes vset;
1115 int k = r.getKind();
1116 switch( k ) {
1117 case kind::REGEXP_EMPTY: {
1118 break;
1119 }
1120 case kind::REGEXP_SIGMA: {
1121 for(unsigned i=0; i<d_card; i++) {
1122 cset.insert(i);
1123 }
1124 break;
1125 }
1126 case kind::STRING_TO_REGEXP: {
1127 Node st = Rewriter::rewrite(r[0]);
1128 if(st.isConst()) {
1129 CVC4::String s = st.getConst< CVC4::String >();
1130 s.getCharSet( cset );
1131 } else if(st.getKind() == kind::VARIABLE) {
1132 vset.insert( st );
1133 } else {
1134 for(unsigned i=0; i<st.getNumChildren(); i++) {
1135 if(st[i].isConst()) {
1136 CVC4::String s = st[i].getConst< CVC4::String >();
1137 s.getCharSet( cset );
1138 } else {
1139 vset.insert( st[i] );
1140 }
1141 }
1142 }
1143 break;
1144 }
1145 case kind::REGEXP_CONCAT: {
1146 for(unsigned i=0; i<r.getNumChildren(); i++) {
1147 getCharSet(r[i], cset, vset);
1148 }
1149 break;
1150 }
1151 case kind::REGEXP_UNION: {
1152 for(unsigned i=0; i<r.getNumChildren(); i++) {
1153 getCharSet(r[i], cset, vset);
1154 }
1155 break;
1156 }
1157 case kind::REGEXP_INTER: {
1158 //TODO: Overapproximation for now
1159 for(unsigned i=0; i<r.getNumChildren(); i++) {
1160 getCharSet(r[i], cset, vset);
1161 }
1162 break;
1163 }
1164 case kind::REGEXP_STAR: {
1165 getCharSet(r[0], cset, vset);
1166 break;
1167 }
1168 default: {
1169 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1170 Assert( false, "Unsupported Term" );
1171 }
1172 }
1173 pcset.insert(cset.begin(), cset.end());
1174 pvset.insert(vset.begin(), vset.end());
1175 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1176 d_cset_cache[r] = p;
1177
1178 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1179 for(std::set<unsigned>::const_iterator itr = cset.begin();
1180 itr != cset.end(); itr++) {
1181 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1182 }
1183 Trace("regexp-cset") << " }" << std::endl;
1184 }
1185 }
1186
1187
1188 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1189 if(spflag) {
1190 //TODO: var
1191 return Node::null();
1192 }
1193 std::pair < Node, Node > p(r1, r2);
1194 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1195 Node rNode;
1196 if(itr != d_inter_cache.end()) {
1197 rNode = itr->second;
1198 } else {
1199 if(r1 == r2) {
1200 rNode = r1;
1201 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1202 rNode = d_emptyRegexp;
1203 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1204 Node exp;
1205 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1206 if(r == 0) {
1207 //TODO: variable
1208 spflag = true;
1209 } else if(r == 1) {
1210 rNode = d_emptySingleton;
1211 } else {
1212 rNode = d_emptyRegexp;
1213 }
1214 } else {
1215 std::set< unsigned > cset, cset2;
1216 std::set< Node > vset, vset2;
1217 getCharSet(r1, cset, vset);
1218 getCharSet(r2, cset2, vset2);
1219 if(vset.empty() && vset2.empty()) {
1220 cset.clear();
1221 firstChars(r1, cset, vset);
1222 std::vector< Node > vec_nodes;
1223 Node delta_exp;
1224 int flag = delta(r1, delta_exp);
1225 int flag2 = delta(r2, delta_exp);
1226 if(flag != 2 && flag2 != 2) {
1227 if(flag == 1 && flag2 == 1) {
1228 vec_nodes.push_back(d_emptySingleton);
1229 } else {
1230 //TODO
1231 spflag = true;
1232 }
1233 }
1234 for(std::set<unsigned>::const_iterator itr = cset.begin();
1235 itr != cset.end(); itr++) {
1236 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1237 std::pair< Node, Node > p(r1, r2);
1238 if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
1239 Node r1l = derivativeSingle(r1, c);
1240 Node r2l = derivativeSingle(r2, c);
1241 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1242 PairNodes p(r1l, r2l);
1243 cache2[ *itr ].insert( p );
1244 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1245 if(spflag) {
1246 //TODO:
1247 return Node::null();
1248 }
1249 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1250 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1251 vec_nodes.push_back(rt);
1252 }
1253 }
1254 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1255 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1256 rNode = Rewriter::rewrite( rNode );
1257 } else {
1258 //TODO: non-empty var set
1259 spflag = true;
1260 }
1261 }
1262 d_inter_cache[p] = rNode;
1263 }
1264 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1265 return rNode;
1266 }
1267 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1268 std::map< unsigned, std::set< PairNodes > > cache;
1269 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1270 return intersectInternal(r1, r2, cache, spflag);
1271 } else {
1272 spflag = true;
1273 return Node::null();
1274 }
1275 }
1276
1277 Node RegExpOpr::complement(Node r, int &ret) {
1278 Node rNode;
1279 ret = 1;
1280 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1281 rNode = d_compl_cache[r].first;
1282 ret = d_compl_cache[r].second;
1283 } else {
1284 if(r == d_emptyRegexp) {
1285 rNode = d_sigma_star;
1286 } else if(r == d_emptySingleton) {
1287 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1288 } else if(!checkConstRegExp(r)) {
1289 //TODO: var to be extended
1290 ret = 0;
1291 } else {
1292 std::set<unsigned> cset;
1293 SetNodes vset;
1294 firstChars(r, cset, vset);
1295 std::vector< Node > vec_nodes;
1296 for(unsigned i=0; i<d_card; i++) {
1297 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1298 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1299 Node r2;
1300 if(cset.find(i) == cset.end()) {
1301 r2 = d_sigma_star;
1302 } else {
1303 int rt;
1304 derivativeS(r, c, r2);
1305 if(r2 == r) {
1306 r2 = d_emptyRegexp;
1307 } else {
1308 r2 = complement(r2, rt);
1309 }
1310 }
1311 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1312 vec_nodes.push_back(n);
1313 }
1314 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1315 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1316 }
1317 rNode = Rewriter::rewrite(rNode);
1318 std::pair< Node, int > p(rNode, ret);
1319 d_compl_cache[r] = p;
1320 }
1321 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1322 return rNode;
1323 }
1324
1325 void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) {
1326 Assert(checkConstRegExp(r));
1327 if(d_split_cache.find(r) != d_split_cache.end()) {
1328 pset = d_split_cache[r];
1329 } else {
1330 switch( r.getKind() ) {
1331 case kind::REGEXP_EMPTY: {
1332 break;
1333 }
1334 case kind::REGEXP_OPT: {
1335 PairNodes tmp(d_emptySingleton, d_emptySingleton);
1336 pset.push_back(tmp);
1337 }
1338 case kind::REGEXP_RANGE:
1339 case kind::REGEXP_SIGMA: {
1340 PairNodes tmp1(d_emptySingleton, r);
1341 PairNodes tmp2(r, d_emptySingleton);
1342 pset.push_back(tmp1);
1343 pset.push_back(tmp2);
1344 break;
1345 }
1346 case kind::STRING_TO_REGEXP: {
1347 Assert(r[0].isConst());
1348 CVC4::String s = r[0].getConst< CVC4::String >();
1349 PairNodes tmp1(d_emptySingleton, r);
1350 pset.push_back(tmp1);
1351 for(unsigned i=1; i<s.size(); i++) {
1352 CVC4::String s1 = s.substr(0, i);
1353 CVC4::String s2 = s.substr(i);
1354 Node n1 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s1));
1355 Node n2 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s2));
1356 PairNodes tmp3(n1, n2);
1357 pset.push_back(tmp3);
1358 }
1359 PairNodes tmp2(r, d_emptySingleton);
1360 pset.push_back(tmp2);
1361 break;
1362 }
1363 case kind::REGEXP_CONCAT: {
1364 for(unsigned i=0; i<r.getNumChildren(); i++) {
1365 std::vector< PairNodes > tset;
1366 splitRegExp(r[i], tset);
1367 std::vector< Node > hvec;
1368 std::vector< Node > tvec;
1369 for(unsigned j=0; j<=i; j++) {
1370 hvec.push_back(r[j]);
1371 }
1372 for(unsigned j=i; j<r.getNumChildren(); j++) {
1373 tvec.push_back(r[j]);
1374 }
1375 for(unsigned j=0; j<tset.size(); j++) {
1376 hvec[i] = tset[j].first;
1377 tvec[0] = tset[j].second;
1378 Node r1 = Rewriter::rewrite( hvec.size()==1?hvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, hvec) );
1379 Node r2 = Rewriter::rewrite( tvec.size()==1?tvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tvec) );
1380 PairNodes tmp2(r1, r2);
1381 pset.push_back(tmp2);
1382 }
1383 }
1384 break;
1385 }
1386 case kind::REGEXP_UNION: {
1387 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1388 std::vector< PairNodes > tset;
1389 splitRegExp(r[i], tset);
1390 pset.insert(pset.end(), tset.begin(), tset.end());
1391 }
1392 break;
1393 }
1394 case kind::REGEXP_INTER: {
1395 bool spflag = false;
1396 Node tmp = r[0];
1397 for(unsigned i=1; i<r.getNumChildren(); i++) {
1398 tmp = intersect(tmp, r[i], spflag);
1399 }
1400 splitRegExp(tmp, pset);
1401 break;
1402 }
1403 case kind::REGEXP_STAR: {
1404 std::vector< PairNodes > tset;
1405 splitRegExp(r[0], tset);
1406 PairNodes tmp1(d_emptySingleton, d_emptySingleton);
1407 pset.push_back(tmp1);
1408 for(unsigned i=0; i<tset.size(); i++) {
1409 Node r1 = tset[i].first==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1410 Node r2 = tset[i].second==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1411 PairNodes tmp2(r1, r2);
1412 pset.push_back(tmp2);
1413 }
1414 break;
1415 }
1416 case kind::REGEXP_PLUS: {
1417 std::vector< PairNodes > tset;
1418 splitRegExp(r[0], tset);
1419 for(unsigned i=0; i<tset.size(); i++) {
1420 Node r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1421 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1422 PairNodes tmp2(r1, r2);
1423 pset.push_back(tmp2);
1424 }
1425 break;
1426 }
1427 default: {
1428 Trace("strings-error") << "Unsupported term: " << r << " in splitRegExp." << std::endl;
1429 Assert( false );
1430 //return Node::null();
1431 }
1432 }
1433 d_split_cache[r] = pset;
1434 }
1435 }
1436
1437 //printing
1438 std::string RegExpOpr::niceChar( Node r ) {
1439 if(r.isConst()) {
1440 std::string s = r.getConst<CVC4::String>().toString() ;
1441 return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s );
1442 } else {
1443 std::string ss = "$" + r.toString();
1444 return ss;
1445 }
1446 }
1447 std::string RegExpOpr::mkString( Node r ) {
1448 std::string retStr;
1449 if(r.isNull()) {
1450 retStr = "Empty";
1451 } else {
1452 int k = r.getKind();
1453 switch( k ) {
1454 case kind::REGEXP_EMPTY: {
1455 retStr += "Empty";
1456 break;
1457 }
1458 case kind::REGEXP_SIGMA: {
1459 retStr += "{W}";
1460 break;
1461 }
1462 case kind::STRING_TO_REGEXP: {
1463 retStr += niceChar( r[0] );
1464 break;
1465 }
1466 case kind::REGEXP_CONCAT: {
1467 retStr += "(";
1468 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1469 //if(i != 0) retStr += ".";
1470 retStr += mkString( r[i] );
1471 }
1472 retStr += ")";
1473 break;
1474 }
1475 case kind::REGEXP_UNION: {
1476 if(r == d_sigma) {
1477 retStr += "{A}";
1478 } else {
1479 retStr += "(";
1480 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1481 if(i != 0) retStr += "|";
1482 retStr += mkString( r[i] );
1483 }
1484 retStr += ")";
1485 }
1486 break;
1487 }
1488 case kind::REGEXP_INTER: {
1489 retStr += "(";
1490 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1491 if(i != 0) retStr += "&";
1492 retStr += mkString( r[i] );
1493 }
1494 retStr += ")";
1495 break;
1496 }
1497 case kind::REGEXP_STAR: {
1498 retStr += mkString( r[0] );
1499 retStr += "*";
1500 break;
1501 }
1502 case kind::REGEXP_PLUS: {
1503 retStr += mkString( r[0] );
1504 retStr += "+";
1505 break;
1506 }
1507 case kind::REGEXP_OPT: {
1508 retStr += mkString( r[0] );
1509 retStr += "?";
1510 break;
1511 }
1512 case kind::REGEXP_RANGE: {
1513 retStr += "[";
1514 retStr += niceChar( r[0] );
1515 retStr += "-";
1516 retStr += niceChar( r[1] );
1517 retStr += "]";
1518 break;
1519 }
1520 default:
1521 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1522 //Assert( false );
1523 //return Node::null();
1524 }
1525 }
1526
1527 return retStr;
1528 }
1529
1530 }/* CVC4::theory::strings namespace */
1531 }/* CVC4::theory namespace */
1532 }/* CVC4 namespace */