Merge branch '1.4.x'
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2 /*! \file regexp_operation.cpp
3 ** \verbatim
4 ** Original author: Tianyi Liang
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief Symbolic Regular Expresion Operations
13 **
14 ** Symbolic Regular Expresion Operations
15 **/
16
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
19
20 namespace CVC4 {
21 namespace theory {
22 namespace strings {
23
24 RegExpOpr::RegExpOpr() {
25 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
26 d_true = NodeManager::currentNM()->mkConst( true );
27 d_false = NodeManager::currentNM()->mkConst( false );
28 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
29 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
30 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
31 std::vector< Node > nvec;
32 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
33 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
34 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
35 d_card = 256;
36 }
37
38 int RegExpOpr::gcd ( int a, int b ) {
39 int c;
40 while ( a != 0 ) {
41 c = a; a = b%a; b = c;
42 }
43 return b;
44 }
45
46 bool RegExpOpr::checkConstRegExp( Node r ) {
47 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
48 bool ret = true;
49 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
50 ret = d_cstre_cache[r];
51 } else {
52 if(r.getKind() == kind::STRING_TO_REGEXP) {
53 Node tmp = Rewriter::rewrite( r[0] );
54 ret = tmp.isConst();
55 } else {
56 for(unsigned i=0; i<r.getNumChildren(); ++i) {
57 if(!checkConstRegExp(r[i])) {
58 ret = false; break;
59 }
60 }
61 }
62 d_cstre_cache[r] = ret;
63 }
64 return ret;
65 }
66
67 // 0-unknown, 1-yes, 2-no
68 int RegExpOpr::delta( Node r, Node &exp ) {
69 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
70 int ret = 0;
71 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
72 ret = d_delta_cache[r].first;
73 exp = d_delta_cache[r].second;
74 } else {
75 int k = r.getKind();
76 switch( k ) {
77 case kind::REGEXP_EMPTY: {
78 ret = 2;
79 break;
80 }
81 case kind::REGEXP_SIGMA: {
82 ret = 2;
83 break;
84 }
85 case kind::STRING_TO_REGEXP: {
86 Node tmp = Rewriter::rewrite(r[0]);
87 if(tmp.isConst()) {
88 if(tmp == d_emptyString) {
89 ret = 1;
90 } else {
91 ret = 2;
92 }
93 } else {
94 ret = 0;
95 if(tmp.getKind() == kind::STRING_CONCAT) {
96 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
97 if(tmp[i].isConst()) {
98 ret = 2; break;
99 }
100 }
101
102 }
103 if(ret == 0) {
104 exp = r[0].eqNode(d_emptyString);
105 }
106 }
107 break;
108 }
109 case kind::REGEXP_CONCAT: {
110 bool flag = false;
111 std::vector< Node > vec_nodes;
112 for(unsigned i=0; i<r.getNumChildren(); ++i) {
113 Node exp2;
114 int tmp = delta( r[i], exp2 );
115 if(tmp == 2) {
116 ret = 2;
117 break;
118 } else if(tmp == 0) {
119 vec_nodes.push_back( exp2 );
120 flag = true;
121 }
122 }
123 if(ret != 2) {
124 if(!flag) {
125 ret = 1;
126 } else {
127 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
128 }
129 }
130 break;
131 }
132 case kind::REGEXP_UNION: {
133 bool flag = false;
134 std::vector< Node > vec_nodes;
135 for(unsigned i=0; i<r.getNumChildren(); ++i) {
136 Node exp2;
137 int tmp = delta( r[i], exp2 );
138 if(tmp == 1) {
139 ret = 1;
140 break;
141 } else if(tmp == 0) {
142 vec_nodes.push_back( exp2 );
143 flag = true;
144 }
145 }
146 if(ret != 1) {
147 if(!flag) {
148 ret = 2;
149 } else {
150 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
151 }
152 }
153 break;
154 }
155 case kind::REGEXP_INTER: {
156 bool flag = false;
157 std::vector< Node > vec_nodes;
158 for(unsigned i=0; i<r.getNumChildren(); ++i) {
159 Node exp2;
160 int tmp = delta( r[i], exp2 );
161 if(tmp == 2) {
162 ret = 2;
163 break;
164 } else if(tmp == 0) {
165 vec_nodes.push_back( exp2 );
166 flag = true;
167 }
168 }
169 if(ret != 2) {
170 if(!flag) {
171 ret = 1;
172 } else {
173 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
174 }
175 }
176 break;
177 }
178 case kind::REGEXP_STAR: {
179 ret = 1;
180 break;
181 }
182 case kind::REGEXP_PLUS: {
183 ret = delta( r[0], exp );
184 break;
185 }
186 case kind::REGEXP_OPT: {
187 ret = 1;
188 break;
189 }
190 case kind::REGEXP_RANGE: {
191 ret = 2;
192 break;
193 }
194 default: {
195 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
196 Assert( false );
197 //return Node::null();
198 }
199 }
200 if(!exp.isNull()) {
201 exp = Rewriter::rewrite(exp);
202 }
203 std::pair< int, Node > p(ret, exp);
204 d_delta_cache[r] = p;
205 }
206 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
207 return ret;
208 }
209
210 // 0-unknown, 1-yes, 2-no
211 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
212 Assert( c.size() < 2 );
213 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
214
215 int ret = 1;
216 retNode = d_emptyRegexp;
217
218 PairNodeStr dv = std::make_pair( r, c );
219 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
220 retNode = d_deriv_cache[dv].first;
221 ret = d_deriv_cache[dv].second;
222 } else if( c.isEmptyString() ) {
223 Node expNode;
224 ret = delta( r, expNode );
225 if(ret == 0) {
226 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
227 } else if(ret == 1) {
228 retNode = r;
229 }
230 std::pair< Node, int > p(retNode, ret);
231 d_deriv_cache[dv] = p;
232 } else {
233 switch( r.getKind() ) {
234 case kind::REGEXP_EMPTY: {
235 ret = 2;
236 break;
237 }
238 case kind::REGEXP_SIGMA: {
239 retNode = d_emptySingleton;
240 break;
241 }
242 case kind::STRING_TO_REGEXP: {
243 Node tmp = Rewriter::rewrite(r[0]);
244 if(tmp.isConst()) {
245 if(tmp == d_emptyString) {
246 ret = 2;
247 } else {
248 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
249 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
250 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
251 } else {
252 ret = 2;
253 }
254 }
255 } else {
256 ret = 0;
257 Node rest;
258 if(tmp.getKind() == kind::STRING_CONCAT) {
259 Node t2 = tmp[0];
260 if(t2.isConst()) {
261 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
262 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
263 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
264 std::vector< Node > vec_nodes;
265 vec_nodes.push_back(n);
266 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
267 vec_nodes.push_back(tmp[i]);
268 }
269 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
270 ret = 1;
271 } else {
272 ret = 2;
273 }
274 } else {
275 tmp = tmp[0];
276 std::vector< Node > vec_nodes;
277 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
278 vec_nodes.push_back(tmp[i]);
279 }
280 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
281 }
282 }
283 if(ret == 0) {
284 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
285 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
286 if(!rest.isNull()) {
287 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
288 }
289 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
290 NodeManager::currentNM()->mkConst(c), sk));
291 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
292 }
293 }
294 break;
295 }
296 case kind::REGEXP_CONCAT: {
297 std::vector< Node > vec_nodes;
298 std::vector< Node > delta_nodes;
299 Node dnode = d_true;
300 for(unsigned i=0; i<r.getNumChildren(); ++i) {
301 Node dc;
302 Node exp2;
303 int rt = derivativeS(r[i], c, dc);
304 if(rt != 2) {
305 if(rt == 0) {
306 ret = 0;
307 }
308 std::vector< Node > vec_nodes2;
309 if(dc != d_emptySingleton) {
310 vec_nodes2.push_back( dc );
311 }
312 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
313 if(r[j] != d_emptySingleton) {
314 vec_nodes2.push_back( r[j] );
315 }
316 }
317 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
318 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
319 if(dnode != d_true) {
320 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
321 ret = 0;
322 }
323 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
324 vec_nodes.push_back( tmp );
325 }
326 }
327 Node exp3;
328 int rt2 = delta( r[i], exp3 );
329 if( rt2 == 0 ) {
330 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
331 } else if( rt2 == 2 ) {
332 break;
333 }
334 }
335 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
336 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
337 if(retNode == d_emptyRegexp) {
338 ret = 2;
339 }
340 break;
341 }
342 case kind::REGEXP_UNION: {
343 std::vector< Node > vec_nodes;
344 for(unsigned i=0; i<r.getNumChildren(); ++i) {
345 Node dc;
346 int rt = derivativeS(r[i], c, dc);
347 if(rt == 0) {
348 ret = 0;
349 }
350 if(rt != 2) {
351 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
352 vec_nodes.push_back( dc );
353 }
354 }
355 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
356 }
357 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
358 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
359 if(retNode == d_emptyRegexp) {
360 ret = 2;
361 }
362 break;
363 }
364 case kind::REGEXP_INTER: {
365 bool flag = true;
366 bool flag_sg = false;
367 std::vector< Node > vec_nodes;
368 for(unsigned i=0; i<r.getNumChildren(); ++i) {
369 Node dc;
370 int rt = derivativeS(r[i], c, dc);
371 if(rt == 0) {
372 ret = 0;
373 } else if(rt == 2) {
374 flag = false;
375 break;
376 }
377 if(dc == d_sigma_star) {
378 flag_sg = true;
379 } else {
380 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
381 vec_nodes.push_back( dc );
382 }
383 }
384 }
385 if(flag) {
386 if(vec_nodes.size() == 0 && flag_sg) {
387 retNode = d_sigma_star;
388 } else {
389 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
390 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
391 if(retNode == d_emptyRegexp) {
392 ret = 2;
393 }
394 }
395 } else {
396 retNode = d_emptyRegexp;
397 ret = 2;
398 }
399 break;
400 }
401 case kind::REGEXP_STAR: {
402 Node dc;
403 ret = derivativeS(r[0], c, dc);
404 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
405 break;
406 }
407 default: {
408 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
409 Assert( false, "Unsupported Term" );
410 }
411 }
412 if(retNode != d_emptyRegexp) {
413 retNode = Rewriter::rewrite( retNode );
414 }
415 std::pair< Node, int > p(retNode, ret);
416 d_deriv_cache[dv] = p;
417 }
418
419 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
420 return ret;
421 }
422
423 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
424 Assert( c.size() < 2 );
425 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
426 Node retNode = d_emptyRegexp;
427 PairNodeStr dv = std::make_pair( r, c );
428 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
429 retNode = d_dv_cache[dv];
430 } else if( c.isEmptyString() ){
431 Node exp;
432 int tmp = delta( r, exp );
433 if(tmp == 0) {
434 // TODO variable
435 retNode = d_emptyRegexp;
436 } else if(tmp == 1) {
437 retNode = r;
438 } else {
439 retNode = d_emptyRegexp;
440 }
441 } else {
442 int k = r.getKind();
443 switch( k ) {
444 case kind::REGEXP_EMPTY: {
445 retNode = d_emptyRegexp;
446 break;
447 }
448 case kind::REGEXP_SIGMA: {
449 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
450 break;
451 }
452 case kind::STRING_TO_REGEXP: {
453 if(r[0].isConst()) {
454 if(r[0] == d_emptyString) {
455 retNode = d_emptyRegexp;
456 } else {
457 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
458 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
459 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
460 } else {
461 retNode = d_emptyRegexp;
462 }
463 }
464 } else {
465 // TODO variable
466 retNode = d_emptyRegexp;
467 }
468 break;
469 }
470 case kind::REGEXP_CONCAT: {
471 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
472 std::vector< Node > vec_nodes;
473 for(unsigned i=0; i<r.getNumChildren(); ++i) {
474 Node dc = derivativeSingle(r[i], c);
475 if(dc != d_emptyRegexp) {
476 std::vector< Node > vec_nodes2;
477 if(dc != rees) {
478 vec_nodes2.push_back( dc );
479 }
480 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
481 if(r[j] != rees) {
482 vec_nodes2.push_back( r[j] );
483 }
484 }
485 Node tmp = vec_nodes2.size()==0 ? rees :
486 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
487 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
488 vec_nodes.push_back( tmp );
489 }
490 }
491 Node exp;
492 if( delta( r[i], exp ) != 1 ) {
493 break;
494 }
495 }
496 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
497 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
498 break;
499 }
500 case kind::REGEXP_UNION: {
501 std::vector< Node > vec_nodes;
502 for(unsigned i=0; i<r.getNumChildren(); ++i) {
503 Node dc = derivativeSingle(r[i], c);
504 if(dc != d_emptyRegexp) {
505 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
506 vec_nodes.push_back( dc );
507 }
508 }
509 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
510 }
511 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
512 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
513 break;
514 }
515 case kind::REGEXP_INTER: {
516 bool flag = true;
517 bool flag_sg = false;
518 std::vector< Node > vec_nodes;
519 for(unsigned i=0; i<r.getNumChildren(); ++i) {
520 Node dc = derivativeSingle(r[i], c);
521 if(dc != d_emptyRegexp) {
522 if(dc == d_sigma_star) {
523 flag_sg = true;
524 } else {
525 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
526 vec_nodes.push_back( dc );
527 }
528 }
529 } else {
530 flag = false;
531 break;
532 }
533 }
534 if(flag) {
535 if(vec_nodes.size() == 0 && flag_sg) {
536 retNode = d_sigma_star;
537 } else {
538 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
539 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
540 }
541 } else {
542 retNode = d_emptyRegexp;
543 }
544 break;
545 }
546 case kind::REGEXP_STAR: {
547 Node dc = derivativeSingle(r[0], c);
548 if(dc != d_emptyRegexp) {
549 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
550 } else {
551 retNode = d_emptyRegexp;
552 }
553 break;
554 }
555 default: {
556 //TODO: special sym: sigma, none, all
557 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
558 Assert( false, "Unsupported Term" );
559 //return Node::null();
560 }
561 }
562 if(retNode != d_emptyRegexp) {
563 retNode = Rewriter::rewrite( retNode );
564 }
565 d_dv_cache[dv] = retNode;
566 }
567 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
568 return retNode;
569 }
570
571 //TODO:
572 bool RegExpOpr::guessLength( Node r, int &co ) {
573 int k = r.getKind();
574 switch( k ) {
575 case kind::STRING_TO_REGEXP:
576 {
577 if(r[0].isConst()) {
578 co += r[0].getConst< CVC4::String >().size();
579 return true;
580 } else {
581 return false;
582 }
583 }
584 break;
585 case kind::REGEXP_CONCAT:
586 {
587 for(unsigned i=0; i<r.getNumChildren(); ++i) {
588 if(!guessLength( r[i], co)) {
589 return false;
590 }
591 }
592 return true;
593 }
594 break;
595 case kind::REGEXP_UNION:
596 {
597 int g_co;
598 for(unsigned i=0; i<r.getNumChildren(); ++i) {
599 int cop = 0;
600 if(!guessLength( r[i], cop)) {
601 return false;
602 }
603 if(i == 0) {
604 g_co = cop;
605 } else {
606 g_co = gcd(g_co, cop);
607 }
608 }
609 return true;
610 }
611 break;
612 case kind::REGEXP_INTER:
613 {
614 int g_co;
615 for(unsigned i=0; i<r.getNumChildren(); ++i) {
616 int cop = 0;
617 if(!guessLength( r[i], cop)) {
618 return false;
619 }
620 if(i == 0) {
621 g_co = cop;
622 } else {
623 g_co = gcd(g_co, cop);
624 }
625 }
626 return true;
627 }
628 break;
629 case kind::REGEXP_STAR:
630 {
631 co = 0;
632 return true;
633 }
634 break;
635 default:
636 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
637 return false;
638 }
639 }
640
641 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
642 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
643 if(itr != d_fset_cache.end()) {
644 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
645 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
646 } else {
647 std::set<unsigned> cset;
648 SetNodes vset;
649 int k = r.getKind();
650 switch( k ) {
651 case kind::REGEXP_EMPTY: {
652 break;
653 }
654 case kind::REGEXP_SIGMA: {
655 for(unsigned i=0; i<d_card; i++) {
656 cset.insert(i);
657 }
658 break;
659 }
660 case kind::STRING_TO_REGEXP: {
661 Node st = Rewriter::rewrite(r[0]);
662 if(st.isConst()) {
663 CVC4::String s = st.getConst< CVC4::String >();
664 if(s.size() != 0) {
665 cset.insert(s[0]);
666 }
667 } else if(st.getKind() == kind::VARIABLE) {
668 vset.insert( st );
669 } else {
670 if(st[0].isConst()) {
671 CVC4::String s = st[0].getConst< CVC4::String >();
672 cset.insert(s[0]);
673 } else {
674 vset.insert( st[0] );
675 }
676 }
677 break;
678 }
679 case kind::REGEXP_CONCAT: {
680 for(unsigned i=0; i<r.getNumChildren(); i++) {
681 firstChars(r[i], cset, vset);
682 Node n = r[i];
683 Node exp;
684 int r = delta( n, exp );
685 if(r != 1) {
686 break;
687 }
688 }
689 break;
690 }
691 case kind::REGEXP_UNION: {
692 for(unsigned i=0; i<r.getNumChildren(); i++) {
693 firstChars(r[i], cset, vset);
694 }
695 break;
696 }
697 case kind::REGEXP_INTER: {
698 //TODO: Overapproximation for now
699 for(unsigned i=0; i<r.getNumChildren(); i++) {
700 firstChars(r[i], cset, vset);
701 }
702 break;
703 }
704 case kind::REGEXP_STAR: {
705 firstChars(r[0], cset, vset);
706 break;
707 }
708 default: {
709 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
710 Assert( false, "Unsupported Term" );
711 }
712 }
713 pcset.insert(cset.begin(), cset.end());
714 pvset.insert(vset.begin(), vset.end());
715 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
716 d_fset_cache[r] = p;
717
718 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
719 for(std::set<unsigned>::const_iterator itr = cset.begin();
720 itr != cset.end(); itr++) {
721 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
722 }
723 Trace("regexp-fset") << " }" << std::endl;
724 }
725 }
726
727 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
728 int k = r.getKind();
729 switch( k ) {
730 case kind::STRING_TO_REGEXP:
731 {
732 if(r[0].isConst()) {
733 if(r[0] != d_emptyString) {
734 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
735 if(c.isEmptyString()) {
736 vec_chars.push_back( t1 );
737 return true;
738 } else {
739 char t2 = c.getFirstChar();
740 if(t1 != t2) {
741 return false;
742 } else {
743 if(c.size() >= 2) {
744 vec_chars.push_back( c.substr(1,1).getFirstChar() );
745 } else {
746 vec_chars.push_back( '\0' );
747 }
748 return true;
749 }
750 }
751 } else {
752 return false;
753 }
754 } else {
755 return false;
756 }
757 }
758 break;
759 case kind::REGEXP_CONCAT:
760 {
761 for(unsigned i=0; i<r.getNumChildren(); ++i) {
762 if( follow(r[i], c, vec_chars) ) {
763 if(vec_chars[vec_chars.size() - 1] == '\0') {
764 vec_chars.pop_back();
765 c = d_emptyString.getConst< CVC4::String >();
766 }
767 } else {
768 return false;
769 }
770 }
771 vec_chars.push_back( '\0' );
772 return true;
773 }
774 break;
775 case kind::REGEXP_UNION:
776 {
777 bool flag = false;
778 for(unsigned i=0; i<r.getNumChildren(); ++i) {
779 if( follow(r[i], c, vec_chars) ) {
780 flag=true;
781 }
782 }
783 return flag;
784 }
785 break;
786 case kind::REGEXP_INTER:
787 {
788 std::vector< char > vt2;
789 for(unsigned i=0; i<r.getNumChildren(); ++i) {
790 std::vector< char > v_tmp;
791 if( !follow(r[i], c, v_tmp) ) {
792 return false;
793 }
794 std::vector< char > vt3(vt2);
795 vt2.clear();
796 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
797 if(vt2.size() == 0) {
798 return false;
799 }
800 }
801 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
802 return true;
803 }
804 break;
805 case kind::REGEXP_STAR:
806 {
807 if(follow(r[0], c, vec_chars)) {
808 if(vec_chars[vec_chars.size() - 1] == '\0') {
809 if(c.isEmptyString()) {
810 return true;
811 } else {
812 vec_chars.pop_back();
813 c = d_emptyString.getConst< CVC4::String >();
814 return follow(r[0], c, vec_chars);
815 }
816 } else {
817 return true;
818 }
819 } else {
820 vec_chars.push_back( '\0' );
821 return true;
822 }
823 }
824 break;
825 default: {
826 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
827 //AlwaysAssert( false );
828 //return Node::null();
829 return false;
830 }
831 }
832 }
833
834 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
835 std::vector< Node > vec_nodes;
836 for(char c=d_char_start; c<=d_char_end; ++c) {
837 if(c != exp_c ) {
838 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
839 vec_nodes.push_back( n );
840 }
841 }
842 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
843 }
844
845 //simplify
846 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
847 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
848 Assert(t.getKind() == kind::STRING_IN_REGEXP);
849 Node str = Rewriter::rewrite(t[0]);
850 Node re = Rewriter::rewrite(t[1]);
851 if(polarity) {
852 simplifyPRegExp( str, re, new_nodes );
853 } else {
854 simplifyNRegExp( str, re, new_nodes );
855 }
856 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
857 for(unsigned i=0; i<new_nodes.size(); i++) {
858 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
859 }
860 }
861 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
862 std::pair < Node, Node > p(s, r);
863 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
864 if(itr != d_simpl_neg_cache.end()) {
865 new_nodes.push_back( itr->second );
866 } else {
867 int k = r.getKind();
868 Node conc;
869 switch( k ) {
870 case kind::REGEXP_EMPTY: {
871 conc = d_true;
872 break;
873 }
874 case kind::REGEXP_SIGMA: {
875 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
876 break;
877 }
878 case kind::STRING_TO_REGEXP: {
879 conc = s.eqNode(r[0]).negate();
880 break;
881 }
882 case kind::REGEXP_CONCAT: {
883 //TODO: rewrite empty
884 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
885 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
886 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
887 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
888 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
889 Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1));
890 Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)));
891 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
892 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
893 s1r1 = s1.eqNode(r[0][0]).negate();
894 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
895 s1r1 = d_true;
896 }
897 Node r2 = r[1];
898 if(r.getNumChildren() > 2) {
899 std::vector< Node > nvec;
900 for(unsigned i=1; i<r.getNumChildren(); i++) {
901 nvec.push_back( r[i] );
902 }
903 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
904 }
905 r2 = Rewriter::rewrite(r2);
906 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
907 if(r2.getKind() == kind::STRING_TO_REGEXP) {
908 s2r2 = s2.eqNode(r2[0]).negate();
909 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
910 s2r2 = d_true;
911 }
912
913 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
914 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
915 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
916 break;
917 }
918 case kind::REGEXP_UNION: {
919 std::vector< Node > c_and;
920 for(unsigned i=0; i<r.getNumChildren(); ++i) {
921 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
922 c_and.push_back( r[i][0].eqNode(s).negate() );
923 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
924 continue;
925 } else {
926 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
927 }
928 }
929 conc = c_and.size() == 0 ? d_true :
930 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
931 break;
932 }
933 case kind::REGEXP_INTER: {
934 bool emptyflag = false;
935 std::vector< Node > c_or;
936 for(unsigned i=0; i<r.getNumChildren(); ++i) {
937 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
938 c_or.push_back( r[i][0].eqNode(s).negate() );
939 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
940 emptyflag = true;
941 break;
942 } else {
943 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
944 }
945 }
946 if(emptyflag) {
947 conc = d_true;
948 } else {
949 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
950 }
951 break;
952 }
953 case kind::REGEXP_STAR: {
954 if(s == d_emptyString) {
955 conc = d_false;
956 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
957 conc = s.eqNode(d_emptyString).negate();
958 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
959 conc = d_false;
960 } else {
961 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
962 Node sne = s.eqNode(d_emptyString).negate();
963 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
964 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
965 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
966 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
967 //internal
968 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
969 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
970 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
971 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
972
973 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
974 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
975 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
976 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
977 }
978 break;
979 }
980 default: {
981 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
982 Assert( false, "Unsupported Term" );
983 }
984 }
985 conc = Rewriter::rewrite( conc );
986 new_nodes.push_back( conc );
987 d_simpl_neg_cache[p] = conc;
988 }
989 }
990 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
991 std::pair < Node, Node > p(s, r);
992 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
993 if(itr != d_simpl_cache.end()) {
994 new_nodes.push_back( itr->second );
995 } else {
996 int k = r.getKind();
997 Node conc;
998 switch( k ) {
999 case kind::REGEXP_EMPTY: {
1000 conc = d_false;
1001 break;
1002 }
1003 case kind::REGEXP_SIGMA: {
1004 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1005 break;
1006 }
1007 case kind::STRING_TO_REGEXP: {
1008 conc = s.eqNode(r[0]);
1009 break;
1010 }
1011 case kind::REGEXP_CONCAT: {
1012 std::vector< Node > nvec;
1013 std::vector< Node > cc;
1014 bool emptyflag = false;
1015 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1016 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1017 cc.push_back( r[i][0] );
1018 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1019 emptyflag = true;
1020 break;
1021 } else {
1022 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1023 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1024 nvec.push_back(lem);
1025 cc.push_back(sk);
1026 }
1027 }
1028 if(emptyflag) {
1029 conc = d_false;
1030 } else {
1031 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1032 nvec.push_back(lem);
1033 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1034 }
1035 break;
1036 }
1037 case kind::REGEXP_UNION: {
1038 std::vector< Node > c_or;
1039 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1040 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1041 c_or.push_back( r[i][0].eqNode(s) );
1042 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1043 continue;
1044 } else {
1045 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1046 }
1047 }
1048 conc = c_or.size() == 0 ? d_false :
1049 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1050 break;
1051 }
1052 case kind::REGEXP_INTER: {
1053 std::vector< Node > c_and;
1054 bool emptyflag = false;
1055 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1056 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1057 c_and.push_back( r[i][0].eqNode(s) );
1058 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1059 emptyflag = true;
1060 break;
1061 } else {
1062 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1063 }
1064 }
1065 if(emptyflag) {
1066 conc = d_false;
1067 } else {
1068 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1069 }
1070 break;
1071 }
1072 case kind::REGEXP_STAR: {
1073 if(s == d_emptyString) {
1074 conc = d_true;
1075 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1076 conc = s.eqNode(d_emptyString);
1077 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1078 conc = d_true;
1079 } else {
1080 Node se = s.eqNode(d_emptyString);
1081 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1082 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1083 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1084 Node s1nz = sk1.eqNode(d_emptyString).negate();
1085 Node s2nz = sk2.eqNode(d_emptyString).negate();
1086 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1087 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1088 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1089
1090 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1091 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1092 }
1093 break;
1094 }
1095 default: {
1096 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1097 Assert( false, "Unsupported Term" );
1098 }
1099 }
1100 conc = Rewriter::rewrite( conc );
1101 new_nodes.push_back( conc );
1102 d_simpl_cache[p] = conc;
1103 }
1104 }
1105
1106 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1107 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1108 if(itr != d_cset_cache.end()) {
1109 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1110 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1111 } else {
1112 std::set<unsigned> cset;
1113 SetNodes vset;
1114 int k = r.getKind();
1115 switch( k ) {
1116 case kind::REGEXP_EMPTY: {
1117 break;
1118 }
1119 case kind::REGEXP_SIGMA: {
1120 for(unsigned i=0; i<d_card; i++) {
1121 cset.insert(i);
1122 }
1123 break;
1124 }
1125 case kind::STRING_TO_REGEXP: {
1126 Node st = Rewriter::rewrite(r[0]);
1127 if(st.isConst()) {
1128 CVC4::String s = st.getConst< CVC4::String >();
1129 s.getCharSet( cset );
1130 } else if(st.getKind() == kind::VARIABLE) {
1131 vset.insert( st );
1132 } else {
1133 for(unsigned i=0; i<st.getNumChildren(); i++) {
1134 if(st[i].isConst()) {
1135 CVC4::String s = st[i].getConst< CVC4::String >();
1136 s.getCharSet( cset );
1137 } else {
1138 vset.insert( st[i] );
1139 }
1140 }
1141 }
1142 break;
1143 }
1144 case kind::REGEXP_CONCAT: {
1145 for(unsigned i=0; i<r.getNumChildren(); i++) {
1146 getCharSet(r[i], cset, vset);
1147 }
1148 break;
1149 }
1150 case kind::REGEXP_UNION: {
1151 for(unsigned i=0; i<r.getNumChildren(); i++) {
1152 getCharSet(r[i], cset, vset);
1153 }
1154 break;
1155 }
1156 case kind::REGEXP_INTER: {
1157 //TODO: Overapproximation for now
1158 for(unsigned i=0; i<r.getNumChildren(); i++) {
1159 getCharSet(r[i], cset, vset);
1160 }
1161 break;
1162 }
1163 case kind::REGEXP_STAR: {
1164 getCharSet(r[0], cset, vset);
1165 break;
1166 }
1167 default: {
1168 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1169 Assert( false, "Unsupported Term" );
1170 }
1171 }
1172 pcset.insert(cset.begin(), cset.end());
1173 pvset.insert(vset.begin(), vset.end());
1174 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1175 d_cset_cache[r] = p;
1176
1177 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1178 for(std::set<unsigned>::const_iterator itr = cset.begin();
1179 itr != cset.end(); itr++) {
1180 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1181 }
1182 Trace("regexp-cset") << " }" << std::endl;
1183 }
1184 }
1185
1186 bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1187 for(std::set< PairNodes >::const_iterator itr = s.begin();
1188 itr != s.end(); ++itr) {
1189 if(itr->first == n1 && itr->second == n2 ||
1190 itr->first == n2 && itr->second == n1) {
1191 return true;
1192 }
1193 }
1194 return false;
1195 }
1196
1197 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1198 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
1199 if(spflag) {
1200 //TODO: var
1201 return Node::null();
1202 }
1203 std::pair < Node, Node > p(r1, r2);
1204 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1205 Node rNode;
1206 if(itr != d_inter_cache.end()) {
1207 rNode = itr->second;
1208 } else {
1209 if(r1 == r2) {
1210 rNode = r1;
1211 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1212 rNode = d_emptyRegexp;
1213 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1214 Node exp;
1215 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1216 if(r == 0) {
1217 //TODO: variable
1218 spflag = true;
1219 } else if(r == 1) {
1220 rNode = d_emptySingleton;
1221 } else {
1222 rNode = d_emptyRegexp;
1223 }
1224 } else {
1225 std::set< unsigned > cset, cset2;
1226 std::set< Node > vset, vset2;
1227 getCharSet(r1, cset, vset);
1228 getCharSet(r2, cset2, vset2);
1229 if(vset.empty() && vset2.empty()) {
1230 cset.clear();
1231 firstChars(r1, cset, vset);
1232 std::vector< Node > vec_nodes;
1233 Node delta_exp;
1234 int flag = delta(r1, delta_exp);
1235 int flag2 = delta(r2, delta_exp);
1236 if(flag != 2 && flag2 != 2) {
1237 if(flag == 1 && flag2 == 1) {
1238 vec_nodes.push_back(d_emptySingleton);
1239 } else {
1240 //TODO
1241 spflag = true;
1242 }
1243 }
1244 if(Trace.isOn("regexp-debug")) {
1245 Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
1246 for(std::set<unsigned>::const_iterator itr = cset.begin();
1247 itr != cset.end(); itr++) {
1248 Trace("regexp-debug") << *itr << ", ";
1249 }
1250 Trace("regexp-debug") << std::endl;
1251 }
1252 for(std::set<unsigned>::const_iterator itr = cset.begin();
1253 itr != cset.end(); itr++) {
1254 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1255 if(!isPairNodesInSet(cache[ *itr ], r1, r2)) {
1256 Node r1l = derivativeSingle(r1, c);
1257 Node r2l = derivativeSingle(r2, c);
1258 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1259 PairNodes p(r1l, r2l);
1260 cache2[ *itr ].insert( p );
1261 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1262 if(spflag) {
1263 //TODO:
1264 return Node::null();
1265 }
1266 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1267 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1268 vec_nodes.push_back(rt);
1269 }
1270 }
1271 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1272 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1273 rNode = Rewriter::rewrite( rNode );
1274 } else {
1275 //TODO: non-empty var set
1276 spflag = true;
1277 }
1278 }
1279 d_inter_cache[p] = rNode;
1280 }
1281 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1282 return rNode;
1283 }
1284
1285 bool RegExpOpr::containC2(unsigned cnt, Node n) {
1286 if(n.getKind() == kind::REGEXP_RV) {
1287 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1288 return cnt == y;
1289 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1290 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1291 if(containC2(cnt, n[i])) {
1292 return true;
1293 }
1294 }
1295 } else if(n.getKind() == kind::REGEXP_STAR) {
1296 return containC2(cnt, n[0]);
1297 } else if(n.getKind() == kind::REGEXP_UNION) {
1298 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1299 if(containC2(cnt, n[i])) {
1300 return true;
1301 }
1302 }
1303 }
1304 return false;
1305 }
1306 Node RegExpOpr::convert1(unsigned cnt, Node n) {
1307 Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1308 Node r1, r2;
1309 convert2(cnt, n, r1, r2);
1310 Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1311 Node ret = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1312 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1313 ret = Rewriter::rewrite( ret );
1314 Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1315 return ret;
1316 }
1317 void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1318 if(n == d_emptyRegexp) {
1319 r1 = d_emptyRegexp;
1320 r2 = d_emptyRegexp;
1321 } else if(n == d_emptySingleton) {
1322 r1 = d_emptySingleton;
1323 r2 = d_emptySingleton;
1324 } else if(n.getKind() == kind::REGEXP_RV) {
1325 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1326 r1 = d_emptySingleton;
1327 if(cnt == y) {
1328 r2 = d_emptyRegexp;
1329 } else {
1330 r2 = n;
1331 }
1332 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1333 //TODO
1334 //convert2 x (r@(Seq l r1))
1335 // | contains x r1 = let (r2,r3) = convert2 x r1
1336 // in (Seq l r2, r3)
1337 // | otherwise = (Empty, r)
1338 bool flag = true;
1339 std::vector<Node> vr1, vr2;
1340 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1341 if(containC2(cnt, n[i])) {
1342 Node t1, t2;
1343 convert2(cnt, n[i], t1, t2);
1344 vr1.push_back(t1);
1345 r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1346 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1347 vr2.push_back(t2);
1348 for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1349 vr2.push_back(n[j]);
1350 }
1351 r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1352 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1353 flag = false;
1354 break;
1355 } else {
1356 vr1.push_back(n[i]);
1357 }
1358 }
1359 if(flag) {
1360 r1 = d_emptySingleton;
1361 r2 = n;
1362 }
1363 } else if(n.getKind() == kind::REGEXP_UNION) {
1364 std::vector<Node> vr1, vr2;
1365 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1366 Node t1, t2;
1367 convert2(cnt, n[i], t1, t2);
1368 vr1.push_back(t1);
1369 vr2.push_back(t2);
1370 }
1371 r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1372 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1373 } else if(n.getKind() == kind::STRING_TO_REGEXP) {
1374 r1 = d_emptySingleton;
1375 r2 = n;
1376 } else {
1377 //is it possible?
1378 }
1379 }
1380 Node RegExpOpr::intersectInternal2( Node r1, Node r2, std::map< PairNodes, Node > cache, bool &spflag, unsigned cnt ) {
1381 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
1382 //if(Trace.isOn("regexp-debug")) {
1383 // Trace("regexp-debug") << "... with cache:\n";
1384 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1385 // itr!=cache.end();itr++) {
1386 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1387 // }
1388 //}
1389 if(spflag) {
1390 //TODO: var
1391 return Node::null();
1392 }
1393 std::pair < Node, Node > p(r1, r2);
1394 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1395 Node rNode;
1396 if(itr != d_inter_cache.end()) {
1397 rNode = itr->second;
1398 } else {
1399 if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1400 rNode = d_emptyRegexp;
1401 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1402 Node exp;
1403 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1404 if(r == 0) {
1405 //TODO: variable
1406 spflag = true;
1407 } else if(r == 1) {
1408 rNode = d_emptySingleton;
1409 } else {
1410 rNode = d_emptyRegexp;
1411 }
1412 } else if(r1 == r2) {
1413 rNode = convert1(cnt, r1);
1414 } else {
1415 PairNodes p(r1, r2);
1416 std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1417 if(itrcache != cache.end()) {
1418 rNode = itrcache->second;
1419 } else {
1420 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1421 std::vector< unsigned > cset;
1422 std::set< unsigned > cset1, cset2;
1423 std::set< Node > vset1, vset2;
1424 firstChars(r1, cset1, vset1);
1425 firstChars(r2, cset2, vset2);
1426 std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset1.end(),
1427 std::inserter(cset, cset.begin()));
1428 std::vector< Node > vec_nodes;
1429 Node delta_exp;
1430 int flag = delta(r1, delta_exp);
1431 int flag2 = delta(r2, delta_exp);
1432 if(flag != 2 && flag2 != 2) {
1433 if(flag == 1 && flag2 == 1) {
1434 vec_nodes.push_back(d_emptySingleton);
1435 } else {
1436 //TODO
1437 spflag = true;
1438 }
1439 }
1440 if(Trace.isOn("regexp-debug")) {
1441 Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
1442 for(std::vector<unsigned>::const_iterator itr = cset.begin();
1443 itr != cset.end(); itr++) {
1444 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1445 Trace("regexp-debug") << c << ", ";
1446 }
1447 Trace("regexp-debug") << std::endl;
1448 }
1449 for(std::vector<unsigned>::const_iterator itr = cset.begin();
1450 itr != cset.end(); itr++) {
1451 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1452 Node r1l = derivativeSingle(r1, c);
1453 Node r2l = derivativeSingle(r2, c);
1454 std::map< PairNodes, Node > cache2(cache);
1455 PairNodes p(r1, r2);
1456 cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt)));
1457 Node rt = intersectInternal2(r1l, r2l, cache2, spflag, cnt+1);
1458 rt = convert1(cnt, rt);
1459 if(spflag) {
1460 //TODO:
1461 return Node::null();
1462 }
1463 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1464 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1465 vec_nodes.push_back(rt);
1466 }
1467 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1468 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1469 rNode = Rewriter::rewrite( rNode );
1470 } else {
1471 //TODO: non-empty var set
1472 spflag = true;
1473 }
1474 }
1475 }
1476 d_inter_cache[p] = rNode;
1477 }
1478 Trace("regexp-intersect") << "End of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1479 return rNode;
1480 }
1481 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1482 //std::map< unsigned, std::set< PairNodes > > cache;
1483 std::map< PairNodes, Node > cache;
1484 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1485 return intersectInternal2(r1, r2, cache, spflag, 1);
1486 } else {
1487 spflag = true;
1488 return Node::null();
1489 }
1490 }
1491
1492 Node RegExpOpr::complement(Node r, int &ret) {
1493 Node rNode;
1494 ret = 1;
1495 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1496 rNode = d_compl_cache[r].first;
1497 ret = d_compl_cache[r].second;
1498 } else {
1499 if(r == d_emptyRegexp) {
1500 rNode = d_sigma_star;
1501 } else if(r == d_emptySingleton) {
1502 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1503 } else if(!checkConstRegExp(r)) {
1504 //TODO: var to be extended
1505 ret = 0;
1506 } else {
1507 std::set<unsigned> cset;
1508 SetNodes vset;
1509 firstChars(r, cset, vset);
1510 std::vector< Node > vec_nodes;
1511 for(unsigned i=0; i<d_card; i++) {
1512 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1513 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1514 Node r2;
1515 if(cset.find(i) == cset.end()) {
1516 r2 = d_sigma_star;
1517 } else {
1518 int rt;
1519 derivativeS(r, c, r2);
1520 if(r2 == r) {
1521 r2 = d_emptyRegexp;
1522 } else {
1523 r2 = complement(r2, rt);
1524 }
1525 }
1526 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1527 vec_nodes.push_back(n);
1528 }
1529 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1530 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1531 }
1532 rNode = Rewriter::rewrite(rNode);
1533 std::pair< Node, int > p(rNode, ret);
1534 d_compl_cache[r] = p;
1535 }
1536 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1537 return rNode;
1538 }
1539
1540 void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) {
1541 Assert(checkConstRegExp(r));
1542 if(d_split_cache.find(r) != d_split_cache.end()) {
1543 pset = d_split_cache[r];
1544 } else {
1545 switch( r.getKind() ) {
1546 case kind::REGEXP_EMPTY: {
1547 break;
1548 }
1549 case kind::REGEXP_OPT: {
1550 PairNodes tmp(d_emptySingleton, d_emptySingleton);
1551 pset.push_back(tmp);
1552 }
1553 case kind::REGEXP_RANGE:
1554 case kind::REGEXP_SIGMA: {
1555 PairNodes tmp1(d_emptySingleton, r);
1556 PairNodes tmp2(r, d_emptySingleton);
1557 pset.push_back(tmp1);
1558 pset.push_back(tmp2);
1559 break;
1560 }
1561 case kind::STRING_TO_REGEXP: {
1562 Assert(r[0].isConst());
1563 CVC4::String s = r[0].getConst< CVC4::String >();
1564 PairNodes tmp1(d_emptySingleton, r);
1565 pset.push_back(tmp1);
1566 for(unsigned i=1; i<s.size(); i++) {
1567 CVC4::String s1 = s.substr(0, i);
1568 CVC4::String s2 = s.substr(i);
1569 Node n1 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s1));
1570 Node n2 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s2));
1571 PairNodes tmp3(n1, n2);
1572 pset.push_back(tmp3);
1573 }
1574 PairNodes tmp2(r, d_emptySingleton);
1575 pset.push_back(tmp2);
1576 break;
1577 }
1578 case kind::REGEXP_CONCAT: {
1579 for(unsigned i=0; i<r.getNumChildren(); i++) {
1580 std::vector< PairNodes > tset;
1581 splitRegExp(r[i], tset);
1582 std::vector< Node > hvec;
1583 std::vector< Node > tvec;
1584 for(unsigned j=0; j<=i; j++) {
1585 hvec.push_back(r[j]);
1586 }
1587 for(unsigned j=i; j<r.getNumChildren(); j++) {
1588 tvec.push_back(r[j]);
1589 }
1590 for(unsigned j=0; j<tset.size(); j++) {
1591 hvec[i] = tset[j].first;
1592 tvec[0] = tset[j].second;
1593 Node r1 = Rewriter::rewrite( hvec.size()==1?hvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, hvec) );
1594 Node r2 = Rewriter::rewrite( tvec.size()==1?tvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tvec) );
1595 PairNodes tmp2(r1, r2);
1596 pset.push_back(tmp2);
1597 }
1598 }
1599 break;
1600 }
1601 case kind::REGEXP_UNION: {
1602 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1603 std::vector< PairNodes > tset;
1604 splitRegExp(r[i], tset);
1605 pset.insert(pset.end(), tset.begin(), tset.end());
1606 }
1607 break;
1608 }
1609 case kind::REGEXP_INTER: {
1610 bool spflag = false;
1611 Node tmp = r[0];
1612 for(unsigned i=1; i<r.getNumChildren(); i++) {
1613 tmp = intersect(tmp, r[i], spflag);
1614 }
1615 splitRegExp(tmp, pset);
1616 break;
1617 }
1618 case kind::REGEXP_STAR: {
1619 std::vector< PairNodes > tset;
1620 splitRegExp(r[0], tset);
1621 PairNodes tmp1(d_emptySingleton, d_emptySingleton);
1622 pset.push_back(tmp1);
1623 for(unsigned i=0; i<tset.size(); i++) {
1624 Node r1 = tset[i].first==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1625 Node r2 = tset[i].second==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1626 PairNodes tmp2(r1, r2);
1627 pset.push_back(tmp2);
1628 }
1629 break;
1630 }
1631 case kind::REGEXP_PLUS: {
1632 std::vector< PairNodes > tset;
1633 splitRegExp(r[0], tset);
1634 for(unsigned i=0; i<tset.size(); i++) {
1635 Node r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1636 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1637 PairNodes tmp2(r1, r2);
1638 pset.push_back(tmp2);
1639 }
1640 break;
1641 }
1642 default: {
1643 Trace("strings-error") << "Unsupported term: " << r << " in splitRegExp." << std::endl;
1644 Assert( false );
1645 //return Node::null();
1646 }
1647 }
1648 d_split_cache[r] = pset;
1649 }
1650 }
1651
1652 //printing
1653 std::string RegExpOpr::niceChar( Node r ) {
1654 if(r.isConst()) {
1655 std::string s = r.getConst<CVC4::String>().toString() ;
1656 return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s );
1657 } else {
1658 std::string ss = "$" + r.toString();
1659 return ss;
1660 }
1661 }
1662 std::string RegExpOpr::mkString( Node r ) {
1663 std::string retStr;
1664 if(r.isNull()) {
1665 retStr = "Empty";
1666 } else {
1667 int k = r.getKind();
1668 switch( k ) {
1669 case kind::REGEXP_EMPTY: {
1670 retStr += "Empty";
1671 break;
1672 }
1673 case kind::REGEXP_SIGMA: {
1674 retStr += "{W}";
1675 break;
1676 }
1677 case kind::STRING_TO_REGEXP: {
1678 retStr += niceChar( r[0] );
1679 break;
1680 }
1681 case kind::REGEXP_CONCAT: {
1682 retStr += "(";
1683 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1684 //if(i != 0) retStr += ".";
1685 retStr += mkString( r[i] );
1686 }
1687 retStr += ")";
1688 break;
1689 }
1690 case kind::REGEXP_UNION: {
1691 if(r == d_sigma) {
1692 retStr += "{A}";
1693 } else {
1694 retStr += "(";
1695 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1696 if(i != 0) retStr += "|";
1697 retStr += mkString( r[i] );
1698 }
1699 retStr += ")";
1700 }
1701 break;
1702 }
1703 case kind::REGEXP_INTER: {
1704 retStr += "(";
1705 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1706 if(i != 0) retStr += "&";
1707 retStr += mkString( r[i] );
1708 }
1709 retStr += ")";
1710 break;
1711 }
1712 case kind::REGEXP_STAR: {
1713 retStr += mkString( r[0] );
1714 retStr += "*";
1715 break;
1716 }
1717 case kind::REGEXP_PLUS: {
1718 retStr += mkString( r[0] );
1719 retStr += "+";
1720 break;
1721 }
1722 case kind::REGEXP_OPT: {
1723 retStr += mkString( r[0] );
1724 retStr += "?";
1725 break;
1726 }
1727 case kind::REGEXP_RANGE: {
1728 retStr += "[";
1729 retStr += niceChar( r[0] );
1730 retStr += "-";
1731 retStr += niceChar( r[1] );
1732 retStr += "]";
1733 break;
1734 }
1735 case kind::REGEXP_RV: {
1736 retStr += "<";
1737 retStr += r[0].getConst<Rational>().getNumerator().toString();
1738 retStr += ">";
1739 break;
1740 }
1741 default:
1742 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1743 //Assert( false );
1744 //return Node::null();
1745 }
1746 }
1747
1748 return retStr;
1749 }
1750
1751 }/* CVC4::theory::strings namespace */
1752 }/* CVC4::theory namespace */
1753 }/* CVC4 namespace */