69b089c84d4aff65c7431962a4542996e90e5016
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2 /*! \file regexp_operation.cpp
3 ** \verbatim
4 ** Original author: Tianyi Liang
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief Symbolic Regular Expresion Operations
13 **
14 ** Symbolic Regular Expresion Operations
15 **/
16
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
19
20 namespace CVC4 {
21 namespace theory {
22 namespace strings {
23
24 RegExpOpr::RegExpOpr()
25 : d_card( 256 ),
26 RMAXINT( LONG_MAX )
27 {
28 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
29 d_true = NodeManager::currentNM()->mkConst( true );
30 d_false = NodeManager::currentNM()->mkConst( false );
31 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
32 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
33 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
34 std::vector< Node > nvec;
35 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
36 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
37 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
38 }
39
40 int RegExpOpr::gcd ( int a, int b ) {
41 int c;
42 while ( a != 0 ) {
43 c = a; a = b%a; b = c;
44 }
45 return b;
46 }
47
48 bool RegExpOpr::checkConstRegExp( Node r ) {
49 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
50 bool ret = true;
51 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
52 ret = d_cstre_cache[r];
53 } else {
54 if(r.getKind() == kind::STRING_TO_REGEXP) {
55 Node tmp = Rewriter::rewrite( r[0] );
56 ret = tmp.isConst();
57 } else {
58 for(unsigned i=0; i<r.getNumChildren(); ++i) {
59 if(!checkConstRegExp(r[i])) {
60 ret = false; break;
61 }
62 }
63 }
64 d_cstre_cache[r] = ret;
65 }
66 return ret;
67 }
68
69 // 0-unknown, 1-yes, 2-no
70 int RegExpOpr::delta( Node r, Node &exp ) {
71 Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
72 int ret = 0;
73 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
74 ret = d_delta_cache[r].first;
75 exp = d_delta_cache[r].second;
76 } else {
77 int k = r.getKind();
78 switch( k ) {
79 case kind::REGEXP_EMPTY: {
80 ret = 2;
81 break;
82 }
83 case kind::REGEXP_SIGMA: {
84 ret = 2;
85 break;
86 }
87 case kind::STRING_TO_REGEXP: {
88 Node tmp = Rewriter::rewrite(r[0]);
89 if(tmp.isConst()) {
90 if(tmp == d_emptyString) {
91 ret = 1;
92 } else {
93 ret = 2;
94 }
95 } else {
96 ret = 0;
97 if(tmp.getKind() == kind::STRING_CONCAT) {
98 for(unsigned i=0; i<tmp.getNumChildren(); i++) {
99 if(tmp[i].isConst()) {
100 ret = 2; break;
101 }
102 }
103
104 }
105 if(ret == 0) {
106 exp = r[0].eqNode(d_emptyString);
107 }
108 }
109 break;
110 }
111 case kind::REGEXP_CONCAT: {
112 bool flag = false;
113 std::vector< Node > vec_nodes;
114 for(unsigned i=0; i<r.getNumChildren(); ++i) {
115 Node exp2;
116 int tmp = delta( r[i], exp2 );
117 if(tmp == 2) {
118 ret = 2;
119 break;
120 } else if(tmp == 0) {
121 vec_nodes.push_back( exp2 );
122 flag = true;
123 }
124 }
125 if(ret != 2) {
126 if(!flag) {
127 ret = 1;
128 } else {
129 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
130 }
131 }
132 break;
133 }
134 case kind::REGEXP_UNION: {
135 bool flag = false;
136 std::vector< Node > vec_nodes;
137 for(unsigned i=0; i<r.getNumChildren(); ++i) {
138 Node exp2;
139 int tmp = delta( r[i], exp2 );
140 if(tmp == 1) {
141 ret = 1;
142 break;
143 } else if(tmp == 0) {
144 vec_nodes.push_back( exp2 );
145 flag = true;
146 }
147 }
148 if(ret != 1) {
149 if(!flag) {
150 ret = 2;
151 } else {
152 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
153 }
154 }
155 break;
156 }
157 case kind::REGEXP_INTER: {
158 bool flag = false;
159 std::vector< Node > vec_nodes;
160 for(unsigned i=0; i<r.getNumChildren(); ++i) {
161 Node exp2;
162 int tmp = delta( r[i], exp2 );
163 if(tmp == 2) {
164 ret = 2;
165 break;
166 } else if(tmp == 0) {
167 vec_nodes.push_back( exp2 );
168 flag = true;
169 }
170 }
171 if(ret != 2) {
172 if(!flag) {
173 ret = 1;
174 } else {
175 exp = vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
176 }
177 }
178 break;
179 }
180 case kind::REGEXP_STAR: {
181 ret = 1;
182 break;
183 }
184 case kind::REGEXP_PLUS: {
185 ret = delta( r[0], exp );
186 break;
187 }
188 case kind::REGEXP_OPT: {
189 ret = 1;
190 break;
191 }
192 case kind::REGEXP_RANGE: {
193 ret = 2;
194 break;
195 }
196 default: {
197 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
198 Assert( false );
199 //return Node::null();
200 }
201 }
202 if(!exp.isNull()) {
203 exp = Rewriter::rewrite(exp);
204 }
205 std::pair< int, Node > p(ret, exp);
206 d_delta_cache[r] = p;
207 }
208 Trace("regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
209 return ret;
210 }
211
212 // 0-unknown, 1-yes, 2-no
213 int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
214 Assert( c.size() < 2 );
215 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
216
217 int ret = 1;
218 retNode = d_emptyRegexp;
219
220 PairNodeStr dv = std::make_pair( r, c );
221 if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
222 retNode = d_deriv_cache[dv].first;
223 ret = d_deriv_cache[dv].second;
224 } else if( c.isEmptyString() ) {
225 Node expNode;
226 ret = delta( r, expNode );
227 if(ret == 0) {
228 retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
229 } else if(ret == 1) {
230 retNode = r;
231 }
232 std::pair< Node, int > p(retNode, ret);
233 d_deriv_cache[dv] = p;
234 } else {
235 switch( r.getKind() ) {
236 case kind::REGEXP_EMPTY: {
237 ret = 2;
238 break;
239 }
240 case kind::REGEXP_SIGMA: {
241 retNode = d_emptySingleton;
242 break;
243 }
244 case kind::STRING_TO_REGEXP: {
245 Node tmp = Rewriter::rewrite(r[0]);
246 if(tmp.isConst()) {
247 if(tmp == d_emptyString) {
248 ret = 2;
249 } else {
250 if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
251 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
252 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
253 } else {
254 ret = 2;
255 }
256 }
257 } else {
258 ret = 0;
259 Node rest;
260 if(tmp.getKind() == kind::STRING_CONCAT) {
261 Node t2 = tmp[0];
262 if(t2.isConst()) {
263 if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
264 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
265 tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
266 std::vector< Node > vec_nodes;
267 vec_nodes.push_back(n);
268 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
269 vec_nodes.push_back(tmp[i]);
270 }
271 retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
272 ret = 1;
273 } else {
274 ret = 2;
275 }
276 } else {
277 tmp = tmp[0];
278 std::vector< Node > vec_nodes;
279 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
280 vec_nodes.push_back(tmp[i]);
281 }
282 rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
283 }
284 }
285 if(ret == 0) {
286 Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
287 retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
288 if(!rest.isNull()) {
289 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
290 }
291 Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
292 NodeManager::currentNM()->mkConst(c), sk));
293 retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
294 }
295 }
296 break;
297 }
298 case kind::REGEXP_CONCAT: {
299 std::vector< Node > vec_nodes;
300 std::vector< Node > delta_nodes;
301 Node dnode = d_true;
302 for(unsigned i=0; i<r.getNumChildren(); ++i) {
303 Node dc;
304 Node exp2;
305 int rt = derivativeS(r[i], c, dc);
306 if(rt != 2) {
307 if(rt == 0) {
308 ret = 0;
309 }
310 std::vector< Node > vec_nodes2;
311 if(dc != d_emptySingleton) {
312 vec_nodes2.push_back( dc );
313 }
314 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
315 if(r[j] != d_emptySingleton) {
316 vec_nodes2.push_back( r[j] );
317 }
318 }
319 Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
320 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
321 if(dnode != d_true) {
322 tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
323 ret = 0;
324 }
325 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
326 vec_nodes.push_back( tmp );
327 }
328 }
329 Node exp3;
330 int rt2 = delta( r[i], exp3 );
331 if( rt2 == 0 ) {
332 dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
333 } else if( rt2 == 2 ) {
334 break;
335 }
336 }
337 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
338 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
339 if(retNode == d_emptyRegexp) {
340 ret = 2;
341 }
342 break;
343 }
344 case kind::REGEXP_UNION: {
345 std::vector< Node > vec_nodes;
346 for(unsigned i=0; i<r.getNumChildren(); ++i) {
347 Node dc;
348 int rt = derivativeS(r[i], c, dc);
349 if(rt == 0) {
350 ret = 0;
351 }
352 if(rt != 2) {
353 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
354 vec_nodes.push_back( dc );
355 }
356 }
357 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
358 }
359 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
360 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
361 if(retNode == d_emptyRegexp) {
362 ret = 2;
363 }
364 break;
365 }
366 case kind::REGEXP_INTER: {
367 bool flag = true;
368 bool flag_sg = false;
369 std::vector< Node > vec_nodes;
370 for(unsigned i=0; i<r.getNumChildren(); ++i) {
371 Node dc;
372 int rt = derivativeS(r[i], c, dc);
373 if(rt == 0) {
374 ret = 0;
375 } else if(rt == 2) {
376 flag = false;
377 break;
378 }
379 if(dc == d_sigma_star) {
380 flag_sg = true;
381 } else {
382 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
383 vec_nodes.push_back( dc );
384 }
385 }
386 }
387 if(flag) {
388 if(vec_nodes.size() == 0 && flag_sg) {
389 retNode = d_sigma_star;
390 } else {
391 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
392 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
393 if(retNode == d_emptyRegexp) {
394 ret = 2;
395 }
396 }
397 } else {
398 retNode = d_emptyRegexp;
399 ret = 2;
400 }
401 break;
402 }
403 case kind::REGEXP_STAR: {
404 Node dc;
405 ret = derivativeS(r[0], c, dc);
406 retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
407 break;
408 }
409 default: {
410 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
411 Assert( false, "Unsupported Term" );
412 }
413 }
414 if(retNode != d_emptyRegexp) {
415 retNode = Rewriter::rewrite( retNode );
416 }
417 std::pair< Node, int > p(retNode, ret);
418 d_deriv_cache[dv] = p;
419 }
420
421 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
422 return ret;
423 }
424
425 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
426 Assert( c.size() < 2 );
427 Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
428 Node retNode = d_emptyRegexp;
429 PairNodeStr dv = std::make_pair( r, c );
430 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
431 retNode = d_dv_cache[dv];
432 } else if( c.isEmptyString() ){
433 Node exp;
434 int tmp = delta( r, exp );
435 if(tmp == 0) {
436 // TODO variable
437 retNode = d_emptyRegexp;
438 } else if(tmp == 1) {
439 retNode = r;
440 } else {
441 retNode = d_emptyRegexp;
442 }
443 } else {
444 int k = r.getKind();
445 switch( k ) {
446 case kind::REGEXP_EMPTY: {
447 retNode = d_emptyRegexp;
448 break;
449 }
450 case kind::REGEXP_SIGMA: {
451 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
452 break;
453 }
454 case kind::STRING_TO_REGEXP: {
455 if(r[0].isConst()) {
456 if(r[0] == d_emptyString) {
457 retNode = d_emptyRegexp;
458 } else {
459 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
460 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
461 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
462 } else {
463 retNode = d_emptyRegexp;
464 }
465 }
466 } else {
467 // TODO variable
468 retNode = d_emptyRegexp;
469 }
470 break;
471 }
472 case kind::REGEXP_CONCAT: {
473 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
474 std::vector< Node > vec_nodes;
475 for(unsigned i=0; i<r.getNumChildren(); ++i) {
476 Node dc = derivativeSingle(r[i], c);
477 if(dc != d_emptyRegexp) {
478 std::vector< Node > vec_nodes2;
479 if(dc != rees) {
480 vec_nodes2.push_back( dc );
481 }
482 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
483 if(r[j] != rees) {
484 vec_nodes2.push_back( r[j] );
485 }
486 }
487 Node tmp = vec_nodes2.size()==0 ? rees :
488 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
489 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
490 vec_nodes.push_back( tmp );
491 }
492 }
493 Node exp;
494 if( delta( r[i], exp ) != 1 ) {
495 break;
496 }
497 }
498 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
499 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
500 break;
501 }
502 case kind::REGEXP_UNION: {
503 std::vector< Node > vec_nodes;
504 for(unsigned i=0; i<r.getNumChildren(); ++i) {
505 Node dc = derivativeSingle(r[i], c);
506 if(dc != d_emptyRegexp) {
507 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
508 vec_nodes.push_back( dc );
509 }
510 }
511 Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
512 }
513 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
514 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
515 break;
516 }
517 case kind::REGEXP_INTER: {
518 bool flag = true;
519 bool flag_sg = false;
520 std::vector< Node > vec_nodes;
521 for(unsigned i=0; i<r.getNumChildren(); ++i) {
522 Node dc = derivativeSingle(r[i], c);
523 if(dc != d_emptyRegexp) {
524 if(dc == d_sigma_star) {
525 flag_sg = true;
526 } else {
527 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
528 vec_nodes.push_back( dc );
529 }
530 }
531 } else {
532 flag = false;
533 break;
534 }
535 }
536 if(flag) {
537 if(vec_nodes.size() == 0 && flag_sg) {
538 retNode = d_sigma_star;
539 } else {
540 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
541 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
542 }
543 } else {
544 retNode = d_emptyRegexp;
545 }
546 break;
547 }
548 case kind::REGEXP_STAR: {
549 Node dc = derivativeSingle(r[0], c);
550 if(dc != d_emptyRegexp) {
551 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
552 } else {
553 retNode = d_emptyRegexp;
554 }
555 break;
556 }
557 default: {
558 //TODO: special sym: sigma, none, all
559 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
560 Assert( false, "Unsupported Term" );
561 //return Node::null();
562 }
563 }
564 if(retNode != d_emptyRegexp) {
565 retNode = Rewriter::rewrite( retNode );
566 }
567 d_dv_cache[dv] = retNode;
568 }
569 Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl;
570 return retNode;
571 }
572
573 //TODO:
574 bool RegExpOpr::guessLength( Node r, int &co ) {
575 int k = r.getKind();
576 switch( k ) {
577 case kind::STRING_TO_REGEXP:
578 {
579 if(r[0].isConst()) {
580 co += r[0].getConst< CVC4::String >().size();
581 return true;
582 } else {
583 return false;
584 }
585 }
586 break;
587 case kind::REGEXP_CONCAT:
588 {
589 for(unsigned i=0; i<r.getNumChildren(); ++i) {
590 if(!guessLength( r[i], co)) {
591 return false;
592 }
593 }
594 return true;
595 }
596 break;
597 case kind::REGEXP_UNION:
598 {
599 int g_co;
600 for(unsigned i=0; i<r.getNumChildren(); ++i) {
601 int cop = 0;
602 if(!guessLength( r[i], cop)) {
603 return false;
604 }
605 if(i == 0) {
606 g_co = cop;
607 } else {
608 g_co = gcd(g_co, cop);
609 }
610 }
611 return true;
612 }
613 break;
614 case kind::REGEXP_INTER:
615 {
616 int g_co;
617 for(unsigned i=0; i<r.getNumChildren(); ++i) {
618 int cop = 0;
619 if(!guessLength( r[i], cop)) {
620 return false;
621 }
622 if(i == 0) {
623 g_co = cop;
624 } else {
625 g_co = gcd(g_co, cop);
626 }
627 }
628 return true;
629 }
630 break;
631 case kind::REGEXP_STAR:
632 {
633 co = 0;
634 return true;
635 }
636 break;
637 default:
638 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
639 return false;
640 }
641 }
642
643 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
644 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
645 if(itr != d_fset_cache.end()) {
646 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
647 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
648 } else {
649 std::set<unsigned> cset;
650 SetNodes vset;
651 int k = r.getKind();
652 switch( k ) {
653 case kind::REGEXP_EMPTY: {
654 break;
655 }
656 case kind::REGEXP_SIGMA: {
657 for(unsigned i=0; i<d_card; i++) {
658 cset.insert(i);
659 }
660 break;
661 }
662 case kind::STRING_TO_REGEXP: {
663 Node st = Rewriter::rewrite(r[0]);
664 if(st.isConst()) {
665 CVC4::String s = st.getConst< CVC4::String >();
666 if(s.size() != 0) {
667 cset.insert(s[0]);
668 }
669 } else if(st.getKind() == kind::VARIABLE) {
670 vset.insert( st );
671 } else {
672 if(st[0].isConst()) {
673 CVC4::String s = st[0].getConst< CVC4::String >();
674 cset.insert(s[0]);
675 } else {
676 vset.insert( st[0] );
677 }
678 }
679 break;
680 }
681 case kind::REGEXP_CONCAT: {
682 for(unsigned i=0; i<r.getNumChildren(); i++) {
683 firstChars(r[i], cset, vset);
684 Node n = r[i];
685 Node exp;
686 int r = delta( n, exp );
687 if(r != 1) {
688 break;
689 }
690 }
691 break;
692 }
693 case kind::REGEXP_UNION: {
694 for(unsigned i=0; i<r.getNumChildren(); i++) {
695 firstChars(r[i], cset, vset);
696 }
697 break;
698 }
699 case kind::REGEXP_INTER: {
700 //TODO: Overapproximation for now
701 for(unsigned i=0; i<r.getNumChildren(); i++) {
702 firstChars(r[i], cset, vset);
703 }
704 break;
705 }
706 case kind::REGEXP_STAR: {
707 firstChars(r[0], cset, vset);
708 break;
709 }
710 default: {
711 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
712 Assert( false, "Unsupported Term" );
713 }
714 }
715 pcset.insert(cset.begin(), cset.end());
716 pvset.insert(vset.begin(), vset.end());
717 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
718 d_fset_cache[r] = p;
719
720 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
721 for(std::set<unsigned>::const_iterator itr = cset.begin();
722 itr != cset.end(); itr++) {
723 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
724 }
725 Trace("regexp-fset") << " }" << std::endl;
726 }
727 }
728
729 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
730 int k = r.getKind();
731 switch( k ) {
732 case kind::STRING_TO_REGEXP:
733 {
734 if(r[0].isConst()) {
735 if(r[0] != d_emptyString) {
736 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
737 if(c.isEmptyString()) {
738 vec_chars.push_back( t1 );
739 return true;
740 } else {
741 char t2 = c.getFirstChar();
742 if(t1 != t2) {
743 return false;
744 } else {
745 if(c.size() >= 2) {
746 vec_chars.push_back( c.substr(1,1).getFirstChar() );
747 } else {
748 vec_chars.push_back( '\0' );
749 }
750 return true;
751 }
752 }
753 } else {
754 return false;
755 }
756 } else {
757 return false;
758 }
759 }
760 break;
761 case kind::REGEXP_CONCAT:
762 {
763 for(unsigned i=0; i<r.getNumChildren(); ++i) {
764 if( follow(r[i], c, vec_chars) ) {
765 if(vec_chars[vec_chars.size() - 1] == '\0') {
766 vec_chars.pop_back();
767 c = d_emptyString.getConst< CVC4::String >();
768 }
769 } else {
770 return false;
771 }
772 }
773 vec_chars.push_back( '\0' );
774 return true;
775 }
776 break;
777 case kind::REGEXP_UNION:
778 {
779 bool flag = false;
780 for(unsigned i=0; i<r.getNumChildren(); ++i) {
781 if( follow(r[i], c, vec_chars) ) {
782 flag=true;
783 }
784 }
785 return flag;
786 }
787 break;
788 case kind::REGEXP_INTER:
789 {
790 std::vector< char > vt2;
791 for(unsigned i=0; i<r.getNumChildren(); ++i) {
792 std::vector< char > v_tmp;
793 if( !follow(r[i], c, v_tmp) ) {
794 return false;
795 }
796 std::vector< char > vt3(vt2);
797 vt2.clear();
798 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
799 if(vt2.size() == 0) {
800 return false;
801 }
802 }
803 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
804 return true;
805 }
806 break;
807 case kind::REGEXP_STAR:
808 {
809 if(follow(r[0], c, vec_chars)) {
810 if(vec_chars[vec_chars.size() - 1] == '\0') {
811 if(c.isEmptyString()) {
812 return true;
813 } else {
814 vec_chars.pop_back();
815 c = d_emptyString.getConst< CVC4::String >();
816 return follow(r[0], c, vec_chars);
817 }
818 } else {
819 return true;
820 }
821 } else {
822 vec_chars.push_back( '\0' );
823 return true;
824 }
825 }
826 break;
827 default: {
828 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
829 //AlwaysAssert( false );
830 //return Node::null();
831 return false;
832 }
833 }
834 }
835
836 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
837 std::vector< Node > vec_nodes;
838 for(char c=d_char_start; c<=d_char_end; ++c) {
839 if(c != exp_c ) {
840 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
841 vec_nodes.push_back( n );
842 }
843 }
844 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
845 }
846
847 //simplify
848 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
849 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
850 Assert(t.getKind() == kind::STRING_IN_REGEXP);
851 Node str = Rewriter::rewrite(t[0]);
852 Node re = Rewriter::rewrite(t[1]);
853 if(polarity) {
854 simplifyPRegExp( str, re, new_nodes );
855 } else {
856 simplifyNRegExp( str, re, new_nodes );
857 }
858 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
859 for(unsigned i=0; i<new_nodes.size(); i++) {
860 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
861 }
862 }
863 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
864 std::pair < Node, Node > p(s, r);
865 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
866 if(itr != d_simpl_neg_cache.end()) {
867 new_nodes.push_back( itr->second );
868 } else {
869 int k = r.getKind();
870 Node conc;
871 switch( k ) {
872 case kind::REGEXP_EMPTY: {
873 conc = d_true;
874 break;
875 }
876 case kind::REGEXP_SIGMA: {
877 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
878 break;
879 }
880 case kind::STRING_TO_REGEXP: {
881 conc = s.eqNode(r[0]).negate();
882 break;
883 }
884 case kind::REGEXP_CONCAT: {
885 //TODO: rewrite empty
886 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
887 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
888 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
889 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
890 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
891 Node s1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1));
892 Node s2 = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1)));
893 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
894 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
895 s1r1 = s1.eqNode(r[0][0]).negate();
896 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
897 s1r1 = d_true;
898 }
899 Node r2 = r[1];
900 if(r.getNumChildren() > 2) {
901 std::vector< Node > nvec;
902 for(unsigned i=1; i<r.getNumChildren(); i++) {
903 nvec.push_back( r[i] );
904 }
905 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
906 }
907 r2 = Rewriter::rewrite(r2);
908 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
909 if(r2.getKind() == kind::STRING_TO_REGEXP) {
910 s2r2 = s2.eqNode(r2[0]).negate();
911 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
912 s2r2 = d_true;
913 }
914
915 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
916 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
917 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
918 break;
919 }
920 case kind::REGEXP_UNION: {
921 std::vector< Node > c_and;
922 for(unsigned i=0; i<r.getNumChildren(); ++i) {
923 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
924 c_and.push_back( r[i][0].eqNode(s).negate() );
925 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
926 continue;
927 } else {
928 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
929 }
930 }
931 conc = c_and.size() == 0 ? d_true :
932 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
933 break;
934 }
935 case kind::REGEXP_INTER: {
936 bool emptyflag = false;
937 std::vector< Node > c_or;
938 for(unsigned i=0; i<r.getNumChildren(); ++i) {
939 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
940 c_or.push_back( r[i][0].eqNode(s).negate() );
941 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
942 emptyflag = true;
943 break;
944 } else {
945 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
946 }
947 }
948 if(emptyflag) {
949 conc = d_true;
950 } else {
951 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
952 }
953 break;
954 }
955 case kind::REGEXP_STAR: {
956 if(s == d_emptyString) {
957 conc = d_false;
958 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
959 conc = s.eqNode(d_emptyString).negate();
960 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
961 conc = d_false;
962 } else {
963 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
964 Node sne = s.eqNode(d_emptyString).negate();
965 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
966 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
967 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
968 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
969 //internal
970 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
971 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
972 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
973 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
974
975 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
976 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
977 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
978 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
979 }
980 break;
981 }
982 default: {
983 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
984 Assert( false, "Unsupported Term" );
985 }
986 }
987 conc = Rewriter::rewrite( conc );
988 new_nodes.push_back( conc );
989 d_simpl_neg_cache[p] = conc;
990 }
991 }
992 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
993 std::pair < Node, Node > p(s, r);
994 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
995 if(itr != d_simpl_cache.end()) {
996 new_nodes.push_back( itr->second );
997 } else {
998 int k = r.getKind();
999 Node conc;
1000 switch( k ) {
1001 case kind::REGEXP_EMPTY: {
1002 conc = d_false;
1003 break;
1004 }
1005 case kind::REGEXP_SIGMA: {
1006 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
1007 break;
1008 }
1009 case kind::STRING_TO_REGEXP: {
1010 conc = s.eqNode(r[0]);
1011 break;
1012 }
1013 case kind::REGEXP_CONCAT: {
1014 std::vector< Node > nvec;
1015 std::vector< Node > cc;
1016 bool emptyflag = false;
1017 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1018 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1019 cc.push_back( r[i][0] );
1020 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1021 emptyflag = true;
1022 break;
1023 } else {
1024 Node sk = NodeManager::currentNM()->mkSkolem( "rc", s.getType(), "created for regular expression concat" );
1025 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
1026 nvec.push_back(lem);
1027 cc.push_back(sk);
1028 }
1029 }
1030 if(emptyflag) {
1031 conc = d_false;
1032 } else {
1033 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
1034 nvec.push_back(lem);
1035 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
1036 }
1037 break;
1038 }
1039 case kind::REGEXP_UNION: {
1040 std::vector< Node > c_or;
1041 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1042 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1043 c_or.push_back( r[i][0].eqNode(s) );
1044 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1045 continue;
1046 } else {
1047 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1048 }
1049 }
1050 conc = c_or.size() == 0 ? d_false :
1051 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
1052 break;
1053 }
1054 case kind::REGEXP_INTER: {
1055 std::vector< Node > c_and;
1056 bool emptyflag = false;
1057 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1058 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
1059 c_and.push_back( r[i][0].eqNode(s) );
1060 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
1061 emptyflag = true;
1062 break;
1063 } else {
1064 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
1065 }
1066 }
1067 if(emptyflag) {
1068 conc = d_false;
1069 } else {
1070 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
1071 }
1072 break;
1073 }
1074 case kind::REGEXP_STAR: {
1075 if(s == d_emptyString) {
1076 conc = d_true;
1077 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
1078 conc = s.eqNode(d_emptyString);
1079 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
1080 conc = d_true;
1081 } else {
1082 Node se = s.eqNode(d_emptyString);
1083 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
1084 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1085 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs", s.getType(), "created for regular expression star" );
1086 Node s1nz = sk1.eqNode(d_emptyString).negate();
1087 Node s2nz = sk2.eqNode(d_emptyString).negate();
1088 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
1089 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
1090 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
1091
1092 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
1093 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
1094 }
1095 break;
1096 }
1097 default: {
1098 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
1099 Assert( false, "Unsupported Term" );
1100 }
1101 }
1102 conc = Rewriter::rewrite( conc );
1103 new_nodes.push_back( conc );
1104 d_simpl_cache[p] = conc;
1105 }
1106 }
1107
1108 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
1109 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
1110 if(itr != d_cset_cache.end()) {
1111 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
1112 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
1113 } else {
1114 std::set<unsigned> cset;
1115 SetNodes vset;
1116 int k = r.getKind();
1117 switch( k ) {
1118 case kind::REGEXP_EMPTY: {
1119 break;
1120 }
1121 case kind::REGEXP_SIGMA: {
1122 for(unsigned i=0; i<d_card; i++) {
1123 cset.insert(i);
1124 }
1125 break;
1126 }
1127 case kind::STRING_TO_REGEXP: {
1128 Node st = Rewriter::rewrite(r[0]);
1129 if(st.isConst()) {
1130 CVC4::String s = st.getConst< CVC4::String >();
1131 s.getCharSet( cset );
1132 } else if(st.getKind() == kind::VARIABLE) {
1133 vset.insert( st );
1134 } else {
1135 for(unsigned i=0; i<st.getNumChildren(); i++) {
1136 if(st[i].isConst()) {
1137 CVC4::String s = st[i].getConst< CVC4::String >();
1138 s.getCharSet( cset );
1139 } else {
1140 vset.insert( st[i] );
1141 }
1142 }
1143 }
1144 break;
1145 }
1146 case kind::REGEXP_CONCAT: {
1147 for(unsigned i=0; i<r.getNumChildren(); i++) {
1148 getCharSet(r[i], cset, vset);
1149 }
1150 break;
1151 }
1152 case kind::REGEXP_UNION: {
1153 for(unsigned i=0; i<r.getNumChildren(); i++) {
1154 getCharSet(r[i], cset, vset);
1155 }
1156 break;
1157 }
1158 case kind::REGEXP_INTER: {
1159 //TODO: Overapproximation for now
1160 for(unsigned i=0; i<r.getNumChildren(); i++) {
1161 getCharSet(r[i], cset, vset);
1162 }
1163 break;
1164 }
1165 case kind::REGEXP_STAR: {
1166 getCharSet(r[0], cset, vset);
1167 break;
1168 }
1169 default: {
1170 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
1171 Assert( false, "Unsupported Term" );
1172 }
1173 }
1174 pcset.insert(cset.begin(), cset.end());
1175 pvset.insert(vset.begin(), vset.end());
1176 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
1177 d_cset_cache[r] = p;
1178
1179 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
1180 for(std::set<unsigned>::const_iterator itr = cset.begin();
1181 itr != cset.end(); itr++) {
1182 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
1183 }
1184 Trace("regexp-cset") << " }" << std::endl;
1185 }
1186 }
1187
1188 bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1189 for(std::set< PairNodes >::const_iterator itr = s.begin();
1190 itr != s.end(); ++itr) {
1191 if(itr->first == n1 && itr->second == n2 ||
1192 itr->first == n2 && itr->second == n1) {
1193 return true;
1194 }
1195 }
1196 return false;
1197 }
1198
1199 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache, bool &spflag ) {
1200 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
1201 if(spflag) {
1202 //TODO: var
1203 return Node::null();
1204 }
1205 std::pair < Node, Node > p(r1, r2);
1206 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1207 Node rNode;
1208 if(itr != d_inter_cache.end()) {
1209 rNode = itr->second;
1210 } else {
1211 if(r1 == r2) {
1212 rNode = r1;
1213 } else if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1214 rNode = d_emptyRegexp;
1215 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1216 Node exp;
1217 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1218 if(r == 0) {
1219 //TODO: variable
1220 spflag = true;
1221 } else if(r == 1) {
1222 rNode = d_emptySingleton;
1223 } else {
1224 rNode = d_emptyRegexp;
1225 }
1226 } else {
1227 std::set< unsigned > cset, cset2;
1228 std::set< Node > vset, vset2;
1229 getCharSet(r1, cset, vset);
1230 getCharSet(r2, cset2, vset2);
1231 if(vset.empty() && vset2.empty()) {
1232 cset.clear();
1233 firstChars(r1, cset, vset);
1234 std::vector< Node > vec_nodes;
1235 Node delta_exp;
1236 int flag = delta(r1, delta_exp);
1237 int flag2 = delta(r2, delta_exp);
1238 if(flag != 2 && flag2 != 2) {
1239 if(flag == 1 && flag2 == 1) {
1240 vec_nodes.push_back(d_emptySingleton);
1241 } else {
1242 //TODO
1243 spflag = true;
1244 }
1245 }
1246 if(Trace.isOn("regexp-debug")) {
1247 Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
1248 for(std::set<unsigned>::const_iterator itr = cset.begin();
1249 itr != cset.end(); itr++) {
1250 Trace("regexp-debug") << *itr << ", ";
1251 }
1252 Trace("regexp-debug") << std::endl;
1253 }
1254 for(std::set<unsigned>::const_iterator itr = cset.begin();
1255 itr != cset.end(); itr++) {
1256 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1257 if(!isPairNodesInSet(cache[ *itr ], r1, r2)) {
1258 Node r1l = derivativeSingle(r1, c);
1259 Node r2l = derivativeSingle(r2, c);
1260 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1261 PairNodes p(r1l, r2l);
1262 cache2[ *itr ].insert( p );
1263 Node rt = intersectInternal(r1l, r2l, cache2, spflag);
1264 if(spflag) {
1265 //TODO:
1266 return Node::null();
1267 }
1268 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1269 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1270 vec_nodes.push_back(rt);
1271 }
1272 }
1273 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1274 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1275 rNode = Rewriter::rewrite( rNode );
1276 } else {
1277 //TODO: non-empty var set
1278 spflag = true;
1279 }
1280 }
1281 d_inter_cache[p] = rNode;
1282 }
1283 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1284 return rNode;
1285 }
1286
1287 bool RegExpOpr::containC2(unsigned cnt, Node n) {
1288 if(n.getKind() == kind::REGEXP_RV) {
1289 Assert(n[0].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::containC2");
1290 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1291 return cnt == y;
1292 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1293 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1294 if(containC2(cnt, n[i])) {
1295 return true;
1296 }
1297 }
1298 } else if(n.getKind() == kind::REGEXP_STAR) {
1299 return containC2(cnt, n[0]);
1300 } else if(n.getKind() == kind::REGEXP_UNION) {
1301 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1302 if(containC2(cnt, n[i])) {
1303 return true;
1304 }
1305 }
1306 }
1307 return false;
1308 }
1309 Node RegExpOpr::convert1(unsigned cnt, Node n) {
1310 Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1311 Node r1, r2;
1312 convert2(cnt, n, r1, r2);
1313 Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1314 Node ret = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1315 NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1316 ret = Rewriter::rewrite( ret );
1317 Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1318 return ret;
1319 }
1320 void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1321 if(n == d_emptyRegexp) {
1322 r1 = d_emptyRegexp;
1323 r2 = d_emptyRegexp;
1324 } else if(n == d_emptySingleton) {
1325 r1 = d_emptySingleton;
1326 r2 = d_emptySingleton;
1327 } else if(n.getKind() == kind::REGEXP_RV) {
1328 Assert(n[0].getConst<Rational>() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::containC2");
1329 unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1330 r1 = d_emptySingleton;
1331 if(cnt == y) {
1332 r2 = d_emptyRegexp;
1333 } else {
1334 r2 = n;
1335 }
1336 } else if(n.getKind() == kind::REGEXP_CONCAT) {
1337 //TODO
1338 //convert2 x (r@(Seq l r1))
1339 // | contains x r1 = let (r2,r3) = convert2 x r1
1340 // in (Seq l r2, r3)
1341 // | otherwise = (Empty, r)
1342 bool flag = true;
1343 std::vector<Node> vr1, vr2;
1344 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1345 if(containC2(cnt, n[i])) {
1346 Node t1, t2;
1347 convert2(cnt, n[i], t1, t2);
1348 vr1.push_back(t1);
1349 r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1350 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1351 vr2.push_back(t2);
1352 for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1353 vr2.push_back(n[j]);
1354 }
1355 r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1356 NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1357 flag = false;
1358 break;
1359 } else {
1360 vr1.push_back(n[i]);
1361 }
1362 }
1363 if(flag) {
1364 r1 = d_emptySingleton;
1365 r2 = n;
1366 }
1367 } else if(n.getKind() == kind::REGEXP_UNION) {
1368 std::vector<Node> vr1, vr2;
1369 for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1370 Node t1, t2;
1371 convert2(cnt, n[i], t1, t2);
1372 vr1.push_back(t1);
1373 vr2.push_back(t2);
1374 }
1375 r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1376 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1377 } else if(n.getKind() == kind::STRING_TO_REGEXP) {
1378 r1 = d_emptySingleton;
1379 r2 = n;
1380 } else {
1381 //is it possible?
1382 }
1383 }
1384 Node RegExpOpr::intersectInternal2( Node r1, Node r2, std::map< PairNodes, Node > cache, bool &spflag, unsigned cnt ) {
1385 Trace("regexp-intersect") << "Starting INTERSECT:\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl;
1386 //if(Trace.isOn("regexp-debug")) {
1387 // Trace("regexp-debug") << "... with cache:\n";
1388 // for(std::map< PairNodes, Node >::const_iterator itr=cache.begin();
1389 // itr!=cache.end();itr++) {
1390 // Trace("regexp-debug") << "(" << itr->first.first << "," << itr->first.second << ")->" << itr->second << std::endl;
1391 // }
1392 //}
1393 if(spflag) {
1394 //TODO: var
1395 return Node::null();
1396 }
1397 std::pair < Node, Node > p(r1, r2);
1398 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
1399 Node rNode;
1400 if(itr != d_inter_cache.end()) {
1401 rNode = itr->second;
1402 } else {
1403 if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1404 rNode = d_emptyRegexp;
1405 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1406 Node exp;
1407 int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1408 if(r == 0) {
1409 //TODO: variable
1410 spflag = true;
1411 } else if(r == 1) {
1412 rNode = d_emptySingleton;
1413 } else {
1414 rNode = d_emptyRegexp;
1415 }
1416 } else if(r1 == r2) {
1417 rNode = convert1(cnt, r1);
1418 } else {
1419 PairNodes p(r1, r2);
1420 std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1421 if(itrcache != cache.end()) {
1422 rNode = itrcache->second;
1423 } else {
1424 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1425 std::vector< unsigned > cset;
1426 std::set< unsigned > cset1, cset2;
1427 std::set< Node > vset1, vset2;
1428 firstChars(r1, cset1, vset1);
1429 firstChars(r2, cset2, vset2);
1430 std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset1.end(),
1431 std::inserter(cset, cset.begin()));
1432 std::vector< Node > vec_nodes;
1433 Node delta_exp;
1434 int flag = delta(r1, delta_exp);
1435 int flag2 = delta(r2, delta_exp);
1436 if(flag != 2 && flag2 != 2) {
1437 if(flag == 1 && flag2 == 1) {
1438 vec_nodes.push_back(d_emptySingleton);
1439 } else {
1440 //TODO
1441 spflag = true;
1442 }
1443 }
1444 if(Trace.isOn("regexp-debug")) {
1445 Trace("regexp-debug") << "Try CSET( " << cset.size() << " ) = ";
1446 for(std::vector<unsigned>::const_iterator itr = cset.begin();
1447 itr != cset.end(); itr++) {
1448 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1449 Trace("regexp-debug") << c << ", ";
1450 }
1451 Trace("regexp-debug") << std::endl;
1452 }
1453 for(std::vector<unsigned>::const_iterator itr = cset.begin();
1454 itr != cset.end(); itr++) {
1455 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1456 Node r1l = derivativeSingle(r1, c);
1457 Node r2l = derivativeSingle(r2, c);
1458 std::map< PairNodes, Node > cache2(cache);
1459 PairNodes p(r1, r2);
1460 cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt)));
1461 Node rt = intersectInternal2(r1l, r2l, cache2, spflag, cnt+1);
1462 rt = convert1(cnt, rt);
1463 if(spflag) {
1464 //TODO:
1465 return Node::null();
1466 }
1467 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1468 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1469 vec_nodes.push_back(rt);
1470 }
1471 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1472 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1473 rNode = Rewriter::rewrite( rNode );
1474 } else {
1475 //TODO: non-empty var set
1476 spflag = true;
1477 }
1478 }
1479 }
1480 d_inter_cache[p] = rNode;
1481 }
1482 Trace("regexp-intersect") << "End of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1483 return rNode;
1484 }
1485 Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
1486 //std::map< unsigned, std::set< PairNodes > > cache;
1487 std::map< PairNodes, Node > cache;
1488 if(checkConstRegExp(r1) && checkConstRegExp(r2)) {
1489 return intersectInternal2(r1, r2, cache, spflag, 1);
1490 } else {
1491 spflag = true;
1492 return Node::null();
1493 }
1494 }
1495
1496 Node RegExpOpr::complement(Node r, int &ret) {
1497 Node rNode;
1498 ret = 1;
1499 if(d_compl_cache.find(r) != d_compl_cache.end()) {
1500 rNode = d_compl_cache[r].first;
1501 ret = d_compl_cache[r].second;
1502 } else {
1503 if(r == d_emptyRegexp) {
1504 rNode = d_sigma_star;
1505 } else if(r == d_emptySingleton) {
1506 rNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, d_sigma, d_sigma_star);
1507 } else if(!checkConstRegExp(r)) {
1508 //TODO: var to be extended
1509 ret = 0;
1510 } else {
1511 std::set<unsigned> cset;
1512 SetNodes vset;
1513 firstChars(r, cset, vset);
1514 std::vector< Node > vec_nodes;
1515 for(unsigned i=0; i<d_card; i++) {
1516 CVC4::String c = CVC4::String::convertUnsignedIntToChar(i);
1517 Node n = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c));
1518 Node r2;
1519 if(cset.find(i) == cset.end()) {
1520 r2 = d_sigma_star;
1521 } else {
1522 int rt;
1523 derivativeS(r, c, r2);
1524 if(r2 == r) {
1525 r2 = d_emptyRegexp;
1526 } else {
1527 r2 = complement(r2, rt);
1528 }
1529 }
1530 n = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, n, r2));
1531 vec_nodes.push_back(n);
1532 }
1533 rNode = vec_nodes.size()==0? d_emptyRegexp : vec_nodes.size()==1? vec_nodes[0] :
1534 NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes);
1535 }
1536 rNode = Rewriter::rewrite(rNode);
1537 std::pair< Node, int > p(rNode, ret);
1538 d_compl_cache[r] = p;
1539 }
1540 Trace("regexp-compl") << "COMPL( " << mkString(r) << " ) = " << mkString(rNode) << ", ret=" << ret << std::endl;
1541 return rNode;
1542 }
1543
1544 void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) {
1545 Assert(checkConstRegExp(r));
1546 if(d_split_cache.find(r) != d_split_cache.end()) {
1547 pset = d_split_cache[r];
1548 } else {
1549 switch( r.getKind() ) {
1550 case kind::REGEXP_EMPTY: {
1551 break;
1552 }
1553 case kind::REGEXP_OPT: {
1554 PairNodes tmp(d_emptySingleton, d_emptySingleton);
1555 pset.push_back(tmp);
1556 }
1557 case kind::REGEXP_RANGE:
1558 case kind::REGEXP_SIGMA: {
1559 PairNodes tmp1(d_emptySingleton, r);
1560 PairNodes tmp2(r, d_emptySingleton);
1561 pset.push_back(tmp1);
1562 pset.push_back(tmp2);
1563 break;
1564 }
1565 case kind::STRING_TO_REGEXP: {
1566 Assert(r[0].isConst());
1567 CVC4::String s = r[0].getConst< CVC4::String >();
1568 PairNodes tmp1(d_emptySingleton, r);
1569 pset.push_back(tmp1);
1570 for(unsigned i=1; i<s.size(); i++) {
1571 CVC4::String s1 = s.substr(0, i);
1572 CVC4::String s2 = s.substr(i);
1573 Node n1 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s1));
1574 Node n2 = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(s2));
1575 PairNodes tmp3(n1, n2);
1576 pset.push_back(tmp3);
1577 }
1578 PairNodes tmp2(r, d_emptySingleton);
1579 pset.push_back(tmp2);
1580 break;
1581 }
1582 case kind::REGEXP_CONCAT: {
1583 for(unsigned i=0; i<r.getNumChildren(); i++) {
1584 std::vector< PairNodes > tset;
1585 splitRegExp(r[i], tset);
1586 std::vector< Node > hvec;
1587 std::vector< Node > tvec;
1588 for(unsigned j=0; j<=i; j++) {
1589 hvec.push_back(r[j]);
1590 }
1591 for(unsigned j=i; j<r.getNumChildren(); j++) {
1592 tvec.push_back(r[j]);
1593 }
1594 for(unsigned j=0; j<tset.size(); j++) {
1595 hvec[i] = tset[j].first;
1596 tvec[0] = tset[j].second;
1597 Node r1 = Rewriter::rewrite( hvec.size()==1?hvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, hvec) );
1598 Node r2 = Rewriter::rewrite( tvec.size()==1?tvec[0]:NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tvec) );
1599 PairNodes tmp2(r1, r2);
1600 pset.push_back(tmp2);
1601 }
1602 }
1603 break;
1604 }
1605 case kind::REGEXP_UNION: {
1606 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1607 std::vector< PairNodes > tset;
1608 splitRegExp(r[i], tset);
1609 pset.insert(pset.end(), tset.begin(), tset.end());
1610 }
1611 break;
1612 }
1613 case kind::REGEXP_INTER: {
1614 bool spflag = false;
1615 Node tmp = r[0];
1616 for(unsigned i=1; i<r.getNumChildren(); i++) {
1617 tmp = intersect(tmp, r[i], spflag);
1618 }
1619 splitRegExp(tmp, pset);
1620 break;
1621 }
1622 case kind::REGEXP_STAR: {
1623 std::vector< PairNodes > tset;
1624 splitRegExp(r[0], tset);
1625 PairNodes tmp1(d_emptySingleton, d_emptySingleton);
1626 pset.push_back(tmp1);
1627 for(unsigned i=0; i<tset.size(); i++) {
1628 Node r1 = tset[i].first==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1629 Node r2 = tset[i].second==d_emptySingleton ? r : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1630 PairNodes tmp2(r1, r2);
1631 pset.push_back(tmp2);
1632 }
1633 break;
1634 }
1635 case kind::REGEXP_PLUS: {
1636 std::vector< PairNodes > tset;
1637 splitRegExp(r[0], tset);
1638 for(unsigned i=0; i<tset.size(); i++) {
1639 Node r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, r, tset[i].first);
1640 Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r);
1641 PairNodes tmp2(r1, r2);
1642 pset.push_back(tmp2);
1643 }
1644 break;
1645 }
1646 default: {
1647 Trace("strings-error") << "Unsupported term: " << r << " in splitRegExp." << std::endl;
1648 Assert( false );
1649 //return Node::null();
1650 }
1651 }
1652 d_split_cache[r] = pset;
1653 }
1654 }
1655
1656 //printing
1657 std::string RegExpOpr::niceChar( Node r ) {
1658 if(r.isConst()) {
1659 std::string s = r.getConst<CVC4::String>().toString() ;
1660 return s == "" ? "{E}" : ( s == " " ? "{ }" : s.size()>1? "("+s+")" : s );
1661 } else {
1662 std::string ss = "$" + r.toString();
1663 return ss;
1664 }
1665 }
1666 std::string RegExpOpr::mkString( Node r ) {
1667 std::string retStr;
1668 if(r.isNull()) {
1669 retStr = "Empty";
1670 } else {
1671 int k = r.getKind();
1672 switch( k ) {
1673 case kind::REGEXP_EMPTY: {
1674 retStr += "Empty";
1675 break;
1676 }
1677 case kind::REGEXP_SIGMA: {
1678 retStr += "{W}";
1679 break;
1680 }
1681 case kind::STRING_TO_REGEXP: {
1682 retStr += niceChar( r[0] );
1683 break;
1684 }
1685 case kind::REGEXP_CONCAT: {
1686 retStr += "(";
1687 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1688 //if(i != 0) retStr += ".";
1689 retStr += mkString( r[i] );
1690 }
1691 retStr += ")";
1692 break;
1693 }
1694 case kind::REGEXP_UNION: {
1695 if(r == d_sigma) {
1696 retStr += "{A}";
1697 } else {
1698 retStr += "(";
1699 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1700 if(i != 0) retStr += "|";
1701 retStr += mkString( r[i] );
1702 }
1703 retStr += ")";
1704 }
1705 break;
1706 }
1707 case kind::REGEXP_INTER: {
1708 retStr += "(";
1709 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1710 if(i != 0) retStr += "&";
1711 retStr += mkString( r[i] );
1712 }
1713 retStr += ")";
1714 break;
1715 }
1716 case kind::REGEXP_STAR: {
1717 retStr += mkString( r[0] );
1718 retStr += "*";
1719 break;
1720 }
1721 case kind::REGEXP_PLUS: {
1722 retStr += mkString( r[0] );
1723 retStr += "+";
1724 break;
1725 }
1726 case kind::REGEXP_OPT: {
1727 retStr += mkString( r[0] );
1728 retStr += "?";
1729 break;
1730 }
1731 case kind::REGEXP_RANGE: {
1732 retStr += "[";
1733 retStr += niceChar( r[0] );
1734 retStr += "-";
1735 retStr += niceChar( r[1] );
1736 retStr += "]";
1737 break;
1738 }
1739 case kind::REGEXP_RV: {
1740 retStr += "<";
1741 retStr += r[0].getConst<Rational>().getNumerator().toString();
1742 retStr += ">";
1743 break;
1744 }
1745 default:
1746 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1747 //Assert( false );
1748 //return Node::null();
1749 }
1750 }
1751
1752 return retStr;
1753 }
1754
1755 }/* CVC4::theory::strings namespace */
1756 }/* CVC4::theory namespace */
1757 }/* CVC4 namespace */