adds intersection
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Symbolic Regular Expresion Operations
24 **
25 ** Symbolic Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
40 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
41 d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
42 std::vector< Node > nvec;
43 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
44 // All Charactors = all printable ones 32-126
45 //d_char_start = 'a';//' ';
46 //d_char_end = 'c';//'~';
47 //d_sigma = mkAllExceptOne( '\0' );
48 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
49 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
50 d_card = 256;
51 }
52
53 int RegExpOpr::gcd ( int a, int b ) {
54 int c;
55 while ( a != 0 ) {
56 c = a; a = b%a; b = c;
57 }
58 return b;
59 }
60
61 bool RegExpOpr::checkConstRegExp( Node r ) {
62 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
63 bool ret = true;
64 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
65 ret = d_cstre_cache[r];
66 } else {
67 if(r.getKind() == kind::STRING_TO_REGEXP) {
68 Node tmp = Rewriter::rewrite( r[0] );
69 ret = tmp.isConst();
70 } else {
71 for(unsigned i=0; i<r.getNumChildren(); ++i) {
72 if(!checkConstRegExp(r[i])) {
73 ret = false; break;
74 }
75 }
76 }
77 d_cstre_cache[r] = ret;
78 }
79 return ret;
80 }
81
82 // 0-unknown, 1-yes, 2-no
83 int RegExpOpr::delta( Node r ) {
84 Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
85 int ret = 0;
86 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
87 ret = d_delta_cache[r];
88 } else {
89 int k = r.getKind();
90 switch( k ) {
91 case kind::REGEXP_EMPTY: {
92 ret = 2;
93 break;
94 }
95 case kind::REGEXP_SIGMA: {
96 ret = 2;
97 break;
98 }
99 case kind::STRING_TO_REGEXP: {
100 if(r[0].isConst()) {
101 if(r[0] == d_emptyString) {
102 ret = 1;
103 } else {
104 ret = 2;
105 }
106 } else {
107 ret = 0;
108 }
109 break;
110 }
111 case kind::REGEXP_CONCAT: {
112 bool flag = false;
113 for(unsigned i=0; i<r.getNumChildren(); ++i) {
114 int tmp = delta( r[i] );
115 if(tmp == 2) {
116 ret = 2;
117 break;
118 } else if(tmp == 0) {
119 flag = true;
120 }
121 }
122 if(!flag && ret != 2) {
123 ret = 1;
124 }
125 break;
126 }
127 case kind::REGEXP_UNION: {
128 bool flag = false;
129 for(unsigned i=0; i<r.getNumChildren(); ++i) {
130 int tmp = delta( r[i] );
131 if(tmp == 1) {
132 ret = 1;
133 break;
134 } else if(tmp == 0) {
135 flag = true;
136 }
137 }
138 if(!flag && ret != 1) {
139 ret = 2;
140 }
141 break;
142 }
143 case kind::REGEXP_INTER: {
144 bool flag = true;
145 for(unsigned i=0; i<r.getNumChildren(); ++i) {
146 int tmp = delta( r[i] );
147 if(tmp == 2) {
148 ret = 2; flag=false;
149 break;
150 } else if(tmp == 0) {
151 flag=false;
152 break;
153 }
154 }
155 if(flag) {
156 ret = 1;
157 }
158 break;
159 }
160 case kind::REGEXP_STAR: {
161 ret = 1;
162 break;
163 }
164 case kind::REGEXP_PLUS: {
165 ret = delta( r[0] );
166 break;
167 }
168 case kind::REGEXP_OPT: {
169 ret = 1;
170 break;
171 }
172 case kind::REGEXP_RANGE: {
173 ret = 2;
174 break;
175 }
176 default: {
177 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
178 Assert( false );
179 //return Node::null();
180 }
181 }
182 d_delta_cache[r] = ret;
183 }
184 Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
185 return ret;
186 }
187
188 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
189 Assert( c.size() < 2 );
190 Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
191 Node retNode = d_emptyRegexp;
192 PairDvStr dv = std::make_pair( r, c );
193 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
194 retNode = d_dv_cache[dv];
195 } else if( c.isEmptyString() ){
196 int tmp = delta( r );
197 if(tmp == 0) {
198 // TODO variable
199 retNode = d_emptyRegexp;
200 } else if(tmp == 1) {
201 retNode = r;
202 } else {
203 retNode = d_emptyRegexp;
204 }
205 } else {
206 int k = r.getKind();
207 switch( k ) {
208 case kind::REGEXP_EMPTY: {
209 retNode = d_emptyRegexp;
210 break;
211 }
212 case kind::REGEXP_SIGMA: {
213 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
214 break;
215 }
216 case kind::STRING_TO_REGEXP: {
217 if(r[0].isConst()) {
218 if(r[0] == d_emptyString) {
219 retNode = d_emptyRegexp;
220 } else {
221 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
222 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
223 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
224 } else {
225 retNode = d_emptyRegexp;
226 }
227 }
228 } else {
229 // TODO variable
230 retNode = d_emptyRegexp;
231 }
232 break;
233 }
234 case kind::REGEXP_CONCAT: {
235 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
236 std::vector< Node > vec_nodes;
237 for(unsigned i=0; i<r.getNumChildren(); ++i) {
238 Node dc = derivativeSingle(r[i], c);
239 if(dc != d_emptyRegexp) {
240 std::vector< Node > vec_nodes2;
241 if(dc != rees) {
242 vec_nodes2.push_back( dc );
243 }
244 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
245 if(r[j] != rees) {
246 vec_nodes2.push_back( r[j] );
247 }
248 }
249 Node tmp = vec_nodes2.size()==0 ? rees :
250 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
251 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
252 vec_nodes.push_back( tmp );
253 }
254 }
255
256 if( delta( r[i] ) != 1 ) {
257 break;
258 }
259 }
260 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
261 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
262 break;
263 }
264 case kind::REGEXP_UNION: {
265 std::vector< Node > vec_nodes;
266 for(unsigned i=0; i<r.getNumChildren(); ++i) {
267 Node dc = derivativeSingle(r[i], c);
268 if(dc != d_emptyRegexp) {
269 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
270 vec_nodes.push_back( dc );
271 }
272 }
273 Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
274 }
275 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
276 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
277 break;
278 }
279 case kind::REGEXP_INTER: {
280 bool flag = true;
281 bool flag_sg = false;
282 std::vector< Node > vec_nodes;
283 for(unsigned i=0; i<r.getNumChildren(); ++i) {
284 Node dc = derivativeSingle(r[i], c);
285 if(dc != d_emptyRegexp) {
286 if(dc == d_sigma_star) {
287 flag_sg = true;
288 } else {
289 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
290 vec_nodes.push_back( dc );
291 }
292 }
293 } else {
294 flag = false;
295 break;
296 }
297 }
298 if(flag) {
299 if(vec_nodes.size() == 0 && flag_sg) {
300 retNode = d_sigma_star;
301 } else {
302 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
303 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
304 }
305 } else {
306 retNode = d_emptyRegexp;
307 }
308 break;
309 }
310 case kind::REGEXP_STAR: {
311 Node dc = derivativeSingle(r[0], c);
312 if(dc != d_emptyRegexp) {
313 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
314 } else {
315 retNode = d_emptyRegexp;
316 }
317 break;
318 }
319 default: {
320 //TODO: special sym: sigma, none, all
321 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
322 Assert( false, "Unsupported Term" );
323 //return Node::null();
324 }
325 }
326 if(retNode != d_emptyRegexp) {
327 retNode = Rewriter::rewrite( retNode );
328 }
329 d_dv_cache[dv] = retNode;
330 }
331 Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;
332 return retNode;
333 }
334
335 //TODO:
336 bool RegExpOpr::guessLength( Node r, int &co ) {
337 int k = r.getKind();
338 switch( k ) {
339 case kind::STRING_TO_REGEXP:
340 {
341 if(r[0].isConst()) {
342 co += r[0].getConst< CVC4::String >().size();
343 return true;
344 } else {
345 return false;
346 }
347 }
348 break;
349 case kind::REGEXP_CONCAT:
350 {
351 for(unsigned i=0; i<r.getNumChildren(); ++i) {
352 if(!guessLength( r[i], co)) {
353 return false;
354 }
355 }
356 return true;
357 }
358 break;
359 case kind::REGEXP_UNION:
360 {
361 int g_co;
362 for(unsigned i=0; i<r.getNumChildren(); ++i) {
363 int cop = 0;
364 if(!guessLength( r[i], cop)) {
365 return false;
366 }
367 if(i == 0) {
368 g_co = cop;
369 } else {
370 g_co = gcd(g_co, cop);
371 }
372 }
373 return true;
374 }
375 break;
376 case kind::REGEXP_INTER:
377 {
378 int g_co;
379 for(unsigned i=0; i<r.getNumChildren(); ++i) {
380 int cop = 0;
381 if(!guessLength( r[i], cop)) {
382 return false;
383 }
384 if(i == 0) {
385 g_co = cop;
386 } else {
387 g_co = gcd(g_co, cop);
388 }
389 }
390 return true;
391 }
392 break;
393 case kind::REGEXP_STAR:
394 {
395 co = 0;
396 return true;
397 }
398 break;
399 default:
400 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
401 return false;
402 }
403 }
404
405 void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
406 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);
407 if(itr != d_fset_cache.end()) {
408 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
409 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
410 } else {
411 std::set<unsigned> cset;
412 SetNodes vset;
413 int k = r.getKind();
414 switch( k ) {
415 case kind::REGEXP_EMPTY: {
416 break;
417 }
418 case kind::REGEXP_SIGMA: {
419 for(unsigned i=0; i<d_card; i++) {
420 cset.insert(i);
421 }
422 break;
423 }
424 case kind::STRING_TO_REGEXP: {
425 Node st = Rewriter::rewrite(r[0]);
426 if(st.isConst()) {
427 CVC4::String s = st.getConst< CVC4::String >();
428 if(s.size() != 0) {
429 cset.insert(s[0]);
430 }
431 } else if(st.getKind() == kind::VARIABLE) {
432 vset.insert( st );
433 } else {
434 if(st[0].isConst()) {
435 CVC4::String s = st[0].getConst< CVC4::String >();
436 cset.insert(s[0]);
437 } else {
438 vset.insert( st[0] );
439 }
440 }
441 break;
442 }
443 case kind::REGEXP_CONCAT: {
444 for(unsigned i=0; i<r.getNumChildren(); i++) {
445 firstChars(r[i], cset, vset);
446 Node n = r[i];
447 int r = delta( n );
448 if(r != 1) {
449 break;
450 }
451 }
452 break;
453 }
454 case kind::REGEXP_UNION: {
455 for(unsigned i=0; i<r.getNumChildren(); i++) {
456 firstChars(r[i], cset, vset);
457 }
458 break;
459 }
460 case kind::REGEXP_INTER: {
461 //TODO: Overapproximation for now
462 for(unsigned i=0; i<r.getNumChildren(); i++) {
463 firstChars(r[i], cset, vset);
464 }
465 break;
466 }
467 case kind::REGEXP_STAR: {
468 firstChars(r[0], cset, vset);
469 break;
470 }
471 default: {
472 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
473 Assert( false, "Unsupported Term" );
474 }
475 }
476 pcset.insert(cset.begin(), cset.end());
477 pvset.insert(vset.begin(), vset.end());
478 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
479 d_fset_cache[r] = p;
480
481 Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";
482 for(std::set<unsigned>::const_iterator itr = cset.begin();
483 itr != cset.end(); itr++) {
484 Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
485 }
486 Trace("regexp-fset") << " }" << std::endl;
487 }
488 }
489
490 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
491 int k = r.getKind();
492 switch( k ) {
493 case kind::STRING_TO_REGEXP:
494 {
495 if(r[0].isConst()) {
496 if(r[0] != d_emptyString) {
497 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
498 if(c.isEmptyString()) {
499 vec_chars.push_back( t1 );
500 return true;
501 } else {
502 char t2 = c.getFirstChar();
503 if(t1 != t2) {
504 return false;
505 } else {
506 if(c.size() >= 2) {
507 vec_chars.push_back( c.substr(1,1).getFirstChar() );
508 } else {
509 vec_chars.push_back( '\0' );
510 }
511 return true;
512 }
513 }
514 } else {
515 return false;
516 }
517 } else {
518 return false;
519 }
520 }
521 break;
522 case kind::REGEXP_CONCAT:
523 {
524 for(unsigned i=0; i<r.getNumChildren(); ++i) {
525 if( follow(r[i], c, vec_chars) ) {
526 if(vec_chars[vec_chars.size() - 1] == '\0') {
527 vec_chars.pop_back();
528 c = d_emptyString.getConst< CVC4::String >();
529 }
530 } else {
531 return false;
532 }
533 }
534 vec_chars.push_back( '\0' );
535 return true;
536 }
537 break;
538 case kind::REGEXP_UNION:
539 {
540 bool flag = false;
541 for(unsigned i=0; i<r.getNumChildren(); ++i) {
542 if( follow(r[i], c, vec_chars) ) {
543 flag=true;
544 }
545 }
546 return flag;
547 }
548 break;
549 case kind::REGEXP_INTER:
550 {
551 std::vector< char > vt2;
552 for(unsigned i=0; i<r.getNumChildren(); ++i) {
553 std::vector< char > v_tmp;
554 if( !follow(r[i], c, v_tmp) ) {
555 return false;
556 }
557 std::vector< char > vt3(vt2);
558 vt2.clear();
559 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
560 if(vt2.size() == 0) {
561 return false;
562 }
563 }
564 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
565 return true;
566 }
567 break;
568 case kind::REGEXP_STAR:
569 {
570 if(follow(r[0], c, vec_chars)) {
571 if(vec_chars[vec_chars.size() - 1] == '\0') {
572 if(c.isEmptyString()) {
573 return true;
574 } else {
575 vec_chars.pop_back();
576 c = d_emptyString.getConst< CVC4::String >();
577 return follow(r[0], c, vec_chars);
578 }
579 } else {
580 return true;
581 }
582 } else {
583 vec_chars.push_back( '\0' );
584 return true;
585 }
586 }
587 break;
588 /*
589 case kind::REGEXP_PLUS:
590 {
591 ret = delta( r[0] );
592 }
593 break;
594 case kind::REGEXP_OPT:
595 {
596 ret = 1;
597 }
598 break;
599 case kind::REGEXP_RANGE:
600 {
601 ret = 2;
602 }
603 break;*/
604 default: {
605 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
606 //AlwaysAssert( false );
607 //return Node::null();
608 return false;
609 }
610 }
611 }
612
613 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
614 std::vector< Node > vec_nodes;
615 for(char c=d_char_start; c<=d_char_end; ++c) {
616 if(c != exp_c ) {
617 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
618 vec_nodes.push_back( n );
619 }
620 }
621 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
622 }
623
624 //simplify
625 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
626 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
627 Assert(t.getKind() == kind::STRING_IN_REGEXP);
628 Node str = Rewriter::rewrite(t[0]);
629 Node re = Rewriter::rewrite(t[1]);
630 if(polarity) {
631 simplifyPRegExp( str, re, new_nodes );
632 } else {
633 simplifyNRegExp( str, re, new_nodes );
634 }
635 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
636 for(unsigned i=0; i<new_nodes.size(); i++) {
637 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
638 }
639 }
640 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
641 std::pair < Node, Node > p(s, r);
642 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
643 if(itr != d_simpl_neg_cache.end()) {
644 new_nodes.push_back( itr->second );
645 } else {
646 int k = r.getKind();
647 Node conc;
648 switch( k ) {
649 case kind::REGEXP_EMPTY: {
650 conc = d_true;
651 break;
652 }
653 case kind::REGEXP_SIGMA: {
654 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
655 break;
656 }
657 case kind::STRING_TO_REGEXP: {
658 conc = s.eqNode(r[0]).negate();
659 break;
660 }
661 case kind::REGEXP_CONCAT: {
662 //TODO: rewrite empty
663 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
664 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
665 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
666 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
667 Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
668 Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
669 Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
670 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
671 Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
672 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
673 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
674 s1r1 = s1.eqNode(r[0][0]).negate();
675 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
676 s1r1 = d_true;
677 }
678 Node r2 = r[1];
679 if(r.getNumChildren() > 2) {
680 std::vector< Node > nvec;
681 for(unsigned i=1; i<r.getNumChildren(); i++) {
682 nvec.push_back( r[i] );
683 }
684 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
685 }
686 r2 = Rewriter::rewrite(r2);
687 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
688 if(r2.getKind() == kind::STRING_TO_REGEXP) {
689 s2r2 = s2.eqNode(r2[0]).negate();
690 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
691 s2r2 = d_true;
692 }
693
694 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
695 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
696 conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
697 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
698 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
699 break;
700 }
701 case kind::REGEXP_UNION: {
702 std::vector< Node > c_and;
703 for(unsigned i=0; i<r.getNumChildren(); ++i) {
704 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
705 c_and.push_back( r[i][0].eqNode(s).negate() );
706 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
707 continue;
708 } else {
709 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
710 }
711 }
712 conc = c_and.size() == 0 ? d_true :
713 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
714 break;
715 }
716 case kind::REGEXP_INTER: {
717 bool emptyflag = false;
718 std::vector< Node > c_or;
719 for(unsigned i=0; i<r.getNumChildren(); ++i) {
720 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
721 c_or.push_back( r[i][0].eqNode(s).negate() );
722 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
723 emptyflag = true;
724 break;
725 } else {
726 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
727 }
728 }
729 if(emptyflag) {
730 conc = d_true;
731 } else {
732 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
733 }
734 break;
735 }
736 case kind::REGEXP_STAR: {
737 if(s == d_emptyString) {
738 conc = d_false;
739 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
740 conc = s.eqNode(d_emptyString).negate();
741 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
742 conc = d_false;
743 } else {
744 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
745 Node sne = s.eqNode(d_emptyString).negate();
746 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
747 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
748 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
749 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
750 //internal
751 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
752 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
753 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
754 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
755
756 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
757 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
758 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
759 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
760 }
761 break;
762 }
763 default: {
764 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
765 Assert( false, "Unsupported Term" );
766 }
767 }
768 conc = Rewriter::rewrite( conc );
769 new_nodes.push_back( conc );
770 d_simpl_neg_cache[p] = conc;
771 }
772 }
773 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
774 std::pair < Node, Node > p(s, r);
775 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
776 if(itr != d_simpl_cache.end()) {
777 new_nodes.push_back( itr->second );
778 } else {
779 int k = r.getKind();
780 Node conc;
781 switch( k ) {
782 case kind::REGEXP_EMPTY: {
783 conc = d_false;
784 break;
785 }
786 case kind::REGEXP_SIGMA: {
787 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
788 break;
789 }
790 case kind::STRING_TO_REGEXP: {
791 conc = s.eqNode(r[0]);
792 break;
793 }
794 case kind::REGEXP_CONCAT: {
795 std::vector< Node > nvec;
796 std::vector< Node > cc;
797 bool emptyflag = false;
798 for(unsigned i=0; i<r.getNumChildren(); ++i) {
799 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
800 cc.push_back( r[i][0] );
801 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
802 emptyflag = true;
803 break;
804 } else {
805 Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" );
806 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
807 nvec.push_back(lem);
808 cc.push_back(sk);
809 }
810 }
811 if(emptyflag) {
812 conc = d_false;
813 } else {
814 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
815 nvec.push_back(lem);
816 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
817 }
818 break;
819 }
820 case kind::REGEXP_UNION: {
821 std::vector< Node > c_or;
822 for(unsigned i=0; i<r.getNumChildren(); ++i) {
823 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
824 c_or.push_back( r[i][0].eqNode(s) );
825 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
826 continue;
827 } else {
828 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
829 }
830 }
831 conc = c_or.size() == 0 ? d_false :
832 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
833 break;
834 }
835 case kind::REGEXP_INTER: {
836 std::vector< Node > c_and;
837 bool emptyflag = false;
838 for(unsigned i=0; i<r.getNumChildren(); ++i) {
839 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
840 c_and.push_back( r[i][0].eqNode(s) );
841 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
842 emptyflag = true;
843 break;
844 } else {
845 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
846 }
847 }
848 if(emptyflag) {
849 conc = d_false;
850 } else {
851 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
852 }
853 break;
854 }
855 case kind::REGEXP_STAR: {
856 if(s == d_emptyString) {
857 conc = d_true;
858 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
859 conc = s.eqNode(d_emptyString);
860 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
861 conc = d_true;
862 } else {
863 Node se = s.eqNode(d_emptyString);
864 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
865 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
866 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
867 Node s1nz = sk1.eqNode(d_emptyString).negate();
868 Node s2nz = sk2.eqNode(d_emptyString).negate();
869 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
870 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
871 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
872
873 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
874 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
875 }
876 break;
877 }
878 default: {
879 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
880 Assert( false, "Unsupported Term" );
881 }
882 }
883 conc = Rewriter::rewrite( conc );
884 new_nodes.push_back( conc );
885 d_simpl_cache[p] = conc;
886 }
887 }
888
889 void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {
890 std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);
891 if(itr != d_cset_cache.end()) {
892 pcset.insert((itr->second).first.begin(), (itr->second).first.end());
893 pvset.insert((itr->second).second.begin(), (itr->second).second.end());
894 } else {
895 std::set<unsigned> cset;
896 SetNodes vset;
897 int k = r.getKind();
898 switch( k ) {
899 case kind::REGEXP_EMPTY: {
900 break;
901 }
902 case kind::REGEXP_SIGMA: {
903 for(unsigned i=0; i<d_card; i++) {
904 cset.insert(i);
905 }
906 break;
907 }
908 case kind::STRING_TO_REGEXP: {
909 Node st = Rewriter::rewrite(r[0]);
910 if(st.isConst()) {
911 CVC4::String s = st.getConst< CVC4::String >();
912 s.getCharSet( cset );
913 } else if(st.getKind() == kind::VARIABLE) {
914 vset.insert( st );
915 } else {
916 for(unsigned i=0; i<st.getNumChildren(); i++) {
917 if(st[i].isConst()) {
918 CVC4::String s = st[i].getConst< CVC4::String >();
919 s.getCharSet( cset );
920 } else {
921 vset.insert( st[i] );
922 }
923 }
924 }
925 break;
926 }
927 case kind::REGEXP_CONCAT: {
928 for(unsigned i=0; i<r.getNumChildren(); i++) {
929 getCharSet(r[i], cset, vset);
930 }
931 break;
932 }
933 case kind::REGEXP_UNION: {
934 for(unsigned i=0; i<r.getNumChildren(); i++) {
935 getCharSet(r[i], cset, vset);
936 }
937 break;
938 }
939 case kind::REGEXP_INTER: {
940 //TODO: Overapproximation for now
941 for(unsigned i=0; i<r.getNumChildren(); i++) {
942 getCharSet(r[i], cset, vset);
943 }
944 break;
945 }
946 case kind::REGEXP_STAR: {
947 getCharSet(r[0], cset, vset);
948 break;
949 }
950 default: {
951 Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;
952 Assert( false, "Unsupported Term" );
953 }
954 }
955 pcset.insert(cset.begin(), cset.end());
956 pvset.insert(vset.begin(), vset.end());
957 std::pair< std::set<unsigned>, SetNodes > p(cset, vset);
958 d_cset_cache[r] = p;
959
960 Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";
961 for(std::set<unsigned>::const_iterator itr = cset.begin();
962 itr != cset.end(); itr++) {
963 Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";
964 }
965 Trace("regexp-cset") << " }" << std::endl;
966 }
967 }
968
969 Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache ) {
970 std::pair < Node, Node > p(r1, r2);
971 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);
972 Node rNode;
973 if(itr != d_inter_cache.end()) {
974 //Trace("regexp-intersect") << "INTERSECT Case 0: Cached " << std::endl;
975 rNode = itr->second;
976 } else {
977 if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
978 Trace("regexp-intersect") << "INTERSECT Case 1: one empty RE" << std::endl;
979 rNode = d_emptyRegexp;
980 } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
981 Trace("regexp-intersect") << "INTERSECT Case 2: one empty Singleton" << std::endl;
982 int r = delta(r1 == d_emptySingleton ? r2 : r1);
983 if(r == 0) {
984 //TODO: variable
985 AlwaysAssert( false, "Unsupported Yet, 892 REO" );
986 } else if(r == 1) {
987 rNode = d_emptySingleton;
988 } else {
989 rNode = d_emptyRegexp;
990 }
991 } else {
992 Trace("regexp-intersect") << "INTERSECT Case 3: must compare" << std::endl;
993 std::set< unsigned > cset, cset2;
994 std::vector< unsigned > cdiff;
995 std::set< Node > vset;
996 getCharSet(r1, cset, vset);
997 getCharSet(r2, cset2, vset);
998 if(vset.empty()) {
999 std::set_symmetric_difference(cset.begin(), cset.end(), cset2.begin(), cset2.end(), std::back_inserter(cdiff));
1000 if(cdiff.empty()) {
1001 Trace("regexp-intersect") << "INTERSECT Case 3.1: compare" << std::endl;
1002 cset.clear();
1003 firstChars(r1, cset, vset);
1004 std::vector< Node > vec_nodes;
1005 for(std::set<unsigned>::const_iterator itr = cset.begin();
1006 itr != cset.end(); itr++) {
1007 CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );
1008 std::pair< Node, Node > p(r1, r2);
1009 if(cache[ *itr ].find(p) == cache[ *itr ].end()) {
1010 Node r1l = derivativeSingle(r1, c);
1011 Node r2l = derivativeSingle(r2, c);
1012 std::map< unsigned, std::set< PairNodes > > cache2(cache);
1013 PairNodes p(r1l, r2l);
1014 cache2[ *itr ].insert( p );
1015 Node rt = intersectInternal(r1l, r2l, cache2);
1016 rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1017 NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1018 vec_nodes.push_back(rt);
1019 }
1020 }
1021 rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1022 NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);
1023 rNode = Rewriter::rewrite( rNode );
1024 } else {
1025 Trace("regexp-intersect") << "INTERSECT Case 3.2: diff cset" << std::endl;
1026 rNode = d_emptyRegexp;
1027 }
1028 } else {
1029 //TODO: non-empty var set
1030 AlwaysAssert( false, "Unsupported Yet, 927 REO" );
1031 }
1032 }
1033 d_inter_cache[p] = rNode;
1034 }
1035 Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1036 return rNode;
1037 }
1038 Node RegExpOpr::intersect(Node r1, Node r2) {
1039 std::map< unsigned, std::set< PairNodes > > cache;
1040 return intersectInternal(r1, r2, cache);
1041 }
1042 //printing
1043 std::string RegExpOpr::niceChar( Node r ) {
1044 if(r.isConst()) {
1045 std::string s = r.getConst<CVC4::String>().toString() ;
1046 return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
1047 } else {
1048 return r.toString();
1049 }
1050 }
1051 std::string RegExpOpr::mkString( Node r ) {
1052 std::string retStr;
1053 if(r.isNull()) {
1054 retStr = "Empty";
1055 } else {
1056 int k = r.getKind();
1057 switch( k ) {
1058 case kind::REGEXP_EMPTY: {
1059 retStr += "Empty";
1060 break;
1061 }
1062 case kind::REGEXP_SIGMA: {
1063 retStr += "{W}";
1064 break;
1065 }
1066 case kind::STRING_TO_REGEXP: {
1067 retStr += niceChar( r[0] );
1068 break;
1069 }
1070 case kind::REGEXP_CONCAT: {
1071 //retStr += "(";
1072 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1073 //if(i != 0) retStr += ".";
1074 retStr += mkString( r[i] );
1075 }
1076 //retStr += ")";
1077 break;
1078 }
1079 case kind::REGEXP_UNION: {
1080 if(r == d_sigma) {
1081 retStr += "{A}";
1082 } else {
1083 retStr += "(";
1084 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1085 if(i != 0) retStr += "|";
1086 retStr += mkString( r[i] );
1087 }
1088 retStr += ")";
1089 }
1090 break;
1091 }
1092 case kind::REGEXP_INTER: {
1093 retStr += "(";
1094 for(unsigned i=0; i<r.getNumChildren(); ++i) {
1095 if(i != 0) retStr += "&";
1096 retStr += mkString( r[i] );
1097 }
1098 retStr += ")";
1099 break;
1100 }
1101 case kind::REGEXP_STAR: {
1102 retStr += mkString( r[0] );
1103 retStr += "*";
1104 break;
1105 }
1106 case kind::REGEXP_PLUS: {
1107 retStr += mkString( r[0] );
1108 retStr += "+";
1109 break;
1110 }
1111 case kind::REGEXP_OPT: {
1112 retStr += mkString( r[0] );
1113 retStr += "?";
1114 break;
1115 }
1116 case kind::REGEXP_RANGE: {
1117 retStr += "[";
1118 retStr += niceChar( r[0] );
1119 retStr += "-";
1120 retStr += niceChar( r[1] );
1121 retStr += "]";
1122 break;
1123 }
1124 default:
1125 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1126 //Assert( false );
1127 //return Node::null();
1128 }
1129 }
1130
1131 return retStr;
1132 }
1133
1134 }/* CVC4::theory::strings namespace */
1135 }/* CVC4::theory namespace */
1136 }/* CVC4 namespace */