a new regular expression engine for solving both positive and negative membership...
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Regular Expresion Operations
24 **
25 ** Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_emptyRegexp = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY );
40 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
41 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
42 // All Charactors = all printable ones 32-126
43 d_char_start = 'a';//' ';
44 d_char_end = 'c';//'~';
45 d_sigma = mkAllExceptOne( '\0' );
46 //d_sigma = NodeManager::currentNM()->mkConst( kind::REGEXP_SIGMA );
47 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
48 }
49
50 int RegExpOpr::gcd ( int a, int b ) {
51 int c;
52 while ( a != 0 ) {
53 c = a; a = b%a; b = c;
54 }
55 return b;
56 }
57
58 bool RegExpOpr::checkConstRegExp( Node r ) {
59 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
60 bool ret = true;
61 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
62 ret = d_cstre_cache[r];
63 } else {
64 if(r.getKind() == kind::STRING_TO_REGEXP) {
65 Node tmp = Rewriter::rewrite( r[0] );
66 ret = tmp.isConst();
67 } else {
68 for(unsigned i=0; i<r.getNumChildren(); ++i) {
69 if(!checkConstRegExp(r[i])) {
70 ret = false; break;
71 }
72 }
73 }
74 d_cstre_cache[r] = ret;
75 }
76 return ret;
77 }
78
79 // 0-unknown, 1-yes, 2-no
80 int RegExpOpr::delta( Node r ) {
81 Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
82 int ret = 0;
83 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
84 ret = d_delta_cache[r];
85 } else {
86 int k = r.getKind();
87 switch( k ) {
88 case kind::REGEXP_EMPTY: {
89 ret = 2;
90 break;
91 }
92 case kind::REGEXP_SIGMA: {
93 ret = 2;
94 break;
95 }
96 case kind::STRING_TO_REGEXP: {
97 if(r[0].isConst()) {
98 if(r[0] == d_emptyString) {
99 ret = 1;
100 } else {
101 ret = 2;
102 }
103 } else {
104 ret = 0;
105 }
106 break;
107 }
108 case kind::REGEXP_CONCAT: {
109 bool flag = false;
110 for(unsigned i=0; i<r.getNumChildren(); ++i) {
111 int tmp = delta( r[i] );
112 if(tmp == 2) {
113 ret = 2;
114 break;
115 } else if(tmp == 0) {
116 flag = true;
117 }
118 }
119 if(!flag && ret != 2) {
120 ret = 1;
121 }
122 break;
123 }
124 case kind::REGEXP_UNION: {
125 bool flag = false;
126 for(unsigned i=0; i<r.getNumChildren(); ++i) {
127 int tmp = delta( r[i] );
128 if(tmp == 1) {
129 ret = 1;
130 break;
131 } else if(tmp == 0) {
132 flag = true;
133 }
134 }
135 if(!flag && ret != 1) {
136 ret = 2;
137 }
138 break;
139 }
140 case kind::REGEXP_INTER: {
141 bool flag = true;
142 for(unsigned i=0; i<r.getNumChildren(); ++i) {
143 int tmp = delta( r[i] );
144 if(tmp == 2) {
145 ret = 2; flag=false;
146 break;
147 } else if(tmp == 0) {
148 flag=false;
149 break;
150 }
151 }
152 if(flag) {
153 ret = 1;
154 }
155 break;
156 }
157 case kind::REGEXP_STAR: {
158 ret = 1;
159 break;
160 }
161 case kind::REGEXP_PLUS: {
162 ret = delta( r[0] );
163 break;
164 }
165 case kind::REGEXP_OPT: {
166 ret = 1;
167 break;
168 }
169 case kind::REGEXP_RANGE: {
170 ret = 2;
171 break;
172 }
173 default: {
174 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
175 AlwaysAssert( false );
176 //return Node::null();
177 }
178 }
179 d_delta_cache[r] = ret;
180 }
181 Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
182 return ret;
183 }
184
185 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
186 Assert( c.size() < 2 );
187 Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
188 Node retNode = d_emptyRegexp;
189 PairDvStr dv = std::make_pair( r, c );
190 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
191 retNode = d_dv_cache[dv];
192 } else if( c.isEmptyString() ){
193 int tmp = delta( r );
194 if(tmp == 0) {
195 // TODO variable
196 retNode = d_emptyRegexp;
197 } else if(tmp == 1) {
198 retNode = r;
199 } else {
200 retNode = d_emptyRegexp;
201 }
202 } else {
203 int k = r.getKind();
204 switch( k ) {
205 case kind::REGEXP_EMPTY: {
206 retNode = d_emptyRegexp;
207 break;
208 }
209 case kind::REGEXP_SIGMA: {
210 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
211 break;
212 }
213 case kind::STRING_TO_REGEXP: {
214 if(r[0].isConst()) {
215 if(r[0] == d_emptyString) {
216 retNode = d_emptyRegexp;
217 } else {
218 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
219 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
220 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
221 } else {
222 retNode = d_emptyRegexp;
223 }
224 }
225 } else {
226 // TODO variable
227 retNode = d_emptyRegexp;
228 }
229 break;
230 }
231 case kind::REGEXP_CONCAT: {
232 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
233 std::vector< Node > vec_nodes;
234 for(unsigned i=0; i<r.getNumChildren(); ++i) {
235 Node dc = derivativeSingle(r[i], c);
236 if(dc != d_emptyRegexp) {
237 std::vector< Node > vec_nodes2;
238 if(dc != rees) {
239 vec_nodes2.push_back( dc );
240 }
241 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
242 if(r[j] != rees) {
243 vec_nodes2.push_back( r[j] );
244 }
245 }
246 Node tmp = vec_nodes2.size()==0 ? rees :
247 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
248 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
249 vec_nodes.push_back( tmp );
250 }
251 }
252
253 if( delta( r[i] ) != 1 ) {
254 break;
255 }
256 }
257 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
258 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
259 break;
260 }
261 case kind::REGEXP_UNION: {
262 std::vector< Node > vec_nodes;
263 for(unsigned i=0; i<r.getNumChildren(); ++i) {
264 Node dc = derivativeSingle(r[i], c);
265 if(dc != d_emptyRegexp) {
266 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
267 vec_nodes.push_back( dc );
268 }
269 }
270 Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
271 }
272 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
273 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
274 break;
275 }
276 case kind::REGEXP_INTER: {
277 bool flag = true;
278 bool flag_sg = false;
279 std::vector< Node > vec_nodes;
280 for(unsigned i=0; i<r.getNumChildren(); ++i) {
281 Node dc = derivativeSingle(r[i], c);
282 if(dc != d_emptyRegexp) {
283 if(dc == d_sigma_star) {
284 flag_sg = true;
285 } else {
286 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
287 vec_nodes.push_back( dc );
288 }
289 }
290 } else {
291 flag = false;
292 break;
293 }
294 }
295 if(flag) {
296 if(vec_nodes.size() == 0 && flag_sg) {
297 retNode = d_sigma_star;
298 } else {
299 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
300 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
301 }
302 } else {
303 retNode = d_emptyRegexp;
304 }
305 break;
306 }
307 case kind::REGEXP_STAR: {
308 Node dc = derivativeSingle(r[0], c);
309 if(dc != d_emptyRegexp) {
310 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
311 } else {
312 retNode = d_emptyRegexp;
313 }
314 break;
315 }
316 default: {
317 //TODO: special sym: sigma, none, all
318 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
319 Assert( false, "Unsupported Term" );
320 //return Node::null();
321 }
322 }
323 if(retNode != d_emptyRegexp) {
324 retNode = Rewriter::rewrite( retNode );
325 }
326 d_dv_cache[dv] = retNode;
327 }
328 Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << mkString( retNode ) << std::endl;
329 return retNode;
330 }
331
332 //TODO:
333 bool RegExpOpr::guessLength( Node r, int &co ) {
334 int k = r.getKind();
335 switch( k ) {
336 case kind::STRING_TO_REGEXP:
337 {
338 if(r[0].isConst()) {
339 co += r[0].getConst< CVC4::String >().size();
340 return true;
341 } else {
342 return false;
343 }
344 }
345 break;
346 case kind::REGEXP_CONCAT:
347 {
348 for(unsigned i=0; i<r.getNumChildren(); ++i) {
349 if(!guessLength( r[i], co)) {
350 return false;
351 }
352 }
353 return true;
354 }
355 break;
356 case kind::REGEXP_UNION:
357 {
358 int g_co;
359 for(unsigned i=0; i<r.getNumChildren(); ++i) {
360 int cop = 0;
361 if(!guessLength( r[i], cop)) {
362 return false;
363 }
364 if(i == 0) {
365 g_co = cop;
366 } else {
367 g_co = gcd(g_co, cop);
368 }
369 }
370 return true;
371 }
372 break;
373 case kind::REGEXP_INTER:
374 {
375 int g_co;
376 for(unsigned i=0; i<r.getNumChildren(); ++i) {
377 int cop = 0;
378 if(!guessLength( r[i], cop)) {
379 return false;
380 }
381 if(i == 0) {
382 g_co = cop;
383 } else {
384 g_co = gcd(g_co, cop);
385 }
386 }
387 return true;
388 }
389 break;
390 case kind::REGEXP_STAR:
391 {
392 co = 0;
393 return true;
394 }
395 break;
396 default:
397 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
398 return false;
399 }
400 }
401
402 void RegExpOpr::firstChar( Node r ) {
403 Trace("strings-regexp-firstchar") << "Head characters: ";
404 for(char ch = d_char_start; ch <= d_char_end; ++ch) {
405 CVC4::String c(ch);
406 Node dc = derivativeSingle(r, ch);
407 if(!dc.isNull()) {
408 Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;
409 }
410 }
411 Trace("strings-regexp-firstchar") << std::endl;
412 }
413
414 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
415 int k = r.getKind();
416 switch( k ) {
417 case kind::STRING_TO_REGEXP:
418 {
419 if(r[0].isConst()) {
420 if(r[0] != d_emptyString) {
421 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
422 if(c.isEmptyString()) {
423 vec_chars.push_back( t1 );
424 return true;
425 } else {
426 char t2 = c.getFirstChar();
427 if(t1 != t2) {
428 return false;
429 } else {
430 if(c.size() >= 2) {
431 vec_chars.push_back( c.substr(1,1).getFirstChar() );
432 } else {
433 vec_chars.push_back( '\0' );
434 }
435 return true;
436 }
437 }
438 } else {
439 return false;
440 }
441 } else {
442 return false;
443 }
444 }
445 break;
446 case kind::REGEXP_CONCAT:
447 {
448 for(unsigned i=0; i<r.getNumChildren(); ++i) {
449 if( follow(r[i], c, vec_chars) ) {
450 if(vec_chars[vec_chars.size() - 1] == '\0') {
451 vec_chars.pop_back();
452 c = d_emptyString.getConst< CVC4::String >();
453 }
454 } else {
455 return false;
456 }
457 }
458 vec_chars.push_back( '\0' );
459 return true;
460 }
461 break;
462 case kind::REGEXP_UNION:
463 {
464 bool flag = false;
465 for(unsigned i=0; i<r.getNumChildren(); ++i) {
466 if( follow(r[i], c, vec_chars) ) {
467 flag=true;
468 }
469 }
470 return flag;
471 }
472 break;
473 case kind::REGEXP_INTER:
474 {
475 std::vector< char > vt2;
476 for(unsigned i=0; i<r.getNumChildren(); ++i) {
477 std::vector< char > v_tmp;
478 if( !follow(r[i], c, v_tmp) ) {
479 return false;
480 }
481 std::vector< char > vt3(vt2);
482 vt2.clear();
483 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
484 if(vt2.size() == 0) {
485 return false;
486 }
487 }
488 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
489 return true;
490 }
491 break;
492 case kind::REGEXP_STAR:
493 {
494 if(follow(r[0], c, vec_chars)) {
495 if(vec_chars[vec_chars.size() - 1] == '\0') {
496 if(c.isEmptyString()) {
497 return true;
498 } else {
499 vec_chars.pop_back();
500 c = d_emptyString.getConst< CVC4::String >();
501 return follow(r[0], c, vec_chars);
502 }
503 } else {
504 return true;
505 }
506 } else {
507 vec_chars.push_back( '\0' );
508 return true;
509 }
510 }
511 break;
512 /*
513 case kind::REGEXP_PLUS:
514 {
515 ret = delta( r[0] );
516 }
517 break;
518 case kind::REGEXP_OPT:
519 {
520 ret = 1;
521 }
522 break;
523 case kind::REGEXP_RANGE:
524 {
525 ret = 2;
526 }
527 break;*/
528 default:
529 //TODO: special sym: sigma, none, all
530 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
531 //AlwaysAssert( false );
532 //return Node::null();
533 return false;
534 }
535 }
536
537 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
538 std::vector< Node > vec_nodes;
539 for(char c=d_char_start; c<=d_char_end; ++c) {
540 if(c != exp_c ) {
541 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
542 vec_nodes.push_back( n );
543 }
544 }
545 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
546 }
547
548 Node RegExpOpr::complement( Node r ) {
549 Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl;
550 Node retNode = r;
551 if( d_compl_cache.find( r ) != d_compl_cache.end() ) {
552 retNode = d_compl_cache[r];
553 } else {
554 int k = r.getKind();
555 switch( k ) {
556 case kind::STRING_TO_REGEXP:
557 {
558 if(r[0].isConst()) {
559 if(r[0] == d_emptyString) {
560 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
561 } else {
562 std::vector< Node > vec_nodes;
563 vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) );
564 CVC4::String s = r[0].getConst<String>();
565 for(unsigned i=0; i<s.size(); ++i) {
566 char c = s.substr(i, 1).getFirstChar();
567 Node tmp = mkAllExceptOne( c );
568 if(i != 0 ) {
569 Node tmph = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
570 NodeManager::currentNM()->mkConst( s.substr(0, i) ) );
571 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp );
572 }
573 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star );
574 vec_nodes.push_back( tmp );
575 }
576 Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );
577 vec_nodes.push_back( tmp );
578 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
579 }
580 } else {
581 Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;
582 //AlwaysAssert( false );
583 //return Node::null();
584 }
585 }
586 break;
587 case kind::REGEXP_CONCAT:
588 {
589 std::vector< Node > vec_nodes;
590 for(unsigned i=0; i<r.getNumChildren(); ++i) {
591 Node tmp = complement( r[i] );
592 for(unsigned j=0; j<i; ++j) {
593 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r[j], tmp );
594 }
595 if(i != r.getNumChildren() - 1) {
596 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp,
597 NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) );
598 }
599 vec_nodes.push_back( tmp );
600 }
601 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
602 }
603 break;
604 case kind::REGEXP_UNION:
605 {
606 std::vector< Node > vec_nodes;
607 for(unsigned i=0; i<r.getNumChildren(); ++i) {
608 Node tmp = complement( r[i] );
609 vec_nodes.push_back( tmp );
610 }
611 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes );
612 }
613 break;
614 case kind::REGEXP_INTER:
615 {
616 std::vector< Node > vec_nodes;
617 for(unsigned i=0; i<r.getNumChildren(); ++i) {
618 Node tmp = complement( r[i] );
619 vec_nodes.push_back( tmp );
620 }
621 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
622 }
623 break;
624 case kind::REGEXP_STAR:
625 {
626 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
627 Node tmp = complement( r[0] );
628 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp );
629 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp );
630 }
631 break;
632 default:
633 //TODO: special sym: sigma, none, all
634 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl;
635 //AlwaysAssert( false );
636 //return Node::null();
637 }
638 d_compl_cache[r] = retNode;
639 }
640 Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl;
641 return retNode;
642 }
643
644 //simplify
645 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
646 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
647 Assert(t.getKind() == kind::STRING_IN_REGEXP);
648 Node str = Rewriter::rewrite(t[0]);
649 Node re = Rewriter::rewrite(t[1]);
650 if(polarity) {
651 simplifyPRegExp( str, re, new_nodes );
652 } else {
653 simplifyNRegExp( str, re, new_nodes );
654 }
655 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
656 for(unsigned i=0; i<new_nodes.size(); i++) {
657 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
658 }
659 }
660 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
661 std::pair < Node, Node > p(s, r);
662 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
663 if(itr != d_simpl_neg_cache.end()) {
664 new_nodes.push_back( itr->second );
665 return;
666 } else {
667 int k = r.getKind();
668 Node conc;
669 switch( k ) {
670 case kind::REGEXP_EMPTY: {
671 conc = d_true;
672 break;
673 }
674 case kind::REGEXP_SIGMA: {
675 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
676 break;
677 }
678 case kind::STRING_TO_REGEXP: {
679 conc = s.eqNode(r[0]).negate();
680 break;
681 }
682 case kind::REGEXP_CONCAT: {
683 //TODO: rewrite empty
684 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
685 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
686 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
687 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
688 Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
689 Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
690 Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
691 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
692 Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
693 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
694 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
695 s1r1 = s1.eqNode(r[0][0]).negate();
696 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
697 s1r1 = d_true;
698 }
699 Node r2 = r[1];
700 if(r.getNumChildren() > 2) {
701 std::vector< Node > nvec;
702 for(unsigned i=1; i<r.getNumChildren(); i++) {
703 nvec.push_back( r[i] );
704 }
705 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
706 }
707 r2 = Rewriter::rewrite(r2);
708 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
709 if(r2.getKind() == kind::STRING_TO_REGEXP) {
710 s2r2 = s2.eqNode(r2[0]).negate();
711 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
712 s2r2 = d_true;
713 }
714
715 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
716 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
717 conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
718 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
719 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
720 break;
721 }
722 case kind::REGEXP_UNION: {
723 std::vector< Node > c_and;
724 for(unsigned i=0; i<r.getNumChildren(); ++i) {
725 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
726 c_and.push_back( r[i][0].eqNode(s).negate() );
727 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
728 continue;
729 } else {
730 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
731 }
732 }
733 conc = c_and.size() == 0 ? d_true :
734 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
735 break;
736 }
737 case kind::REGEXP_INTER: {
738 bool emptyflag = false;
739 std::vector< Node > c_or;
740 for(unsigned i=0; i<r.getNumChildren(); ++i) {
741 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
742 c_or.push_back( r[i][0].eqNode(s).negate() );
743 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
744 emptyflag = true;
745 break;
746 } else {
747 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
748 }
749 }
750 if(emptyflag) {
751 conc = d_true;
752 } else {
753 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
754 }
755 break;
756 }
757 case kind::REGEXP_STAR: {
758 if(s == d_emptyString) {
759 conc = d_false;
760 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
761 conc = s.eqNode(d_emptyString).negate();
762 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
763 conc = d_false;
764 } else {
765 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
766 Node sne = s.eqNode(d_emptyString).negate();
767 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
768 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
769 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
770 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
771 //internal
772 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
773 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
774 //Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
775 //Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
776 //Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
777 //Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
778 //Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
779 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
780 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
781
782 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
783 //conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
784 //conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
785 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
786 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
787 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
788 }
789 break;
790 }
791 default: {
792 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
793 AlwaysAssert( false, "Unsupported Term" );
794 }
795 }
796 conc = Rewriter::rewrite( conc );
797 new_nodes.push_back( conc );
798 d_simpl_neg_cache[p] = conc;
799 }
800 }
801 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
802 std::pair < Node, Node > p(s, r);
803 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
804 if(itr != d_simpl_cache.end()) {
805 new_nodes.push_back( itr->second );
806 return;
807 } else {
808 int k = r.getKind();
809 Node conc;
810 switch( k ) {
811 case kind::REGEXP_EMPTY: {
812 conc = d_false;
813 break;
814 }
815 case kind::REGEXP_SIGMA: {
816 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
817 break;
818 }
819 case kind::STRING_TO_REGEXP: {
820 conc = s.eqNode(r[0]);
821 break;
822 }
823 case kind::REGEXP_CONCAT: {
824 std::vector< Node > nvec;
825 std::vector< Node > cc;
826 bool emptyflag = false;
827 for(unsigned i=0; i<r.getNumChildren(); ++i) {
828 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
829 cc.push_back( r[i][0] );
830 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
831 emptyflag = true;
832 break;
833 } else {
834 Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" );
835 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
836 nvec.push_back(lem);
837 cc.push_back(sk);
838 }
839 }
840 if(emptyflag) {
841 conc = d_false;
842 } else {
843 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
844 nvec.push_back(lem);
845 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
846 }
847 break;
848 }
849 case kind::REGEXP_UNION: {
850 std::vector< Node > c_or;
851 for(unsigned i=0; i<r.getNumChildren(); ++i) {
852 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
853 c_or.push_back( r[i][0].eqNode(s) );
854 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
855 continue;
856 } else {
857 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
858 }
859 }
860 conc = c_or.size() == 0 ? d_false :
861 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
862 break;
863 }
864 case kind::REGEXP_INTER: {
865 std::vector< Node > c_and;
866 bool emptyflag = false;
867 for(unsigned i=0; i<r.getNumChildren(); ++i) {
868 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
869 c_and.push_back( r[i][0].eqNode(s) );
870 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
871 emptyflag = true;
872 break;
873 } else {
874 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
875 }
876 }
877 if(emptyflag) {
878 conc = d_false;
879 } else {
880 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
881 }
882 break;
883 }
884 case kind::REGEXP_STAR: {
885 if(s == d_emptyString) {
886 conc = d_true;
887 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
888 conc = s.eqNode(d_emptyString);
889 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
890 conc = d_true;
891 } else {
892 Node se = s.eqNode(d_emptyString);
893 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
894 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
895 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
896 Node s1nz = sk1.eqNode(d_emptyString).negate();
897 Node s2nz = sk2.eqNode(d_emptyString).negate();
898 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
899 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
900 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
901
902 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
903 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
904 }
905 break;
906 }
907 default: {
908 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
909 AlwaysAssert( false, "Unsupported Term" );
910 }
911 }
912 conc = Rewriter::rewrite( conc );
913 new_nodes.push_back( conc );
914 d_simpl_cache[p] = conc;
915 }
916 }
917
918 //printing
919 std::string RegExpOpr::niceChar( Node r ) {
920 if(r.isConst()) {
921 std::string s = r.getConst<CVC4::String>().toString() ;
922 return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
923 } else {
924 return r.toString();
925 }
926 }
927 std::string RegExpOpr::mkString( Node r ) {
928 std::string retStr;
929 if(r.isNull()) {
930 retStr = "Empty";
931 } else {
932 int k = r.getKind();
933 switch( k ) {
934 case kind::REGEXP_EMPTY: {
935 retStr += "Empty";
936 break;
937 }
938 case kind::REGEXP_SIGMA: {
939 retStr += "{W}";
940 break;
941 }
942 case kind::STRING_TO_REGEXP: {
943 retStr += niceChar( r[0] );
944 break;
945 }
946 case kind::REGEXP_CONCAT: {
947 retStr += "(";
948 for(unsigned i=0; i<r.getNumChildren(); ++i) {
949 if(i != 0) retStr += ".";
950 retStr += mkString( r[i] );
951 }
952 retStr += ")";
953 break;
954 }
955 case kind::REGEXP_UNION: {
956 if(r == d_sigma) {
957 retStr += "{A}";
958 } else {
959 retStr += "(";
960 for(unsigned i=0; i<r.getNumChildren(); ++i) {
961 if(i != 0) retStr += "|";
962 retStr += mkString( r[i] );
963 }
964 retStr += ")";
965 }
966 break;
967 }
968 case kind::REGEXP_INTER: {
969 retStr += "(";
970 for(unsigned i=0; i<r.getNumChildren(); ++i) {
971 if(i != 0) retStr += "&";
972 retStr += mkString( r[i] );
973 }
974 retStr += ")";
975 break;
976 }
977 case kind::REGEXP_STAR: {
978 retStr += mkString( r[0] );
979 retStr += "*";
980 break;
981 }
982 case kind::REGEXP_PLUS: {
983 retStr += mkString( r[0] );
984 retStr += "+";
985 break;
986 }
987 case kind::REGEXP_OPT: {
988 retStr += mkString( r[0] );
989 retStr += "?";
990 break;
991 }
992 case kind::REGEXP_RANGE: {
993 retStr += "[";
994 retStr += niceChar( r[0] );
995 retStr += "-";
996 retStr += niceChar( r[1] );
997 retStr += "]";
998 break;
999 }
1000 default:
1001 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
1002 //AlwaysAssert( false );
1003 //return Node::null();
1004 }
1005 }
1006
1007 return retStr;
1008 }
1009
1010 }/* CVC4::theory::strings namespace */
1011 }/* CVC4::theory namespace */
1012 }/* CVC4 namespace */