adds incremental for strings; clean-up codes
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2
3 /*! \file regexp_operation.cpp
4
5 ** \verbatim
6
7 ** Original author: Tianyi Liang
8
9 ** Major contributors: none
10
11 ** Minor contributors (to current version): none
12
13 ** This file is part of the CVC4 project.
14
15 ** Copyright (c) 2009-2013 New York University and The University of Iowa
16
17 ** See the file COPYING in the top-level source directory for licensing
18
19 ** information.\endverbatim
20
21 **
22
23 ** \brief Regular Expresion Operations
24 **
25 ** Regular Expresion Operations
26 **/
27
28 #include "theory/strings/regexp_operation.h"
29 #include "expr/kind.h"
30
31 namespace CVC4 {
32 namespace theory {
33 namespace strings {
34
35 RegExpOpr::RegExpOpr() {
36 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
37 d_true = NodeManager::currentNM()->mkConst( true );
38 d_false = NodeManager::currentNM()->mkConst( false );
39 d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );
40 d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );
41 std::vector< Node > nvec;
42 d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );
43 // All Charactors = all printable ones 32-126
44 //d_char_start = 'a';//' ';
45 //d_char_end = 'c';//'~';
46 //d_sigma = mkAllExceptOne( '\0' );
47 d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );
48 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
49 }
50
51 int RegExpOpr::gcd ( int a, int b ) {
52 int c;
53 while ( a != 0 ) {
54 c = a; a = b%a; b = c;
55 }
56 return b;
57 }
58
59 bool RegExpOpr::checkConstRegExp( Node r ) {
60 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
61 bool ret = true;
62 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
63 ret = d_cstre_cache[r];
64 } else {
65 if(r.getKind() == kind::STRING_TO_REGEXP) {
66 Node tmp = Rewriter::rewrite( r[0] );
67 ret = tmp.isConst();
68 } else {
69 for(unsigned i=0; i<r.getNumChildren(); ++i) {
70 if(!checkConstRegExp(r[i])) {
71 ret = false; break;
72 }
73 }
74 }
75 d_cstre_cache[r] = ret;
76 }
77 return ret;
78 }
79
80 // 0-unknown, 1-yes, 2-no
81 int RegExpOpr::delta( Node r ) {
82 Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
83 int ret = 0;
84 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
85 ret = d_delta_cache[r];
86 } else {
87 int k = r.getKind();
88 switch( k ) {
89 case kind::REGEXP_EMPTY: {
90 ret = 2;
91 break;
92 }
93 case kind::REGEXP_SIGMA: {
94 ret = 2;
95 break;
96 }
97 case kind::STRING_TO_REGEXP: {
98 if(r[0].isConst()) {
99 if(r[0] == d_emptyString) {
100 ret = 1;
101 } else {
102 ret = 2;
103 }
104 } else {
105 ret = 0;
106 }
107 break;
108 }
109 case kind::REGEXP_CONCAT: {
110 bool flag = false;
111 for(unsigned i=0; i<r.getNumChildren(); ++i) {
112 int tmp = delta( r[i] );
113 if(tmp == 2) {
114 ret = 2;
115 break;
116 } else if(tmp == 0) {
117 flag = true;
118 }
119 }
120 if(!flag && ret != 2) {
121 ret = 1;
122 }
123 break;
124 }
125 case kind::REGEXP_UNION: {
126 bool flag = false;
127 for(unsigned i=0; i<r.getNumChildren(); ++i) {
128 int tmp = delta( r[i] );
129 if(tmp == 1) {
130 ret = 1;
131 break;
132 } else if(tmp == 0) {
133 flag = true;
134 }
135 }
136 if(!flag && ret != 1) {
137 ret = 2;
138 }
139 break;
140 }
141 case kind::REGEXP_INTER: {
142 bool flag = true;
143 for(unsigned i=0; i<r.getNumChildren(); ++i) {
144 int tmp = delta( r[i] );
145 if(tmp == 2) {
146 ret = 2; flag=false;
147 break;
148 } else if(tmp == 0) {
149 flag=false;
150 break;
151 }
152 }
153 if(flag) {
154 ret = 1;
155 }
156 break;
157 }
158 case kind::REGEXP_STAR: {
159 ret = 1;
160 break;
161 }
162 case kind::REGEXP_PLUS: {
163 ret = delta( r[0] );
164 break;
165 }
166 case kind::REGEXP_OPT: {
167 ret = 1;
168 break;
169 }
170 case kind::REGEXP_RANGE: {
171 ret = 2;
172 break;
173 }
174 default: {
175 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
176 Assert( false );
177 //return Node::null();
178 }
179 }
180 d_delta_cache[r] = ret;
181 }
182 Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
183 return ret;
184 }
185
186 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
187 Assert( c.size() < 2 );
188 Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
189 Node retNode = d_emptyRegexp;
190 PairDvStr dv = std::make_pair( r, c );
191 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
192 retNode = d_dv_cache[dv];
193 } else if( c.isEmptyString() ){
194 int tmp = delta( r );
195 if(tmp == 0) {
196 // TODO variable
197 retNode = d_emptyRegexp;
198 } else if(tmp == 1) {
199 retNode = r;
200 } else {
201 retNode = d_emptyRegexp;
202 }
203 } else {
204 int k = r.getKind();
205 switch( k ) {
206 case kind::REGEXP_EMPTY: {
207 retNode = d_emptyRegexp;
208 break;
209 }
210 case kind::REGEXP_SIGMA: {
211 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
212 break;
213 }
214 case kind::STRING_TO_REGEXP: {
215 if(r[0].isConst()) {
216 if(r[0] == d_emptyString) {
217 retNode = d_emptyRegexp;
218 } else {
219 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
220 retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
221 r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
222 } else {
223 retNode = d_emptyRegexp;
224 }
225 }
226 } else {
227 // TODO variable
228 retNode = d_emptyRegexp;
229 }
230 break;
231 }
232 case kind::REGEXP_CONCAT: {
233 Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
234 std::vector< Node > vec_nodes;
235 for(unsigned i=0; i<r.getNumChildren(); ++i) {
236 Node dc = derivativeSingle(r[i], c);
237 if(dc != d_emptyRegexp) {
238 std::vector< Node > vec_nodes2;
239 if(dc != rees) {
240 vec_nodes2.push_back( dc );
241 }
242 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
243 if(r[j] != rees) {
244 vec_nodes2.push_back( r[j] );
245 }
246 }
247 Node tmp = vec_nodes2.size()==0 ? rees :
248 vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
249 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
250 vec_nodes.push_back( tmp );
251 }
252 }
253
254 if( delta( r[i] ) != 1 ) {
255 break;
256 }
257 }
258 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
259 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
260 break;
261 }
262 case kind::REGEXP_UNION: {
263 std::vector< Node > vec_nodes;
264 for(unsigned i=0; i<r.getNumChildren(); ++i) {
265 Node dc = derivativeSingle(r[i], c);
266 if(dc != d_emptyRegexp) {
267 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
268 vec_nodes.push_back( dc );
269 }
270 }
271 Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
272 }
273 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
274 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
275 break;
276 }
277 case kind::REGEXP_INTER: {
278 bool flag = true;
279 bool flag_sg = false;
280 std::vector< Node > vec_nodes;
281 for(unsigned i=0; i<r.getNumChildren(); ++i) {
282 Node dc = derivativeSingle(r[i], c);
283 if(dc != d_emptyRegexp) {
284 if(dc == d_sigma_star) {
285 flag_sg = true;
286 } else {
287 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
288 vec_nodes.push_back( dc );
289 }
290 }
291 } else {
292 flag = false;
293 break;
294 }
295 }
296 if(flag) {
297 if(vec_nodes.size() == 0 && flag_sg) {
298 retNode = d_sigma_star;
299 } else {
300 retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
301 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
302 }
303 } else {
304 retNode = d_emptyRegexp;
305 }
306 break;
307 }
308 case kind::REGEXP_STAR: {
309 Node dc = derivativeSingle(r[0], c);
310 if(dc != d_emptyRegexp) {
311 retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
312 } else {
313 retNode = d_emptyRegexp;
314 }
315 break;
316 }
317 default: {
318 //TODO: special sym: sigma, none, all
319 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
320 Assert( false, "Unsupported Term" );
321 //return Node::null();
322 }
323 }
324 if(retNode != d_emptyRegexp) {
325 retNode = Rewriter::rewrite( retNode );
326 }
327 d_dv_cache[dv] = retNode;
328 }
329 Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << mkString( retNode ) << std::endl;
330 return retNode;
331 }
332
333 //TODO:
334 bool RegExpOpr::guessLength( Node r, int &co ) {
335 int k = r.getKind();
336 switch( k ) {
337 case kind::STRING_TO_REGEXP:
338 {
339 if(r[0].isConst()) {
340 co += r[0].getConst< CVC4::String >().size();
341 return true;
342 } else {
343 return false;
344 }
345 }
346 break;
347 case kind::REGEXP_CONCAT:
348 {
349 for(unsigned i=0; i<r.getNumChildren(); ++i) {
350 if(!guessLength( r[i], co)) {
351 return false;
352 }
353 }
354 return true;
355 }
356 break;
357 case kind::REGEXP_UNION:
358 {
359 int g_co;
360 for(unsigned i=0; i<r.getNumChildren(); ++i) {
361 int cop = 0;
362 if(!guessLength( r[i], cop)) {
363 return false;
364 }
365 if(i == 0) {
366 g_co = cop;
367 } else {
368 g_co = gcd(g_co, cop);
369 }
370 }
371 return true;
372 }
373 break;
374 case kind::REGEXP_INTER:
375 {
376 int g_co;
377 for(unsigned i=0; i<r.getNumChildren(); ++i) {
378 int cop = 0;
379 if(!guessLength( r[i], cop)) {
380 return false;
381 }
382 if(i == 0) {
383 g_co = cop;
384 } else {
385 g_co = gcd(g_co, cop);
386 }
387 }
388 return true;
389 }
390 break;
391 case kind::REGEXP_STAR:
392 {
393 co = 0;
394 return true;
395 }
396 break;
397 default:
398 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in membership of RegExp." << std::endl;
399 return false;
400 }
401 }
402
403 void RegExpOpr::firstChar( Node r ) {
404 Trace("strings-regexp-firstchar") << "Head characters: ";
405 for(char ch = d_char_start; ch <= d_char_end; ++ch) {
406 CVC4::String c(ch);
407 Node dc = derivativeSingle(r, ch);
408 if(!dc.isNull()) {
409 Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;
410 }
411 }
412 Trace("strings-regexp-firstchar") << std::endl;
413 }
414
415 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
416 int k = r.getKind();
417 switch( k ) {
418 case kind::STRING_TO_REGEXP:
419 {
420 if(r[0].isConst()) {
421 if(r[0] != d_emptyString) {
422 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
423 if(c.isEmptyString()) {
424 vec_chars.push_back( t1 );
425 return true;
426 } else {
427 char t2 = c.getFirstChar();
428 if(t1 != t2) {
429 return false;
430 } else {
431 if(c.size() >= 2) {
432 vec_chars.push_back( c.substr(1,1).getFirstChar() );
433 } else {
434 vec_chars.push_back( '\0' );
435 }
436 return true;
437 }
438 }
439 } else {
440 return false;
441 }
442 } else {
443 return false;
444 }
445 }
446 break;
447 case kind::REGEXP_CONCAT:
448 {
449 for(unsigned i=0; i<r.getNumChildren(); ++i) {
450 if( follow(r[i], c, vec_chars) ) {
451 if(vec_chars[vec_chars.size() - 1] == '\0') {
452 vec_chars.pop_back();
453 c = d_emptyString.getConst< CVC4::String >();
454 }
455 } else {
456 return false;
457 }
458 }
459 vec_chars.push_back( '\0' );
460 return true;
461 }
462 break;
463 case kind::REGEXP_UNION:
464 {
465 bool flag = false;
466 for(unsigned i=0; i<r.getNumChildren(); ++i) {
467 if( follow(r[i], c, vec_chars) ) {
468 flag=true;
469 }
470 }
471 return flag;
472 }
473 break;
474 case kind::REGEXP_INTER:
475 {
476 std::vector< char > vt2;
477 for(unsigned i=0; i<r.getNumChildren(); ++i) {
478 std::vector< char > v_tmp;
479 if( !follow(r[i], c, v_tmp) ) {
480 return false;
481 }
482 std::vector< char > vt3(vt2);
483 vt2.clear();
484 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
485 if(vt2.size() == 0) {
486 return false;
487 }
488 }
489 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
490 return true;
491 }
492 break;
493 case kind::REGEXP_STAR:
494 {
495 if(follow(r[0], c, vec_chars)) {
496 if(vec_chars[vec_chars.size() - 1] == '\0') {
497 if(c.isEmptyString()) {
498 return true;
499 } else {
500 vec_chars.pop_back();
501 c = d_emptyString.getConst< CVC4::String >();
502 return follow(r[0], c, vec_chars);
503 }
504 } else {
505 return true;
506 }
507 } else {
508 vec_chars.push_back( '\0' );
509 return true;
510 }
511 }
512 break;
513 /*
514 case kind::REGEXP_PLUS:
515 {
516 ret = delta( r[0] );
517 }
518 break;
519 case kind::REGEXP_OPT:
520 {
521 ret = 1;
522 }
523 break;
524 case kind::REGEXP_RANGE:
525 {
526 ret = 2;
527 }
528 break;*/
529 default: {
530 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
531 //AlwaysAssert( false );
532 //return Node::null();
533 return false;
534 }
535 }
536 }
537
538 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
539 std::vector< Node > vec_nodes;
540 for(char c=d_char_start; c<=d_char_end; ++c) {
541 if(c != exp_c ) {
542 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
543 vec_nodes.push_back( n );
544 }
545 }
546 return NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes );
547 }
548
549 //simplify
550 void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) {
551 Trace("strings-regexp-simpl") << "RegExp-Simpl starts with " << t << ", polarity=" << polarity << std::endl;
552 Assert(t.getKind() == kind::STRING_IN_REGEXP);
553 Node str = Rewriter::rewrite(t[0]);
554 Node re = Rewriter::rewrite(t[1]);
555 if(polarity) {
556 simplifyPRegExp( str, re, new_nodes );
557 } else {
558 simplifyNRegExp( str, re, new_nodes );
559 }
560 Trace("strings-regexp-simpl") << "RegExp-Simpl returns (" << new_nodes.size() << "):\n";
561 for(unsigned i=0; i<new_nodes.size(); i++) {
562 Trace("strings-regexp-simpl") << "\t" << new_nodes[i] << std::endl;
563 }
564 }
565 void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
566 std::pair < Node, Node > p(s, r);
567 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);
568 if(itr != d_simpl_neg_cache.end()) {
569 new_nodes.push_back( itr->second );
570 return;
571 } else {
572 int k = r.getKind();
573 Node conc;
574 switch( k ) {
575 case kind::REGEXP_EMPTY: {
576 conc = d_true;
577 break;
578 }
579 case kind::REGEXP_SIGMA: {
580 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate();
581 break;
582 }
583 case kind::STRING_TO_REGEXP: {
584 conc = s.eqNode(r[0]).negate();
585 break;
586 }
587 case kind::REGEXP_CONCAT: {
588 //TODO: rewrite empty
589 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
590 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
591 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_zero),
592 NodeManager::currentNM()->mkNode( kind::GEQ, NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s), b1 ) );
593 Node s1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
594 Node s2 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->stringType());
595 Node b2v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, s1, s2);
596 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, s1, s2));
597 Node s1len = b1.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s1));
598 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
599 if(r[0].getKind() == kind::STRING_TO_REGEXP) {
600 s1r1 = s1.eqNode(r[0][0]).negate();
601 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
602 s1r1 = d_true;
603 }
604 Node r2 = r[1];
605 if(r.getNumChildren() > 2) {
606 std::vector< Node > nvec;
607 for(unsigned i=1; i<r.getNumChildren(); i++) {
608 nvec.push_back( r[i] );
609 }
610 r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, nvec);
611 }
612 r2 = Rewriter::rewrite(r2);
613 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r2).negate();
614 if(r2.getKind() == kind::STRING_TO_REGEXP) {
615 s2r2 = s2.eqNode(r2[0]).negate();
616 } else if(r2.getKind() == kind::REGEXP_EMPTY) {
617 s2r2 = d_true;
618 }
619
620 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
621 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1len, conc);
622 conc = NodeManager::currentNM()->mkNode(kind::EXISTS, b2v, conc);
623 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
624 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
625 break;
626 }
627 case kind::REGEXP_UNION: {
628 std::vector< Node > c_and;
629 for(unsigned i=0; i<r.getNumChildren(); ++i) {
630 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
631 c_and.push_back( r[i][0].eqNode(s).negate() );
632 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
633 continue;
634 } else {
635 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
636 }
637 }
638 conc = c_and.size() == 0 ? d_true :
639 c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
640 break;
641 }
642 case kind::REGEXP_INTER: {
643 bool emptyflag = false;
644 std::vector< Node > c_or;
645 for(unsigned i=0; i<r.getNumChildren(); ++i) {
646 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
647 c_or.push_back( r[i][0].eqNode(s).negate() );
648 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
649 emptyflag = true;
650 break;
651 } else {
652 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]).negate());
653 }
654 }
655 if(emptyflag) {
656 conc = d_true;
657 } else {
658 conc = c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
659 }
660 break;
661 }
662 case kind::REGEXP_STAR: {
663 if(s == d_emptyString) {
664 conc = d_false;
665 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
666 conc = s.eqNode(d_emptyString).negate();
667 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
668 conc = d_false;
669 } else {
670 Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
671 Node sne = s.eqNode(d_emptyString).negate();
672 Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
673 Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
674 Node g1 = NodeManager::currentNM()->mkNode( kind::AND, NodeManager::currentNM()->mkNode(kind::GEQ, b1, d_one),
675 NodeManager::currentNM()->mkNode( kind::GEQ, lens, b1 ) );
676 //internal
677 Node s1 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, d_zero, b1);
678 Node s2 = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, s, b1, NodeManager::currentNM()->mkNode(kind::MINUS, lens, b1));
679 Node s1r1 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s1, r[0]).negate();
680 Node s2r2 = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s2, r).negate();
681
682 conc = NodeManager::currentNM()->mkNode(kind::OR, s1r1, s2r2);
683 conc = NodeManager::currentNM()->mkNode(kind::IMPLIES, g1, conc);
684 conc = NodeManager::currentNM()->mkNode(kind::FORALL, b1v, conc);
685 conc = NodeManager::currentNM()->mkNode(kind::AND, sne, conc);
686 }
687 break;
688 }
689 default: {
690 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl;
691 Assert( false, "Unsupported Term" );
692 }
693 }
694 conc = Rewriter::rewrite( conc );
695 new_nodes.push_back( conc );
696 d_simpl_neg_cache[p] = conc;
697 }
698 }
699 void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {
700 std::pair < Node, Node > p(s, r);
701 std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);
702 if(itr != d_simpl_cache.end()) {
703 new_nodes.push_back( itr->second );
704 return;
705 } else {
706 int k = r.getKind();
707 Node conc;
708 switch( k ) {
709 case kind::REGEXP_EMPTY: {
710 conc = d_false;
711 break;
712 }
713 case kind::REGEXP_SIGMA: {
714 conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));
715 break;
716 }
717 case kind::STRING_TO_REGEXP: {
718 conc = s.eqNode(r[0]);
719 break;
720 }
721 case kind::REGEXP_CONCAT: {
722 std::vector< Node > nvec;
723 std::vector< Node > cc;
724 bool emptyflag = false;
725 for(unsigned i=0; i<r.getNumChildren(); ++i) {
726 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
727 cc.push_back( r[i][0] );
728 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
729 emptyflag = true;
730 break;
731 } else {
732 Node sk = NodeManager::currentNM()->mkSkolem( "rc_$$", s.getType(), "created for regular expression concat" );
733 Node lem = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk, r[i]);
734 nvec.push_back(lem);
735 cc.push_back(sk);
736 }
737 }
738 if(emptyflag) {
739 conc = d_false;
740 } else {
741 Node lem = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );
742 nvec.push_back(lem);
743 conc = nvec.size() == 1 ? nvec[0] : NodeManager::currentNM()->mkNode(kind::AND, nvec);
744 }
745 break;
746 }
747 case kind::REGEXP_UNION: {
748 std::vector< Node > c_or;
749 for(unsigned i=0; i<r.getNumChildren(); ++i) {
750 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
751 c_or.push_back( r[i][0].eqNode(s) );
752 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
753 continue;
754 } else {
755 c_or.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
756 }
757 }
758 conc = c_or.size() == 0 ? d_false :
759 c_or.size() == 1 ? c_or[0] : NodeManager::currentNM()->mkNode(kind::OR, c_or);
760 break;
761 }
762 case kind::REGEXP_INTER: {
763 std::vector< Node > c_and;
764 bool emptyflag = false;
765 for(unsigned i=0; i<r.getNumChildren(); ++i) {
766 if(r[i].getKind() == kind::STRING_TO_REGEXP) {
767 c_and.push_back( r[i][0].eqNode(s) );
768 } else if(r[i].getKind() == kind::REGEXP_EMPTY) {
769 emptyflag = true;
770 break;
771 } else {
772 c_and.push_back(NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[i]));
773 }
774 }
775 if(emptyflag) {
776 conc = d_false;
777 } else {
778 conc = c_and.size() == 1 ? c_and[0] : NodeManager::currentNM()->mkNode(kind::AND, c_and);
779 }
780 break;
781 }
782 case kind::REGEXP_STAR: {
783 if(s == d_emptyString) {
784 conc = d_true;
785 } else if(r[0].getKind() == kind::REGEXP_EMPTY) {
786 conc = s.eqNode(d_emptyString);
787 } else if(r[0].getKind() == kind::REGEXP_SIGMA) {
788 conc = d_true;
789 } else {
790 Node se = s.eqNode(d_emptyString);
791 Node sinr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]);
792 Node sk1 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
793 Node sk2 = NodeManager::currentNM()->mkSkolem( "rs_$$", s.getType(), "created for regular expression star" );
794 Node s1nz = sk1.eqNode(d_emptyString).negate();
795 Node s2nz = sk2.eqNode(d_emptyString).negate();
796 Node s1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]);
797 Node s2inrs = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, r);
798 Node s12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2));
799
800 conc = NodeManager::currentNM()->mkNode(kind::AND, s12, s1nz, s2nz, s1inr, s2inrs);
801 conc = NodeManager::currentNM()->mkNode(kind::OR, se, sinr, conc);
802 }
803 break;
804 }
805 default: {
806 Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl;
807 Assert( false, "Unsupported Term" );
808 }
809 }
810 conc = Rewriter::rewrite( conc );
811 new_nodes.push_back( conc );
812 d_simpl_cache[p] = conc;
813 }
814 }
815
816 //printing
817 std::string RegExpOpr::niceChar( Node r ) {
818 if(r.isConst()) {
819 std::string s = r.getConst<CVC4::String>().toString() ;
820 return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
821 } else {
822 return r.toString();
823 }
824 }
825 std::string RegExpOpr::mkString( Node r ) {
826 std::string retStr;
827 if(r.isNull()) {
828 retStr = "Empty";
829 } else {
830 int k = r.getKind();
831 switch( k ) {
832 case kind::REGEXP_EMPTY: {
833 retStr += "Empty";
834 break;
835 }
836 case kind::REGEXP_SIGMA: {
837 retStr += "{W}";
838 break;
839 }
840 case kind::STRING_TO_REGEXP: {
841 retStr += niceChar( r[0] );
842 break;
843 }
844 case kind::REGEXP_CONCAT: {
845 retStr += "(";
846 for(unsigned i=0; i<r.getNumChildren(); ++i) {
847 if(i != 0) retStr += ".";
848 retStr += mkString( r[i] );
849 }
850 retStr += ")";
851 break;
852 }
853 case kind::REGEXP_UNION: {
854 if(r == d_sigma) {
855 retStr += "{A}";
856 } else {
857 retStr += "(";
858 for(unsigned i=0; i<r.getNumChildren(); ++i) {
859 if(i != 0) retStr += "|";
860 retStr += mkString( r[i] );
861 }
862 retStr += ")";
863 }
864 break;
865 }
866 case kind::REGEXP_INTER: {
867 retStr += "(";
868 for(unsigned i=0; i<r.getNumChildren(); ++i) {
869 if(i != 0) retStr += "&";
870 retStr += mkString( r[i] );
871 }
872 retStr += ")";
873 break;
874 }
875 case kind::REGEXP_STAR: {
876 retStr += mkString( r[0] );
877 retStr += "*";
878 break;
879 }
880 case kind::REGEXP_PLUS: {
881 retStr += mkString( r[0] );
882 retStr += "+";
883 break;
884 }
885 case kind::REGEXP_OPT: {
886 retStr += mkString( r[0] );
887 retStr += "?";
888 break;
889 }
890 case kind::REGEXP_RANGE: {
891 retStr += "[";
892 retStr += niceChar( r[0] );
893 retStr += "-";
894 retStr += niceChar( r[1] );
895 retStr += "]";
896 break;
897 }
898 default:
899 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
900 //AlwaysAssert( false );
901 //return Node::null();
902 }
903 }
904
905 return retStr;
906 }
907
908 }/* CVC4::theory::strings namespace */
909 }/* CVC4::theory namespace */
910 }/* CVC4 namespace */