bug fix
[cvc5.git] / src / theory / strings / regexp_operation.cpp
1 /********************* */
2 /*! \file regexp_operation.CPP
3 ** \verbatim
4 ** Original author: Tianyi Liang
5 ** Major contributors: Tianyi Liang, Andrew Reynolds
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2013-2013 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief Regular Expresion Operations
13 **
14 ** Regular Expresion Operations
15 **/
16
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
19
20 namespace CVC4 {
21 namespace theory {
22 namespace strings {
23
24 RegExpOpr::RegExpOpr() {
25 d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
26 // All Charactors = all printable ones 32-126
27 d_char_start = 'a';//' ';
28 d_char_end = 'c';//'~';
29 d_sigma = mkAllExceptOne( '\0' );
30 d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );
31 }
32
33 bool RegExpOpr::checkConstRegExp( Node r ) {
34 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl;
35 bool ret = true;
36 if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {
37 ret = d_cstre_cache[r];
38 } else {
39 if(r.getKind() == kind::STRING_TO_REGEXP) {
40 Node tmp = Rewriter::rewrite( r[0] );
41 ret = tmp.isConst();
42 } else {
43 for(unsigned i=0; i<r.getNumChildren(); ++i) {
44 if(!checkConstRegExp(r[i])) {
45 ret = false; break;
46 }
47 }
48 }
49 d_cstre_cache[r] = ret;
50 }
51 return ret;
52 }
53
54 // 0-unknown, 1-yes, 2-no
55 int RegExpOpr::delta( Node r ) {
56 Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;
57 int ret = 0;
58 if( d_delta_cache.find( r ) != d_delta_cache.end() ) {
59 ret = d_delta_cache[r];
60 } else {
61 int k = r.getKind();
62 switch( k ) {
63 case kind::STRING_TO_REGEXP:
64 {
65 if(r[0].isConst()) {
66 if(r[0] == d_emptyString) {
67 ret = 1;
68 } else {
69 ret = 2;
70 }
71 } else {
72 ret = 0;
73 }
74 }
75 break;
76 case kind::REGEXP_CONCAT:
77 {
78 bool flag = false;
79 for(unsigned i=0; i<r.getNumChildren(); ++i) {
80 int tmp = delta( r[i] );
81 if(tmp == 2) {
82 ret = 2;
83 break;
84 } else if(tmp == 0) {
85 flag = true;
86 }
87 }
88 if(!flag && ret != 2) {
89 ret = 1;
90 }
91 }
92 break;
93 case kind::REGEXP_OR:
94 {
95 bool flag = false;
96 for(unsigned i=0; i<r.getNumChildren(); ++i) {
97 int tmp = delta( r[i] );
98 if(tmp == 1) {
99 ret = 1;
100 break;
101 } else if(tmp == 0) {
102 flag = true;
103 }
104 }
105 if(!flag && ret != 1) {
106 ret = 2;
107 }
108 }
109 break;
110 case kind::REGEXP_INTER:
111 {
112 bool flag = true;
113 for(unsigned i=0; i<r.getNumChildren(); ++i) {
114 int tmp = delta( r[i] );
115 if(tmp == 2) {
116 ret = 2; flag=false;
117 break;
118 } else if(tmp == 0) {
119 flag=false;
120 break;
121 }
122 }
123 if(flag) {
124 ret = 1;
125 }
126 }
127 break;
128 case kind::REGEXP_STAR:
129 {
130 ret = 1;
131 }
132 break;
133 case kind::REGEXP_PLUS:
134 {
135 ret = delta( r[0] );
136 }
137 break;
138 case kind::REGEXP_OPT:
139 {
140 ret = 1;
141 }
142 break;
143 case kind::REGEXP_RANGE:
144 {
145 ret = 2;
146 }
147 break;
148 default:
149 //TODO: special sym: sigma, none, all
150 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
151 //AlwaysAssert( false );
152 //return Node::null();
153 }
154 d_delta_cache[r] = ret;
155 }
156 Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;
157 return ret;
158 }
159
160 //null - no solution
161 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
162 Assert( c.size() < 2 );
163 Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;
164 Node retNode = Node::null();
165 PairDvStr dv = std::make_pair( r, c );
166 if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
167 retNode = d_dv_cache[dv];
168 } else if( c.isEmptyString() ){
169 int tmp = delta( r );
170 if(tmp == 0) {
171 retNode = Node::null();
172 // TODO variable
173 } else if(tmp == 1) {
174 retNode = r;
175 } else {
176 retNode = Node::null();
177 }
178 } else {
179 int k = r.getKind();
180 switch( k ) {
181 case kind::STRING_TO_REGEXP:
182 {
183 if(r[0].isConst()) {
184 if(r[0] == d_emptyString) {
185 retNode = Node::null();
186 } else {
187 if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {
188 retNode = r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
189 NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
190 } else {
191 retNode = Node::null();
192 }
193 }
194 } else {
195 retNode = Node::null();
196 // TODO variable
197 }
198 }
199 break;
200 case kind::REGEXP_CONCAT:
201 {
202 std::vector< Node > vec_nodes;
203 for(unsigned i=0; i<r.getNumChildren(); ++i) {
204 Node dc = derivativeSingle(r[i], c);
205 if(!dc.isNull()) {
206 std::vector< Node > vec_nodes2;
207 if(dc != d_emptyString) {
208 vec_nodes2.push_back( dc );
209 }
210 for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
211 if(r[j] != d_emptyString) {
212 vec_nodes2.push_back( r[j] );
213 }
214 }
215 Node tmp = vec_nodes2.size()==0 ? d_emptyString :
216 ( vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ) );
217 if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
218 vec_nodes.push_back( tmp );
219 }
220 }
221
222 if( delta( r[i] ) != 1 ) {
223 break;
224 }
225 }
226 retNode = vec_nodes.size() == 0 ? Node::null() :
227 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
228 }
229 break;
230 case kind::REGEXP_OR:
231 {
232 std::vector< Node > vec_nodes;
233 for(unsigned i=0; i<r.getNumChildren(); ++i) {
234 Node dc = derivativeSingle(r[i], c);
235 if(!dc.isNull()) {
236 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
237 vec_nodes.push_back( dc );
238 }
239 }
240 Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << (dc.isNull() ? "NULL" : mkString(dc) ) << std::endl;
241 }
242 retNode = vec_nodes.size() == 0 ? Node::null() :
243 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );
244 }
245 break;
246 case kind::REGEXP_INTER:
247 {
248 bool flag = true;
249 bool flag_sg = false;
250 std::vector< Node > vec_nodes;
251 for(unsigned i=0; i<r.getNumChildren(); ++i) {
252 Node dc = derivativeSingle(r[i], c);
253 if(!dc.isNull()) {
254 if(dc == d_sigma_star) {
255 flag_sg = true;
256 } else {
257 if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
258 vec_nodes.push_back( dc );
259 }
260 }
261 } else {
262 flag = false;
263 break;
264 }
265 }
266 if(flag) {
267 if(vec_nodes.size() == 0 && flag_sg) {
268 retNode = d_sigma_star;
269 } else {
270 retNode = vec_nodes.size() == 0 ? Node::null() :
271 ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
272 }
273 } else {
274 retNode = Node::null();
275 }
276 }
277 break;
278 case kind::REGEXP_STAR:
279 {
280 Node dc = derivativeSingle(r[0], c);
281 if(!dc.isNull()) {
282 retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
283 } else {
284 retNode = Node::null();
285 }
286 }
287 break;
288 case kind::REGEXP_PLUS:
289 {
290 Node dc = derivativeSingle(r[0], c);
291 if(!dc.isNull()) {
292 retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
293 } else {
294 retNode = Node::null();
295 }
296 }
297 break;
298 case kind::REGEXP_OPT:
299 {
300 retNode = derivativeSingle(r[0], c);
301 }
302 break;
303 case kind::REGEXP_RANGE:
304 {
305 char ch = c.getFirstChar();
306 if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) {
307 retNode = d_emptyString;
308 } else {
309 retNode = Node::null();
310 }
311 }
312 break;
313 default:
314 //TODO: special sym: sigma, none, all
315 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
316 //AlwaysAssert( false );
317 //return Node::null();
318 }
319 if(!retNode.isNull()) {
320 retNode = Rewriter::rewrite( retNode );
321 }
322 d_dv_cache[dv] = retNode;
323 }
324 Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << ( retNode.isNull() ? "NULL" : mkString( retNode ) ) << std::endl;
325 return retNode;
326 }
327
328 void RegExpOpr::firstChar( Node r ) {
329 Trace("strings-regexp-firstchar") << "Head characters: ";
330 for(char ch = d_char_start; ch <= d_char_end; ++ch) {
331 CVC4::String c(ch);
332 Node dc = derivativeSingle(r, ch);
333 if(!dc.isNull()) {
334 Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;
335 }
336 }
337 Trace("strings-regexp-firstchar") << std::endl;
338 }
339
340 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {
341 int k = r.getKind();
342 switch( k ) {
343 case kind::STRING_TO_REGEXP:
344 {
345 if(r[0].isConst()) {
346 if(r[0] != d_emptyString) {
347 char t1 = r[0].getConst< CVC4::String >().getFirstChar();
348 if(c.isEmptyString()) {
349 vec_chars.push_back( t1 );
350 return true;
351 } else {
352 char t2 = c.getFirstChar();
353 if(t1 != t2) {
354 return false;
355 } else {
356 if(c.size() >= 2) {
357 vec_chars.push_back( c.substr(1,1).getFirstChar() );
358 } else {
359 vec_chars.push_back( '\0' );
360 }
361 return true;
362 }
363 }
364 } else {
365 return false;
366 }
367 } else {
368 return false;
369 }
370 }
371 break;
372 case kind::REGEXP_CONCAT:
373 {
374 for(unsigned i=0; i<r.getNumChildren(); ++i) {
375 if( follow(r[i], c, vec_chars) ) {
376 if(vec_chars[vec_chars.size() - 1] == '\0') {
377 vec_chars.pop_back();
378 c = d_emptyString.getConst< CVC4::String >();
379 }
380 } else {
381 return false;
382 }
383 }
384 vec_chars.push_back( '\0' );
385 return true;
386 }
387 break;
388 case kind::REGEXP_OR:
389 {
390 bool flag = false;
391 for(unsigned i=0; i<r.getNumChildren(); ++i) {
392 if( follow(r[i], c, vec_chars) ) {
393 flag=true;
394 }
395 }
396 return flag;
397 }
398 break;
399 case kind::REGEXP_INTER:
400 {
401 std::vector< char > vt2;
402 for(unsigned i=0; i<r.getNumChildren(); ++i) {
403 std::vector< char > v_tmp;
404 if( !follow(r[i], c, v_tmp) ) {
405 return false;
406 }
407 std::vector< char > vt3(vt2);
408 vt2.clear();
409 std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );
410 if(vt2.size() == 0) {
411 return false;
412 }
413 }
414 vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );
415 return true;
416 }
417 break;
418 case kind::REGEXP_STAR:
419 {
420 if(follow(r[0], c, vec_chars)) {
421 if(vec_chars[vec_chars.size() - 1] == '\0') {
422 if(c.isEmptyString()) {
423 return true;
424 } else {
425 vec_chars.pop_back();
426 c = d_emptyString.getConst< CVC4::String >();
427 return follow(r[0], c, vec_chars);
428 }
429 } else {
430 return true;
431 }
432 } else {
433 vec_chars.push_back( '\0' );
434 return true;
435 }
436 }
437 break;
438 /*
439 case kind::REGEXP_PLUS:
440 {
441 ret = delta( r[0] );
442 }
443 break;
444 case kind::REGEXP_OPT:
445 {
446 ret = 1;
447 }
448 break;
449 case kind::REGEXP_RANGE:
450 {
451 ret = 2;
452 }
453 break;*/
454 default:
455 //TODO: special sym: sigma, none, all
456 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;
457 //AlwaysAssert( false );
458 //return Node::null();
459 return false;
460 }
461 }
462
463 Node RegExpOpr::mkAllExceptOne( char exp_c ) {
464 std::vector< Node > vec_nodes;
465 for(char c=d_char_start; c<=d_char_end; ++c) {
466 if(c != exp_c ) {
467 Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );
468 vec_nodes.push_back( n );
469 }
470 }
471 return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
472 }
473
474 Node RegExpOpr::complement( Node r ) {
475 Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl;
476 Node retNode = r;
477 if( d_compl_cache.find( r ) != d_compl_cache.end() ) {
478 retNode = d_compl_cache[r];
479 } else {
480 int k = r.getKind();
481 switch( k ) {
482 case kind::STRING_TO_REGEXP:
483 {
484 if(r[0].isConst()) {
485 if(r[0] == d_emptyString) {
486 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
487 } else {
488 std::vector< Node > vec_nodes;
489 vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) );
490 CVC4::String s = r[0].getConst<String>();
491 for(unsigned i=0; i<s.size(); ++i) {
492 char c = s.substr(i, 1).getFirstChar();
493 Node tmp = mkAllExceptOne( c );
494 if(i != 0 ) {
495 Node tmph = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
496 NodeManager::currentNM()->mkConst( s.substr(0, i) ) );
497 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp );
498 }
499 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star );
500 vec_nodes.push_back( tmp );
501 }
502 Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );
503 vec_nodes.push_back( tmp );
504 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
505 }
506 } else {
507 Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;
508 //AlwaysAssert( false );
509 //return Node::null();
510 }
511 }
512 break;
513 case kind::REGEXP_CONCAT:
514 {
515 std::vector< Node > vec_nodes;
516 for(unsigned i=0; i<r.getNumChildren(); ++i) {
517 Node tmp = complement( r[i] );
518 for(unsigned j=0; j<i; ++j) {
519 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r[j], tmp );
520 }
521 if(i != r.getNumChildren() - 1) {
522 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp,
523 NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) );
524 }
525 vec_nodes.push_back( tmp );
526 }
527 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
528 }
529 break;
530 case kind::REGEXP_OR:
531 {
532 std::vector< Node > vec_nodes;
533 for(unsigned i=0; i<r.getNumChildren(); ++i) {
534 Node tmp = complement( r[i] );
535 vec_nodes.push_back( tmp );
536 }
537 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes );
538 }
539 break;
540 case kind::REGEXP_INTER:
541 {
542 std::vector< Node > vec_nodes;
543 for(unsigned i=0; i<r.getNumChildren(); ++i) {
544 Node tmp = complement( r[i] );
545 vec_nodes.push_back( tmp );
546 }
547 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );
548 }
549 break;
550 case kind::REGEXP_STAR:
551 {
552 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );
553 Node tmp = complement( r[0] );
554 tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp );
555 retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp );
556 }
557 break;
558 default:
559 //TODO: special sym: sigma, none, all
560 Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl;
561 //AlwaysAssert( false );
562 //return Node::null();
563 }
564 d_compl_cache[r] = retNode;
565 }
566 Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl;
567 return retNode;
568 }
569
570 std::string RegExpOpr::niceChar( Node r ) {
571 if(r.isConst()) {
572 std::string s = r.getConst<CVC4::String>().toString() ;
573 return s == "" ? "{E}" : ( s == " " ? "{ }" : s );
574 } else {
575 return r.toString();
576 }
577 }
578 std::string RegExpOpr::mkString( Node r ) {
579 std::string retStr;
580 int k = r.getKind();
581 switch( k ) {
582 case kind::STRING_TO_REGEXP:
583 {
584 retStr += niceChar( r[0] );
585 }
586 break;
587 case kind::REGEXP_CONCAT:
588 {
589 retStr += "(";
590 for(unsigned i=0; i<r.getNumChildren(); ++i) {
591 if(i != 0) retStr += ".";
592 retStr += mkString( r[i] );
593 }
594 retStr += ")";
595 }
596 break;
597 case kind::REGEXP_OR:
598 {
599 if(r == d_sigma) {
600 retStr += "{A}";
601 } else {
602 retStr += "(";
603 for(unsigned i=0; i<r.getNumChildren(); ++i) {
604 if(i != 0) retStr += "|";
605 retStr += mkString( r[i] );
606 }
607 retStr += ")";
608 }
609 }
610 break;
611 case kind::REGEXP_INTER:
612 {
613 retStr += "(";
614 for(unsigned i=0; i<r.getNumChildren(); ++i) {
615 if(i != 0) retStr += "&";
616 retStr += mkString( r[i] );
617 }
618 retStr += ")";
619 }
620 break;
621 case kind::REGEXP_STAR:
622 {
623 retStr += mkString( r[0] );
624 retStr += "*";
625 }
626 break;
627 case kind::REGEXP_PLUS:
628 {
629 retStr += mkString( r[0] );
630 retStr += "+";
631 }
632 break;
633 case kind::REGEXP_OPT:
634 {
635 retStr += mkString( r[0] );
636 retStr += "?";
637 }
638 break;
639 case kind::REGEXP_RANGE:
640 {
641 retStr += "[";
642 retStr += niceChar( r[0] );
643 retStr += "-";
644 retStr += niceChar( r[1] );
645 retStr += "]";
646 }
647 break;
648 default:
649 //TODO: special sym: sigma, none, all
650 Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;
651 //AlwaysAssert( false );
652 //return Node::null();
653 }
654
655 return retStr;
656 }
657
658 }/* CVC4::theory::strings namespace */
659 }/* CVC4::theory namespace */
660 }/* CVC4 namespace */