1 /********************* */
2 /*! \file regexp_operation.CPP
4 ** Original author: Tianyi Liang
5 ** Major contributors: Tianyi Liang, Andrew Reynolds
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2013-2013 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
12 ** \brief Regular Expresion Operations
14 ** Regular Expresion Operations
17 #include "theory/strings/regexp_operation.h"
18 #include "expr/kind.h"
24 RegExpOpr::RegExpOpr() {
25 d_emptyString
= NodeManager::currentNM()->mkConst( ::CVC4::String("") );
26 // All Charactors = all printable ones 32-126
27 d_char_start
= 'a';//' ';
28 d_char_end
= 'c';//'~';
29 d_sigma
= mkAllExceptOne( '\0' );
30 d_sigma_star
= NodeManager::currentNM()->mkNode( kind::REGEXP_STAR
, d_sigma
);
33 bool RegExpOpr::checkConstRegExp( Node r
) {
34 Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r
) << std::endl
;
36 if( d_cstre_cache
.find( r
) != d_cstre_cache
.end() ) {
37 ret
= d_cstre_cache
[r
];
39 if(r
.getKind() == kind::STRING_TO_REGEXP
) {
40 Node tmp
= Rewriter::rewrite( r
[0] );
43 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
44 if(!checkConstRegExp(r
[i
])) {
49 d_cstre_cache
[r
] = ret
;
54 // 0-unknown, 1-yes, 2-no
55 int RegExpOpr::delta( Node r
) {
56 Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r
) << std::endl
;
58 if( d_delta_cache
.find( r
) != d_delta_cache
.end() ) {
59 ret
= d_delta_cache
[r
];
63 case kind::STRING_TO_REGEXP
:
66 if(r
[0] == d_emptyString
) {
76 case kind::REGEXP_CONCAT
:
79 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
80 int tmp
= delta( r
[i
] );
88 if(!flag
&& ret
!= 2) {
96 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
97 int tmp
= delta( r
[i
] );
101 } else if(tmp
== 0) {
105 if(!flag
&& ret
!= 1) {
110 case kind::REGEXP_INTER
:
113 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
114 int tmp
= delta( r
[i
] );
118 } else if(tmp
== 0) {
128 case kind::REGEXP_STAR
:
133 case kind::REGEXP_PLUS
:
138 case kind::REGEXP_OPT
:
143 case kind::REGEXP_RANGE
:
149 //TODO: special sym: sigma, none, all
150 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
151 //AlwaysAssert( false );
152 //return Node::null();
154 d_delta_cache
[r
] = ret
;
156 Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret
<< std::endl
;
161 Node
RegExpOpr::derivativeSingle( Node r
, CVC4::String c
) {
162 Assert( c
.size() < 2 );
163 Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r
) << " }, c=" << c
<< std::endl
;
164 Node retNode
= Node::null();
165 PairDvStr dv
= std::make_pair( r
, c
);
166 if( d_dv_cache
.find( dv
) != d_dv_cache
.end() ) {
167 retNode
= d_dv_cache
[dv
];
168 } else if( c
.isEmptyString() ){
169 int tmp
= delta( r
);
171 retNode
= Node::null();
173 } else if(tmp
== 1) {
176 retNode
= Node::null();
181 case kind::STRING_TO_REGEXP
:
184 if(r
[0] == d_emptyString
) {
185 retNode
= Node::null();
187 if(r
[0].getConst
< CVC4::String
>().getFirstChar() == c
.getFirstChar()) {
188 retNode
= r
[0].getConst
< CVC4::String
>().size() == 1 ? d_emptyString
: NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
189 NodeManager::currentNM()->mkConst( r
[0].getConst
< CVC4::String
>().substr(1) ) );
191 retNode
= Node::null();
195 retNode
= Node::null();
200 case kind::REGEXP_CONCAT
:
202 std::vector
< Node
> vec_nodes
;
203 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
204 Node dc
= derivativeSingle(r
[i
], c
);
206 std::vector
< Node
> vec_nodes2
;
207 if(dc
!= d_emptyString
) {
208 vec_nodes2
.push_back( dc
);
210 for(unsigned j
=i
+1; j
<r
.getNumChildren(); ++j
) {
211 if(r
[j
] != d_emptyString
) {
212 vec_nodes2
.push_back( r
[j
] );
215 Node tmp
= vec_nodes2
.size()==0 ? d_emptyString
:
216 ( vec_nodes2
.size()==1 ? vec_nodes2
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, vec_nodes2
) );
217 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), tmp
) == vec_nodes
.end()) {
218 vec_nodes
.push_back( tmp
);
222 if( delta( r
[i
] ) != 1 ) {
226 retNode
= vec_nodes
.size() == 0 ? Node::null() :
227 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
) );
230 case kind::REGEXP_OR
:
232 std::vector
< Node
> vec_nodes
;
233 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
234 Node dc
= derivativeSingle(r
[i
], c
);
236 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
237 vec_nodes
.push_back( dc
);
240 Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i
<< "]{ " << mkString(r
[i
]) << " returns " << (dc
.isNull() ? "NULL" : mkString(dc
) ) << std::endl
;
242 retNode
= vec_nodes
.size() == 0 ? Node::null() :
243 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
) );
246 case kind::REGEXP_INTER
:
249 bool flag_sg
= false;
250 std::vector
< Node
> vec_nodes
;
251 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
252 Node dc
= derivativeSingle(r
[i
], c
);
254 if(dc
== d_sigma_star
) {
257 if(std::find(vec_nodes
.begin(), vec_nodes
.end(), dc
) == vec_nodes
.end()) {
258 vec_nodes
.push_back( dc
);
267 if(vec_nodes
.size() == 0 && flag_sg
) {
268 retNode
= d_sigma_star
;
270 retNode
= vec_nodes
.size() == 0 ? Node::null() :
271 ( vec_nodes
.size()==1 ? vec_nodes
[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
) );
274 retNode
= Node::null();
278 case kind::REGEXP_STAR
:
280 Node dc
= derivativeSingle(r
[0], c
);
282 retNode
= dc
==d_emptyString
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
);
284 retNode
= Node::null();
288 case kind::REGEXP_PLUS
:
290 Node dc
= derivativeSingle(r
[0], c
);
292 retNode
= dc
==d_emptyString
? r
: NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, dc
, r
);
294 retNode
= Node::null();
298 case kind::REGEXP_OPT
:
300 retNode
= derivativeSingle(r
[0], c
);
303 case kind::REGEXP_RANGE
:
305 char ch
= c
.getFirstChar();
306 if (ch
>= r
[0].getConst
< CVC4::String
>().getFirstChar() && ch
<= r
[1].getConst
< CVC4::String
>().getFirstChar() ) {
307 retNode
= d_emptyString
;
309 retNode
= Node::null();
314 //TODO: special sym: sigma, none, all
315 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in derivative of RegExp." << std::endl
;
316 //AlwaysAssert( false );
317 //return Node::null();
319 if(!retNode
.isNull()) {
320 retNode
= Rewriter::rewrite( retNode
);
322 d_dv_cache
[dv
] = retNode
;
324 Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << ( retNode
.isNull() ? "NULL" : mkString( retNode
) ) << std::endl
;
328 void RegExpOpr::firstChar( Node r
) {
329 Trace("strings-regexp-firstchar") << "Head characters: ";
330 for(char ch
= d_char_start
; ch
<= d_char_end
; ++ch
) {
332 Node dc
= derivativeSingle(r
, ch
);
334 Trace("strings-regexp-firstchar") << c
<< " (" << mkString(dc
) << ")" << std::endl
;
337 Trace("strings-regexp-firstchar") << std::endl
;
340 bool RegExpOpr::follow( Node r
, CVC4::String c
, std::vector
< char > &vec_chars
) {
343 case kind::STRING_TO_REGEXP
:
346 if(r
[0] != d_emptyString
) {
347 char t1
= r
[0].getConst
< CVC4::String
>().getFirstChar();
348 if(c
.isEmptyString()) {
349 vec_chars
.push_back( t1
);
352 char t2
= c
.getFirstChar();
357 vec_chars
.push_back( c
.substr(1,1).getFirstChar() );
359 vec_chars
.push_back( '\0' );
372 case kind::REGEXP_CONCAT
:
374 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
375 if( follow(r
[i
], c
, vec_chars
) ) {
376 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
377 vec_chars
.pop_back();
378 c
= d_emptyString
.getConst
< CVC4::String
>();
384 vec_chars
.push_back( '\0' );
388 case kind::REGEXP_OR
:
391 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
392 if( follow(r
[i
], c
, vec_chars
) ) {
399 case kind::REGEXP_INTER
:
401 std::vector
< char > vt2
;
402 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
403 std::vector
< char > v_tmp
;
404 if( !follow(r
[i
], c
, v_tmp
) ) {
407 std::vector
< char > vt3(vt2
);
409 std::set_intersection( vt3
.begin(), vt3
.end(), v_tmp
.begin(), v_tmp
.end(), vt2
.begin() );
410 if(vt2
.size() == 0) {
414 vec_chars
.insert( vec_chars
.end(), vt2
.begin(), vt2
.end() );
418 case kind::REGEXP_STAR
:
420 if(follow(r
[0], c
, vec_chars
)) {
421 if(vec_chars
[vec_chars
.size() - 1] == '\0') {
422 if(c
.isEmptyString()) {
425 vec_chars
.pop_back();
426 c
= d_emptyString
.getConst
< CVC4::String
>();
427 return follow(r
[0], c
, vec_chars
);
433 vec_chars
.push_back( '\0' );
439 case kind::REGEXP_PLUS:
444 case kind::REGEXP_OPT:
449 case kind::REGEXP_RANGE:
455 //TODO: special sym: sigma, none, all
456 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in delta of RegExp." << std::endl
;
457 //AlwaysAssert( false );
458 //return Node::null();
463 Node
RegExpOpr::mkAllExceptOne( char exp_c
) {
464 std::vector
< Node
> vec_nodes
;
465 for(char c
=d_char_start
; c
<=d_char_end
; ++c
) {
467 Node n
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, NodeManager::currentNM()->mkConst( ::CVC4::String( c
) ) );
468 vec_nodes
.push_back( n
);
471 return NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
);
474 Node
RegExpOpr::complement( Node r
) {
475 Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r
) << std::endl
;
477 if( d_compl_cache
.find( r
) != d_compl_cache
.end() ) {
478 retNode
= d_compl_cache
[r
];
482 case kind::STRING_TO_REGEXP
:
485 if(r
[0] == d_emptyString
) {
486 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, d_sigma
, d_sigma_star
);
488 std::vector
< Node
> vec_nodes
;
489 vec_nodes
.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
, d_emptyString
) );
490 CVC4::String s
= r
[0].getConst
<String
>();
491 for(unsigned i
=0; i
<s
.size(); ++i
) {
492 char c
= s
.substr(i
, 1).getFirstChar();
493 Node tmp
= mkAllExceptOne( c
);
495 Node tmph
= NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP
,
496 NodeManager::currentNM()->mkConst( s
.substr(0, i
) ) );
497 tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, tmph
, tmp
);
499 tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, tmp
, d_sigma_star
);
500 vec_nodes
.push_back( tmp
);
502 Node tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, r
, d_sigma
, d_sigma_star
);
503 vec_nodes
.push_back( tmp
);
504 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
);
507 Trace("strings-error") << "Unsupported string variable: " << r
[0] << " in complement of RegExp." << std::endl
;
508 //AlwaysAssert( false );
509 //return Node::null();
513 case kind::REGEXP_CONCAT
:
515 std::vector
< Node
> vec_nodes
;
516 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
517 Node tmp
= complement( r
[i
] );
518 for(unsigned j
=0; j
<i
; ++j
) {
519 tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, r
[j
], tmp
);
521 if(i
!= r
.getNumChildren() - 1) {
522 tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, tmp
,
523 NodeManager::currentNM()->mkNode( kind::REGEXP_STAR
, d_sigma
) );
525 vec_nodes
.push_back( tmp
);
527 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
);
530 case kind::REGEXP_OR
:
532 std::vector
< Node
> vec_nodes
;
533 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
534 Node tmp
= complement( r
[i
] );
535 vec_nodes
.push_back( tmp
);
537 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, vec_nodes
);
540 case kind::REGEXP_INTER
:
542 std::vector
< Node
> vec_nodes
;
543 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
544 Node tmp
= complement( r
[i
] );
545 vec_nodes
.push_back( tmp
);
547 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_OR
, vec_nodes
);
550 case kind::REGEXP_STAR
:
552 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, d_sigma
, d_sigma_star
);
553 Node tmp
= complement( r
[0] );
554 tmp
= NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT
, r
, tmp
);
555 retNode
= NodeManager::currentNM()->mkNode( kind::REGEXP_INTER
, retNode
, tmp
);
559 //TODO: special sym: sigma, none, all
560 Trace("strings-error") << "Unsupported term: " << mkString( r
) << " in complement of RegExp." << std::endl
;
561 //AlwaysAssert( false );
562 //return Node::null();
564 d_compl_cache
[r
] = retNode
;
566 Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode
) << std::endl
;
570 std::string
RegExpOpr::niceChar( Node r
) {
572 std::string s
= r
.getConst
<CVC4::String
>().toString() ;
573 return s
== "" ? "{E}" : ( s
== " " ? "{ }" : s
);
578 std::string
RegExpOpr::mkString( Node r
) {
582 case kind::STRING_TO_REGEXP
:
584 retStr
+= niceChar( r
[0] );
587 case kind::REGEXP_CONCAT
:
590 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
591 if(i
!= 0) retStr
+= ".";
592 retStr
+= mkString( r
[i
] );
597 case kind::REGEXP_OR
:
603 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
604 if(i
!= 0) retStr
+= "|";
605 retStr
+= mkString( r
[i
] );
611 case kind::REGEXP_INTER
:
614 for(unsigned i
=0; i
<r
.getNumChildren(); ++i
) {
615 if(i
!= 0) retStr
+= "&";
616 retStr
+= mkString( r
[i
] );
621 case kind::REGEXP_STAR
:
623 retStr
+= mkString( r
[0] );
627 case kind::REGEXP_PLUS
:
629 retStr
+= mkString( r
[0] );
633 case kind::REGEXP_OPT
:
635 retStr
+= mkString( r
[0] );
639 case kind::REGEXP_RANGE
:
642 retStr
+= niceChar( r
[0] );
644 retStr
+= niceChar( r
[1] );
649 //TODO: special sym: sigma, none, all
650 Trace("strings-error") << "Unsupported term: " << r
<< " in RegExp." << std::endl
;
651 //AlwaysAssert( false );
652 //return Node::null();
658 }/* CVC4::theory::strings namespace */
659 }/* CVC4::theory namespace */
660 }/* CVC4 namespace */